diff options
author | Maxime Ripard <mripard@kernel.org> | 2025-02-06 13:47:32 +0100 |
---|---|---|
committer | Maxime Ripard <mripard@kernel.org> | 2025-02-06 13:47:32 +0100 |
commit | 93c7dd1b39444ebd5a6a98e56a363d7a4e646775 (patch) | |
tree | 6e186e041d4253059a0e6471cb444ea35da5db09 /drivers/gpu/drm | |
parent | 2c1268e7aad0819f38e56134bbc2095fd95fde1b (diff) | |
parent | 2014c95afecee3e76ca4a56956a936e23283f05b (diff) |
Merge drm/drm-next into drm-misc-next
Bring rc1 to start the new release dev.
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Diffstat (limited to 'drivers/gpu/drm')
859 files changed, 28508 insertions, 13865 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 6f1cf235073d..fbef3f471bd0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -99,6 +99,7 @@ config DRM_KUNIT_TEST config DRM_KMS_HELPER tristate depends on DRM + select FB_CORE if DRM_FBDEV_EMULATION help CRTC helpers for KMS drivers. @@ -293,6 +294,8 @@ config DRM_TTM_HELPER tristate depends on DRM select DRM_TTM + select DRM_KMS_HELPER if DRM_FBDEV_EMULATION + select FB_CORE if DRM_FBDEV_EMULATION select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION help Helpers for ttm-based gem objects @@ -300,6 +303,8 @@ config DRM_TTM_HELPER config DRM_GEM_DMA_HELPER tristate depends on DRM + select DRM_KMS_HELPER if DRM_FBDEV_EMULATION + select FB_CORE if DRM_FBDEV_EMULATION select FB_DMAMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION help Choose this if you need the GEM DMA helper functions @@ -307,6 +312,8 @@ config DRM_GEM_DMA_HELPER config DRM_GEM_SHMEM_HELPER tristate depends on DRM && MMU + select DRM_KMS_HELPER if DRM_FBDEV_EMULATION + select FB_CORE if DRM_FBDEV_EMULATION select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION help Choose this if you need the GEM shmem helper functions diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index c7b18c52825d..5b21674b07fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -1,5 +1,5 @@ # -# Copyright 2017 Advanced Micro Devices, Inc. +# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -105,7 +105,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o # add IH block amdgpu-y += \ @@ -200,6 +200,7 @@ amdgpu-y += \ vcn_v4_0_3.o \ vcn_v4_0_5.o \ vcn_v5_0_0.o \ + vcn_v5_0_1.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ @@ -208,7 +209,8 @@ amdgpu-y += \ jpeg_v4_0.o \ jpeg_v4_0_3.o \ jpeg_v4_0_5.o \ - jpeg_v5_0_0.o + jpeg_v5_0_0.o \ + jpeg_v5_0_1.o # add VPE block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index f44de9d4b6a1..e13fbd974141 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -334,6 +334,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, AMDGPU_INIT_LEVEL_RESET_RECOVERY); dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + /*TBD: Ideally should clear only GFX, SDMA blocks*/ + amdgpu_ras_clear_err_state(tmp_adev); r = aldebaran_mode2_restore_ip(tmp_adev); if (r) goto end; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4653a8d2823a..69895fccb474 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -880,6 +880,7 @@ struct amdgpu_device { bool need_swiotlb; bool accel_working; struct notifier_block acpi_nb; + struct notifier_block pm_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct debugfs_blob_wrapper debugfs_vbios_blob; struct debugfs_blob_wrapper debugfs_discovery_blob; @@ -1174,7 +1175,6 @@ struct amdgpu_device { struct work_struct reset_work; - bool job_hang; bool dc_enabled; /* Mask of active clusters */ uint32_t aid_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 5ef6b745f222..f3289d289913 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -71,6 +71,11 @@ struct ras_query_context; #define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE) #define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED) +#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */ +#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */ +#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ +#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ + enum aca_reg_idx { ACA_REG_IDX_CTL = 0, ACA_REG_IDX_STATUS = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index ec5e0dcf8613..deb0785350e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -140,7 +140,7 @@ static int acp_poweroff(struct generic_pm_domain *genpd) * 2. power off the acp tiles * 3. check and enter ulv state */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0); return 0; } @@ -157,7 +157,7 @@ static int acp_poweron(struct generic_pm_domain *genpd) * 2. turn on acp clock * 3. 
power on acp tiles */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0); return 0; } @@ -236,7 +236,7 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block) ip_block->version->major, ip_block->version->minor); /* -ENODEV means board uses AZ rather than ACP */ if (r == -ENODEV) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0); return 0; } else if (r) { return r; @@ -508,7 +508,7 @@ static int acp_hw_fini(struct amdgpu_ip_block *ip_block) /* return early if no ACP */ if (!adev->acp.acp_genpd) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0); return 0; } @@ -565,7 +565,7 @@ static int acp_suspend(struct amdgpu_ip_block *ip_block) /* power up on suspend */ if (!adev->acp.acp_cell) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0); return 0; } @@ -575,7 +575,7 @@ static int acp_resume(struct amdgpu_ip_block *ip_block) /* power down again on resume */ if (!adev->acp.acp_cell) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0); return 0; } @@ -584,19 +584,19 @@ static bool acp_is_idle(void *handle) return true; } -static int acp_set_clockgating_state(void *handle, +static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int acp_set_powergating_state(void *handle, +static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == 
AMD_PG_STATE_GATE); - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3afcd1e8aa54..2c1b38c5cfc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -368,7 +368,7 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj) { struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj; - amdgpu_bo_reserve(*bo, true); + (void)amdgpu_bo_reserve(*bo, true); amdgpu_bo_kunmap(*bo); amdgpu_bo_unpin(*bo); amdgpu_bo_unreserve(*bo); @@ -715,8 +715,9 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; - if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && - ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) { + if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && + ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) || + (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) { pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); amdgpu_gfx_off_ctrl(adev, idle); } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) && @@ -724,7 +725,9 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) /* Disable GFXOFF and PG. Temporary workaround * to fix some compute applications issue on GFX9. 
*/ - adev->ip_blocks[AMD_IP_BLOCK_TYPE_GFX].version->funcs->set_powergating_state((void *)adev, state); + struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + if (gfx_block != NULL) + gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state); } amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, @@ -834,7 +837,7 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (!kiq_ring->sched.ready || adev->job_hang) + if (!kiq_ring->sched.ready || amdgpu_in_reset(adev)) return 0; ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4b80ad860639..8af67f18500a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -433,6 +433,9 @@ void kgd2kfd_unlock_kfd(void); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); +bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, + bool retry_fault); + #else static inline int kgd2kfd_init(void) { @@ -518,5 +521,12 @@ static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { return false; } + +static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, + bool retry_fault) +{ + return false; +} + #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index cc66ebb7bae1..441568163e20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1131,6 +1131,9 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct 
amdgpu_device *adev, uint32_t low, high; uint64_t queue_addr = 0; + if (!amdgpu_gpu_recovery) + return 0; + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); amdgpu_gfx_rlc_enter_safe_mode(adev, inst); @@ -1179,6 +1182,9 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, uint32_t low, high, pipe_reset_data = 0; uint64_t queue_addr = 0; + if (!amdgpu_gpu_recovery) + return 0; + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); amdgpu_gfx_rlc_enter_safe_mode(adev, inst); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f30548f4c3b3..1e998f972c30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -730,7 +730,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem, return; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg); @@ -779,7 +779,7 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, } amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -989,7 +989,7 @@ unwind: if (!attachment[i]) continue; if (attachment[i]->bo_va) { - amdgpu_bo_reserve(bo[i], true); + (void)amdgpu_bo_reserve(bo[i], true); if (--attachment[i]->bo_va->ref_count == 0) amdgpu_vm_bo_del(adev, attachment[i]->bo_va); amdgpu_bo_unreserve(bo[i]); @@ -1259,11 +1259,11 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem, return -EBUSY; } - amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va); - amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); + (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - amdgpu_sync_fence(sync, bo_va->last_pt_update); + (void)amdgpu_sync_fence(sync, bo_va->last_pt_update); return 0; } @@ -2352,7 +2352,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; - amdgpu_bo_reserve(bo, true); + (void)amdgpu_bo_reserve(bo, true); amdgpu_bo_kunmap(bo); amdgpu_bo_unpin(bo); amdgpu_bo_unreserve(bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 45affc02548c..423fd2eebe1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -47,35 +47,37 @@ /* Check if current bios is an ATOM BIOS. * Return true if it is ATOM BIOS. Otherwise, return false. 
*/ -static bool check_atom_bios(uint8_t *bios, size_t size) +static bool check_atom_bios(struct amdgpu_device *adev, size_t size) { uint16_t tmp, bios_header_start; + uint8_t *bios = adev->bios; if (!bios || size < 0x49) { - DRM_INFO("vbios mem is null or mem size is wrong\n"); + dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n"); return false; } if (!AMD_IS_VALID_VBIOS(bios)) { - DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]); + dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0], + bios[1]); return false; } bios_header_start = bios[0x48] | (bios[0x49] << 8); if (!bios_header_start) { - DRM_INFO("Can't locate bios header\n"); + dev_dbg(adev->dev, "Can't locate VBIOS header\n"); return false; } tmp = bios_header_start + 4; if (size < tmp) { - DRM_INFO("BIOS header is broken\n"); + dev_dbg(adev->dev, "VBIOS header is broken\n"); return false; } if (!memcmp(bios + tmp, "ATOM", 4) || !memcmp(bios + tmp, "MOTA", 4)) { - DRM_DEBUG("ATOMBIOS detected\n"); + dev_dbg(adev->dev, "ATOMBIOS detected\n"); return true; } @@ -118,7 +120,7 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) memcpy_fromio(adev->bios, bios, size); iounmap(bios); - if (!check_atom_bios(adev->bios, size)) { + if (!check_atom_bios(adev, size)) { kfree(adev->bios); return false; } @@ -146,7 +148,7 @@ bool amdgpu_read_bios(struct amdgpu_device *adev) memcpy_fromio(adev->bios, bios, size); pci_unmap_rom(adev->pdev, bios); - if (!check_atom_bios(adev->bios, size)) { + if (!check_atom_bios(adev, size)) { kfree(adev->bios); return false; } @@ -186,7 +188,7 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev) /* read complete BIOS */ amdgpu_asic_read_bios_from_rom(adev, adev->bios, len); - if (!check_atom_bios(adev->bios, len)) { + if (!check_atom_bios(adev, len)) { kfree(adev->bios); return false; } @@ -216,7 +218,7 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev) memcpy_fromio(adev->bios, bios, romlen); 
iounmap(bios); - if (!check_atom_bios(adev->bios, romlen)) + if (!check_atom_bios(adev, romlen)) goto free_bios; adev->bios_size = romlen; @@ -324,7 +326,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) break; } - if (!check_atom_bios(adev->bios, size)) { + if (!check_atom_bios(adev, size)) { kfree(adev->bios); return false; } @@ -389,7 +391,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) vhdr->ImageLength, GFP_KERNEL); - if (!check_atom_bios(adev->bios, vhdr->ImageLength)) { + if (!check_atom_bios(adev, vhdr->ImageLength)) { kfree(adev->bios); return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 16153d275d7a..68bce6a6d09d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -414,7 +414,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name); + err = amdgpu_ucode_request(adev, &adev->pm.fw, + AMDGPU_UCODE_REQUIRED, + "%s", fw_name); if (err) { DRM_ERROR("Failed to load firmware \"%s\"", fw_name); amdgpu_ucode_release(&adev->pm.fw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 471f3dc81e8d..5cc5f59e3018 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1801,13 +1801,18 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; + /* Make sure VRAM is allocated contigiously */ (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); - for (i = 0; i < (*bo)->placement.num_placement; i++) - (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); - if (r) - return r; + if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM 
&& + !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) { + + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); + for (i = 0; i < (*bo)->placement.num_placement; i++) + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); + if (r) + return r; + } return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a68338cb7b4a..49ca8c814455 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2095,6 +2095,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog) amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm); + amdgpu_debugfs_vcn_sched_mask_init(adev); amdgpu_debugfs_jpeg_sched_mask_init(adev); amdgpu_debugfs_gfx_sched_mask_init(adev); amdgpu_debugfs_compute_sched_mask_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 946c48829f19..824f9da5b6ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -343,11 +343,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, coredump->skip_vram_check = skip_vram_check; coredump->reset_vram_lost = vram_lost; - if (job && job->vm) { - struct amdgpu_vm *vm = job->vm; + if (job && job->pasid) { struct amdgpu_task_info *ti; - ti = amdgpu_vm_get_task_info_vm(vm); + ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid); if (ti) { coredump->reset_task_info = *ti; amdgpu_vm_put_task_info(ti); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 96316111300a..d100bb7a137c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -145,7 +145,7 @@ const char *amdgpu_asic_name[] = { "LAST", }; 
-#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0) +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0) /* * Default init level where all blocks are expected to be initialized. This is * the level of initialization expected by default and also after a full reset @@ -199,14 +199,16 @@ void amdgpu_set_init_level(struct amdgpu_device *adev, } static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data); /** * DOC: pcie_replay_count * * The amdgpu driver provides a sysfs API for reporting the total number - * of PCIe replays (NAKs) + * of PCIe replays (NAKs). * The file pcie_replay_count is used for this and returns the total - * number of replays as a sum of the NAKs generated and NAKs received + * number of replays as a sum of the NAKs generated and NAKs received. */ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, @@ -417,6 +419,9 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); + if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + return false; + if (adev->has_pr3 || ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid())) return true; @@ -429,8 +434,8 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) * @dev: drm_device pointer * * Return: - * 1 if the device supporte BACO; - * 3 if the device support MACO (only works if BACO is supported) + * 1 if the device supports BACO; + * 3 if the device supports MACO (only works if BACO is supported) * otherwise return 0. 
*/ int amdgpu_device_supports_baco(struct drm_device *dev) @@ -577,7 +582,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, } /** - * amdgpu_device_aper_access - access vram by vram aperature + * amdgpu_device_aper_access - access vram by vram aperture * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram @@ -668,7 +673,7 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) * here is that the GPU reset is not running on another thread in parallel. * * For this we trylock the read side of the reset semaphore, if that succeeds - * we know that the reset is not running in paralell. + * we know that the reset is not running in parallel. * * If the trylock fails we assert that we are either already holding the read * side of the lock or are the reset thread itself and hold the write side of @@ -1399,6 +1404,7 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { amdgpu_psp_wait_for_bootloader(adev); ret = amdgpu_atomfirmware_asic_init(adev, true); @@ -1733,7 +1739,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) uint32_t fw_ver; err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); - /* force vPost if error occured */ + /* force vPost if error occurred */ if (err) return true; @@ -2165,7 +2171,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev, if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) continue; r = adev->ip_blocks[i].version->funcs->set_clockgating_state( - (void *)adev, state); + &adev->ip_blocks[i], state); if (r) DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2199,7 +2205,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, 
if (!adev->ip_blocks[i].version->funcs->set_powergating_state) continue; r = adev->ip_blocks[i].version->funcs->set_powergating_state( - (void *)adev, state); + &adev->ip_blocks[i], state); if (r) DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2359,8 +2365,8 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, break; } - DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, - ip_block_version->funcs->name); + dev_info(adev->dev, "detected ip block number %d <%s>\n", + adev->num_ip_blocks, ip_block_version->funcs->name); adev->ip_blocks[adev->num_ip_blocks].adev = adev; @@ -2378,7 +2384,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, * the module parameter virtual_display. This feature provides a virtual * display hardware on headless boards or in virtualized environments. * This function parses and validates the configuration string specified by - * the user and configues the virtual display configuration (number of + * the user and configures the virtual display configuration (number of * virtual connectors, crtcs, etc.) specified. */ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) @@ -2441,7 +2447,7 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Parses the asic configuration parameters specified in the gpu info - * firmware and makes them availale to the driver for use in configuring + * firmware and makes them available to the driver for use in configuring * the asic. * Returns 0 on success, -EINVAL on failure. 
*/ @@ -2482,6 +2488,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_gpu_info.bin", chip_name); if (err) { dev_err(adev->dev, @@ -2501,7 +2508,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) le32_to_cpu(hdr->header.ucode_array_offset_bytes)); /* - * Should be droped when DAL no longer needs it. + * Should be dropped when DAL no longer needs it. */ if (adev->asic_type == CHIP_NAVI12) goto parse_soc_bounding_box; @@ -3061,7 +3068,7 @@ init_failed: * * Writes a reset magic value to the gart pointer in VRAM. The driver calls * this function before a GPU reset. If the value is retained after a - * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents. + * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents. */ static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) { @@ -3137,7 +3144,7 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ - r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, + r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i], state); if (r) { DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", @@ -3174,7 +3181,7 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_powergating_state) { /* enable powergating to save power */ - r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, + r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i], state); if (r) { DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", @@ -3376,7 +3383,7 @@ static 
int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_amdkfd_suspend(adev, false); - /* Workaroud for ASICs need to disable SMC first */ + /* Workaround for ASICs that need to disable SMC first */ amdgpu_device_smu_fini_early(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { @@ -3478,7 +3485,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) WARN_ON_ONCE(adev->gfx.gfx_off_state); WARN_ON_ONCE(adev->gfx.gfx_off_req_count); - if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) + if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0)) adev->gfx.gfx_off_state = true; } @@ -4306,7 +4313,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* * Reset domain needs to be present early, before XGMI hive discovered - * (if any) and intitialized to use reset sem and in_gpu reset flag + * (if any) and initialized to use reset sem and in_gpu reset flag * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); @@ -4596,6 +4603,11 @@ fence_driver_init: amdgpu_device_check_iommu_direct_map(adev); + adev->pm_nb.notifier_call = amdgpu_device_pm_notifier; + r = register_pm_notifier(&adev->pm_nb); + if (r) + goto failed; + return 0; release_ras_con: @@ -4660,6 +4672,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) drain_workqueue(adev->mman.bdev.wq); adev->shutdown = true; + unregister_pm_notifier(&adev->pm_nb); + /* make sure IB test finished before entering exclusive mode * to avoid preemption on IB test */ @@ -4778,8 +4792,8 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) { int ret; - /* No need to evict vram on APUs for suspend to ram or s2idle */ - if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) + /* No need to evict vram on APUs unless going to S4 */ + if (!adev->in_s4 && (adev->flags & AMD_IS_APU)) return 0; ret = amdgpu_ttm_evict_resources(adev, 
TTM_PL_VRAM); @@ -4792,6 +4806,41 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * Suspend & resume. */ /** + * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events + * @nb: notifier block + * @mode: suspend mode + * @data: data + * + * This function is called when the system is about to suspend or hibernate. + * It is used to evict resources from the device before the system goes to + * sleep while there is still access to swap. + */ +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data) +{ + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); + int r; + + switch (mode) { + case PM_HIBERNATION_PREPARE: + adev->in_s4 = true; + fallthrough; + case PM_SUSPEND_PREPARE: + r = amdgpu_device_evict_resources(adev); + /* + * This is considered non-fatal at this time because + * amdgpu_device_prepare() will also fatally evict resources. + * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 + */ + if (r) + drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + } + + return NOTIFY_DONE; +} + +/** * amdgpu_device_prepare - prepare for device suspend * * @dev: drm dev pointer @@ -4830,7 +4879,7 @@ int amdgpu_device_prepare(struct drm_device *dev) return 0; unprepare: - adev->in_s0ix = adev->in_s3 = false; + adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; return r; } @@ -5181,7 +5230,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - amdgpu_ras_set_fed(adev, false); + amdgpu_ras_clear_err_state(adev); amdgpu_irq_gpu_reset_resume_helper(adev); /* some sw clean up VF needs to do before recover */ @@ -5238,16 +5287,18 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, } /** - * amdgpu_device_has_job_running - check if there is any job in mirror list + * amdgpu_device_has_job_running - check if there is any unfinished job * * @adev: 
amdgpu_device pointer * - * check if there is any job in mirror list + * check if there is any job running on the device when guest driver receives + * FLR notification from host driver. If there are still jobs running, then + * the guest driver will not respond to the FLR reset. Instead, let the job hit + * the timeout and the guest driver then issues the reset request. */ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) { int i; - struct drm_sched_job *job; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -5255,11 +5306,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) if (!amdgpu_ring_sched_ready(ring)) continue; - spin_lock(&ring->sched.job_list_lock); - job = list_first_entry_or_null(&ring->sched.pending_list, - struct drm_sched_job, list); - spin_unlock(&ring->sched.job_list_lock); - if (job) + if (amdgpu_fence_count_emitted(ring)) return true; } return false; @@ -5484,7 +5531,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) amdgpu_set_init_level(tmp_adev, init_level); if (full_reset) { /* post card */ - amdgpu_ras_set_fed(tmp_adev, false); + amdgpu_ras_clear_err_state(tmp_adev); r = amdgpu_device_asic_init(tmp_adev); if (r) { dev_warn(tmp_adev->dev, "asic atom init failed!"); @@ -5818,6 +5865,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int retry_limit = AMDGPU_MAX_RETRY_LIMIT; /* + * If it reaches here because of hang/timeout and a RAS error is + * detected at the same time, let RAS recovery take care of it. 
+ */ + if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && + !amdgpu_sriov_vf(adev) && + reset_context->src != AMDGPU_RESET_SRC_RAS) { + dev_dbg(adev->dev, + "Gpu recovery from source: %d yielding to RAS error recovery handling", + reset_context->src); + return 0; + } + /* * Special case: RAS triggered and full reset isn't supported */ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); @@ -5900,7 +5959,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_amdkfd_pre_reset(tmp_adev, reset_context); /* - * Mark these ASICs to be reseted as untracked first + * Mark these ASICs to be reset as untracked first * And add them back after reset completed */ amdgpu_unregister_gpu_instance(tmp_adev); @@ -6099,19 +6158,56 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, } /** + * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU + * + * @adev: amdgpu_device pointer + * @speed: pointer to the speed of the link + * @width: pointer to the width of the link + * + * Evaluate the hierarchy to find the speed and bandwidth capabilities of the + * AMD dGPU which may be a virtual upstream bridge. 
+ */ +static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + struct pci_dev *parent = adev->pdev; + + if (!speed || !width) + return; + + parent = pci_upstream_bridge(parent); + if (parent && parent->vendor == PCI_VENDOR_ID_ATI) { + /* use the upstream/downstream switches internal to dGPU */ + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + while ((parent = pci_upstream_bridge(parent))) { + if (parent->vendor == PCI_VENDOR_ID_ATI) { + /* use the upstream/downstream switches internal to dGPU */ + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + } + } + } else { + /* use the device itself */ + *speed = pcie_get_speed_cap(adev->pdev); + *width = pcie_get_width_cap(adev->pdev); + } +} + +/** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * * @adev: amdgpu_device pointer * - * Fetchs and stores in the driver the PCIE capabilities (gen speed + * Fetches and stores in the driver the PCIE capabilities (gen speed * and lanes) of the slot the device is in. Handles APUs and * virtualized environments where PCIE config space may not be available. 
*/ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { - struct pci_dev *pdev; enum pci_bus_speed speed_cap, platform_speed_cap; - enum pcie_link_width platform_link_width; + enum pcie_link_width platform_link_width, link_width; if (amdgpu_pcie_gen_cap) adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; @@ -6133,11 +6229,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) amdgpu_device_partner_bandwidth(adev, &platform_speed_cap, &platform_link_width); + amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ - pdev = adev->pdev; - speed_cap = pcie_get_speed_cap(pdev); if (speed_cap == PCI_SPEED_UNKNOWN) { adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | @@ -6193,51 +6288,103 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } } if (adev->pm.pcie_mlw_mask == 0) { + /* asic caps */ + if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { + adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK; + } else { + switch (link_width) { + case PCIE_LNK_X32: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X16: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X12: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + 
CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X8: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X4: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X2: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X1: + adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1; + break; + default: + break; + } + } + /* platform caps */ if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; } else { switch (platform_link_width) { case PCIE_LNK_X32: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X16: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case 
PCIE_LNK_X12: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X8: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X4: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X2: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X1: - adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; + adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 1040204ac8b9..949d74eff294 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1,5 +1,5 @@ /* - * Copyright 2018 Advanced Micro Devices, Inc. + * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -104,7 +104,9 @@ #include "smuio_v13_0_6.h" #include "smuio_v14_0_2.h" #include "vcn_v5_0_0.h" +#include "vcn_v5_0_1.h" #include "jpeg_v5_0_0.h" +#include "jpeg_v5_0_1.h" #include "amdgpu_vpe.h" #if defined(CONFIG_DRM_AMD_ISP) @@ -1340,7 +1342,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) */ if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) { - adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = + adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config = ip->revision & 0xc0; adev->vcn.num_vcn_inst++; adev->vcn.inst_mask |= @@ -1705,7 +1707,7 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) * so this won't overflow. */ for (v = 0; v < adev->vcn.num_vcn_inst; v++) { - adev->vcn.vcn_codec_disable_mask[v] = + adev->vcn.inst[v].vcn_codec_disable_mask = le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits); } break; @@ -1836,6 +1838,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); break; case IP_VERSION(10, 1, 10): @@ -1890,6 +1893,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); break; case IP_VERSION(10, 1, 10): @@ -2013,6 +2017,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): case IP_VERSION(14, 0, 0): case IP_VERSION(14, 0, 1): @@ -2184,6 +2189,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct 
amdgpu_device *adev) break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block); break; case IP_VERSION(10, 1, 10): @@ -2238,6 +2244,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(4, 4, 2): case IP_VERSION(4, 4, 5): + case IP_VERSION(4, 4, 4): amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block); break; case IP_VERSION(5, 0, 0): @@ -2361,6 +2368,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block); break; + case IP_VERSION(5, 0, 1): + amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block); + break; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", @@ -2405,6 +2416,7 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): aqua_vanjaram_init_soc_config(adev); break; default: @@ -2652,6 +2664,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): adev->family = AMDGPU_FAMILY_AI; break; case IP_VERSION(9, 1, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b119d27271c1..35c778426a7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -33,6 +33,7 @@ #include "soc15_common.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" +#include "bif/bif_4_1_d.h" #include <asm/div64.h> #include <linux/pci.h> @@ -1788,3 +1789,82 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev) return 0; } +/* panic_bo is set 
in amdgpu_display_get_scanout_buffer() and only used in amdgpu_display_set_pixel() + * they are called from the panic handler, and protected by the drm_panic spinlock. + */ +static struct amdgpu_bo *panic_abo; + +/* Use the indirect MMIO to write each pixel to the GPU VRAM, + * This is a simplified version of amdgpu_device_mm_access() + */ +static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb, + unsigned int x, + unsigned int y, + u32 color) +{ + struct amdgpu_res_cursor cursor; + unsigned long offset; + struct amdgpu_bo *abo = panic_abo; + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); + uint32_t tmp; + + offset = x * 4 + y * sb->pitch[0]; + amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor); + + tmp = cursor.start >> 31; + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000); + if (tmp != 0xffffffff) + WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); + WREG32_NO_KIQ(mmMM_DATA, color); +} + +int amdgpu_display_get_scanout_buffer(struct drm_plane *plane, + struct drm_scanout_buffer *sb) +{ + struct amdgpu_bo *abo; + struct drm_framebuffer *fb = plane->state->fb; + + if (!fb) + return -EINVAL; + + DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format); + + abo = gem_to_amdgpu_bo(fb->obj[0]); + if (!abo) + return -EINVAL; + + sb->width = fb->width; + sb->height = fb->height; + /* Use the generic linear format, because tiling will be disabled in panic_flush() */ + sb->format = drm_format_info(fb->format->format); + if (!sb->format) + return -EINVAL; + + sb->pitch[0] = fb->pitches[0]; + + if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) { + if (abo->tbo.resource->mem_type != TTM_PL_VRAM) { + drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n"); + return -EINVAL; + } + /* Only handle 32bits format, to simplify mmio access */ + if (fb->format->cpp[0] != 4) { + drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n"); + return -EINVAL; + } + sb->set_pixel = amdgpu_display_set_pixel; + 
panic_abo = abo; + return 0; + } + if (!abo->kmap.virtual && + ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) { + drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n"); + return -ENOMEM; + } + if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) + iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual); + else + iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 9d19940f73c8..dfa0d642ac16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,6 +23,8 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ +#include <drm/drm_panic.h> + #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) #define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e)) @@ -49,4 +51,7 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier); int amdgpu_display_suspend_helper(struct amdgpu_device *adev); int amdgpu_display_resume_helper(struct amdgpu_device *adev); +int amdgpu_display_get_scanout_buffer(struct drm_plane *plane, + struct drm_scanout_buffer *sb); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index eaeaaddb32cd..817116e53d44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -280,7 +280,7 @@ module_param_named(gartsize, amdgpu_gart_size, uint, 0600); /** * DOC: gttsize (int) * Restrict the size of GTT domain (for userspace use) in MiB for testing. - * The default is -1 (Use 1/2 RAM, minimum value is 3GB). + * The default is -1 (Use value specified by TTM). 
*/ MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)"); module_param_named(gttsize, amdgpu_gtt_size, int, 0600); @@ -399,7 +399,7 @@ module_param_named(runpm, amdgpu_runtime_pm, int, 0444); * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device). */ MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))"); -module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); +module_param_named_unsafe(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); /** * DOC: bapm (int) @@ -457,7 +457,7 @@ module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); * Enable experimental hw support (1 = enable). The default is 0 (disabled). */ MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); -module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); +module_param_named_unsafe(exp_hw_support, amdgpu_exp_hw_support, int, 0444); /** * DOC: dc (int) @@ -568,14 +568,14 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV). */ MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)"); -module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); +module_param_named_unsafe(gpu_recovery, amdgpu_gpu_recovery, int, 0444); /** * DOC: emu_mode (int) * Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled). 
*/ MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); -module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); +module_param_named_unsafe(emu_mode, amdgpu_emu_mode, int, 0444); /** * DOC: ras_enable (int) @@ -730,7 +730,7 @@ module_param_named(noretry, amdgpu_noretry, int, 0644); */ MODULE_PARM_DESC(force_asic_type, "A non negative value used to specify the asic type for all supported GPUs"); -module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444); +module_param_named_unsafe(force_asic_type, amdgpu_force_asic_type, int, 0444); /** * DOC: use_xgmi_p2p (int) @@ -749,7 +749,7 @@ module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444); * assigns queues to HQDs. */ int sched_policy = KFD_SCHED_POLICY_HWS; -module_param(sched_policy, int, 0444); +module_param_unsafe(sched_policy, int, 0444); MODULE_PARM_DESC(sched_policy, "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); @@ -799,7 +799,7 @@ MODULE_PARM_DESC(send_sigterm, * Setting 1 enables halt on hang. */ int halt_if_hws_hang; -module_param(halt_if_hws_hang, int, 0644); +module_param_unsafe(halt_if_hws_hang, int, 0644); MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); /** @@ -808,7 +808,7 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau * check says. Default value: false (rely on MEC2 firmware version check). 
*/ bool hws_gws_support; -module_param(hws_gws_support, bool, 0444); +module_param_unsafe(hws_gws_support, bool, 0444); MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)"); /** @@ -841,7 +841,7 @@ MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = defa */ int amdgpu_no_queue_eviction_on_vm_fault; MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); -module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); +module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); #endif /** @@ -849,7 +849,7 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm */ int amdgpu_mtype_local; MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)"); -module_param_named(mtype_local, amdgpu_mtype_local, int, 0444); +module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444); /** * DOC: pcie_p2p (bool) @@ -953,7 +953,7 @@ module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)"); -module_param_named(reset_method, amdgpu_reset_method, int, 0644); +module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644); /** * DOC: bad_page_threshold (int) Bad page threshold is specifies the @@ -1049,7 +1049,7 @@ module_param_named(seamless, amdgpu_seamless, int, 0444); * - 0x4: Disable GPU soft recovery, always do a full reset */ MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); -module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); 
+module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444); /** * DOC: agp (int) @@ -2552,7 +2552,6 @@ static int amdgpu_pmops_freeze(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int r; - adev->in_s4 = true; r = amdgpu_device_suspend(drm_dev, true); adev->in_s4 = false; if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index ceb5163480f4..09c9194d5bd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -384,7 +384,7 @@ int amdgpu_fru_sysfs_init(struct amdgpu_device *adev) void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev) { - if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info) + if (!adev->fru_info) return; sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h index bc58dca18035..98f3196599ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h @@ -32,7 +32,7 @@ struct amdgpu_fru_info { char product_name[AMDGPU_PRODUCT_NAME_LEN]; char serial[20]; char manufacturer_name[32]; - char fru_id[32]; + char fru_id[50]; }; int amdgpu_fru_get_product_info(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c index 2d4b67175b55..328a1b963548 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c @@ -122,6 +122,10 @@ static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return 0; + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3)) + return 0; + if (adev->asic_type >= CHIP_SIENNA_CICHLID) return 1; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 103513b1d23f..69429df09477 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -88,10 +88,8 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj); - if (aobj) { - amdgpu_hmm_unregister(aobj); - ttm_bo_put(&aobj->tbo); - } + amdgpu_hmm_unregister(aobj); + ttm_bo_put(&aobj->tbo); } int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 69a6b6dba0a5..784b03abb3a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -515,7 +515,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (!kiq_ring->sched.ready || adev->job_hang || amdgpu_in_reset(adev)) + if (!kiq_ring->sched.ready || amdgpu_in_reset(adev)) return 0; spin_lock(&kiq->ring_lock); @@ -567,7 +567,7 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang) + if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev)) return 0; if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { @@ -806,7 +806,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) /* If going to s2idle, no need to wait */ if (adev->in_s0ix) { if (!amdgpu_dpm_set_powergating_by_smu(adev, - AMD_IP_BLOCK_TYPE_GFX, true)) + AMD_IP_BLOCK_TYPE_GFX, true, 0)) adev->gfx.gfx_off_state = true; } else { schedule_delayed_work(&adev->gfx.gfx_off_delay_work, @@ -818,7 +818,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); if (adev->gfx.gfx_off_state && - 
!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { + !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) { adev->gfx.gfx_off_state = false; if (adev->gfx.funcs->init_spm_golden) { @@ -1484,6 +1484,24 @@ static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) return 0; } +/** + * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader + * @dev: The device structure + * @attr: The device attribute structure + * @buf: The buffer containing the input data + * @count: The size of the input data + * + * Provides the sysfs interface to manually run a cleaner shader, which is + * used to clear the GPU state between different tasks. Writing a value to the + * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution. + * The value written corresponds to the partition index on multi-partition + * devices. On single-partition devices, the value should be '0'. + * + * The cleaner shader clears the Local Data Store (LDS) and General Purpose + * Registers (GPRs) to ensure data isolation between GPU workloads. + * + * Return: The number of bytes written to the sysfs file. + */ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, struct device_attribute *attr, const char *buf, @@ -1532,6 +1550,19 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, return count; } +/** + * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings + * @dev: The device structure + * @attr: The device attribute structure + * @buf: The buffer to store the output data + * + * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' + * feature for each GPU partition. Reading from the 'enforce_isolation' + * sysfs file returns the isolation settings for all partitions, where '0' + * indicates disabled and '1' indicates enabled. + * + * Return: The number of bytes read from the sysfs file. 
+ */ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, struct device_attribute *attr, char *buf) @@ -1555,6 +1586,20 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, return size; } +/** + * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation + * @dev: The device structure + * @attr: The device attribute structure + * @buf: The buffer containing the input data + * @count: The size of the input data + * + * This function allows control over the 'enforce_isolation' feature, which + * serializes access to the graphics engine. Writing '1' or '0' to the + * 'enforce_isolation' sysfs file enables or disables process isolation for + * each partition. The input should specify the setting for all partitions. + * + * Return: The number of bytes written to the sysfs file. + */ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -1940,6 +1985,17 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) mutex_unlock(&adev->enforce_isolation_mutex); } +/** + * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation + * @adev: amdgpu_device pointer + * @idx: Index of the GPU partition + * + * When kernel submissions come in, the jobs are given a time slice and once + * that time slice is up, if there are KFD user queues active, kernel + * submissions are blocked until KFD has had its time slice. Once the KFD time + * slice is up, KFD user queues are preempted and kernel submissions are + * unblocked and allowed to run again. 
+ */ static void amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, u32 idx) @@ -1985,10 +2041,20 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, msleep(GFX_SLICE_PERIOD_MS); } +/** + * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation + * @ring: Pointer to the amdgpu_ring structure + * + * Ring begin_use helper implementation for gfx which serializes access to the + * gfx IP between kernel submission IOCTLs and KFD user queues when isolation + * enforcement is enabled. The kernel submission IOCTLs and KFD user queues + * each get a time slice when both are active. + */ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 idx; + bool sched_work = false; if (!adev->gfx.enable_cleaner_shader) return; @@ -2007,15 +2073,28 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) mutex_lock(&adev->enforce_isolation_mutex); if (adev->enforce_isolation[idx]) { if (adev->kfd.init_complete) - amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); + sched_work = true; } mutex_unlock(&adev->enforce_isolation_mutex); + + if (sched_work) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); } +/** + * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation + * @ring: Pointer to the amdgpu_ring structure + * + * Ring end_use helper implementation for gfx which serializes access to the + * gfx IP between kernel submission IOCTLs and KFD user queues when isolation + * enforcement is enabled. The kernel submission IOCTLs and KFD user queues + * each get a time slice when both are active. 
+ */ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 idx; + bool sched_work = false; if (!adev->gfx.enable_cleaner_shader) return; @@ -2031,9 +2110,12 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) mutex_lock(&adev->enforce_isolation_mutex); if (adev->enforce_isolation[idx]) { if (adev->kfd.init_complete) - amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); + sched_work = true; } mutex_unlock(&adev->enforce_isolation_mutex); + + if (sched_work) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); } /* @@ -2050,7 +2132,7 @@ static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val) if (!adev) return -ENODEV; - mask = (1 << adev->gfx.num_gfx_rings) - 1; + mask = (1ULL << adev->gfx.num_gfx_rings) - 1; if ((val & mask) == 0) return -EINVAL; @@ -2078,7 +2160,7 @@ static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val) for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { ring = &adev->gfx.gfx_ring[i]; if (ring->sched.ready) - mask |= 1 << i; + mask |= 1ULL << i; } *val = mask; @@ -2120,7 +2202,7 @@ static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val) if (!adev) return -ENODEV; - mask = (1 << adev->gfx.num_compute_rings) - 1; + mask = (1ULL << adev->gfx.num_compute_rings) - 1; if ((val & mask) == 0) return -EINVAL; @@ -2149,7 +2231,7 @@ static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val) for (i = 0; i < adev->gfx.num_compute_rings; ++i) { ring = &adev->gfx.compute_ring[i]; if (ring->sched.ready) - mask |= 1 << i; + mask |= 1ULL << i; } *val = mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 8b512dc28df8..2ea98ec60220 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -89,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, /** * amdgpu_ib_free - free an IB (Indirect Buffer) * - * @adev: amdgpu_device pointer * 
@ib: IB object to free * @f: the fence SA bo need wait on for the ib alloation * * Free an IB (all asics). */ -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f) +void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f) { - amdgpu_sa_bo_free(adev, &ib->sa_bo, f); + amdgpu_sa_bo_free(&ib->sa_bo, f); } /** @@ -193,8 +191,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) || - (amdgpu_sriov_vf(adev) && need_ctx_switch) || - amdgpu_vm_need_pipeline_sync(ring, job))) { + need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) { + need_pipe_sync = true; if (tmp) @@ -299,7 +297,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_patch_cond_exec(ring, cond_exec); ring->current_ctx = fence_ctx; - if (vm && ring->funcs->emit_switch_buffer) + if (job && ring->funcs->emit_switch_buffer) amdgpu_ring_emit_switch_buffer(ring); if (ring->funcs->emit_wave_limit && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index f3b0aaf3ebc6..901f8b12c672 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -298,3 +298,9 @@ uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, dw2 = le32_to_cpu(ih->ring[ring_index + 2]); return dw1 | ((u64)(dw2 & 0xffff) << 32); } + +const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) +{ + return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" : + ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? 
"ih2" : "unknown"; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 508f02eb0cf8..7d4395a5d8ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -110,4 +110,5 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, signed int offset); +const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 263ce1811cc8..732744488b03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -77,7 +77,8 @@ static int isp_load_fw_by_psp(struct amdgpu_device *adev) sizeof(ucode_prefix)); /* read isp fw */ - r = amdgpu_ucode_request(adev, &adev->isp.fw, "amdgpu/%s.bin", ucode_prefix); + r = amdgpu_ucode_request(adev, &adev->isp.fw, AMDGPU_UCODE_OPTIONAL, + "amdgpu/%s.bin", ucode_prefix); if (r) { amdgpu_ucode_release(&adev->isp.fw); return r; @@ -128,13 +129,13 @@ static bool isp_is_idle(void *handle) return true; } -static int isp_set_clockgating_state(void *handle, +static int isp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int isp_set_powergating_state(void *handle, +static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index b9d08bc96581..100f04475943 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -102,8 +102,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - adev->job_hang = true; - /* * Do the coredump immediately after a job timeout to 
get a very * close dump/snapshot/representation of GPU's current error status @@ -181,7 +179,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } exit: - adev->job_hang = false; drm_dev_exit(idx); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -197,11 +194,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!*job) return -ENOMEM; - /* - * Initialize the scheduler to at least some ring so that we always - * have a pointer to adev. - */ - (*job)->base.sched = &adev->rings[0]->sched; (*job)->vm = vm; amdgpu_sync_create(&(*job)->explicit_sync); @@ -255,7 +247,6 @@ void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, void amdgpu_job_free_resources(struct amdgpu_job *job) { - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; unsigned i; @@ -268,7 +259,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) f = NULL; for (i = 0; i < job->num_ibs; ++i) - amdgpu_ib_free(ring->adev, &job->ibs[i], f); + amdgpu_ib_free(&job->ibs[i], f); } static void amdgpu_job_free_cb(struct drm_sched_job *s_job) @@ -367,6 +358,13 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r); goto error; } + /* + * The VM structure might be released after the VMID is + * assigned, we had multiple problems with people trying to use + * the VM pointer so better set it to NULL. 
+ */ + if (!fence) + job->vm = NULL; } return fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 3eb4a4653fce..d9cb343a8708 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -27,7 +27,8 @@ #include "amdgpu_ras.h" #define AMDGPU_MAX_JPEG_INSTANCES 4 -#define AMDGPU_MAX_JPEG_RINGS 8 +#define AMDGPU_MAX_JPEG_RINGS 10 +#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8 #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 016a6f6c4267..98528ee94c15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -846,7 +846,7 @@ out: case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device *dev_info; uint64_t vm_size; - uint32_t pcie_gen_mask; + uint32_t pcie_gen_mask, pcie_width_mask; dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL); if (!dev_info) @@ -934,15 +934,18 @@ out: dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; /* Combine the chip gen mask with the platform (CPU/mobo) mask. */ - pcie_gen_mask = adev->pm.pcie_gen_mask & (adev->pm.pcie_gen_mask >> 16); + pcie_gen_mask = adev->pm.pcie_gen_mask & + (adev->pm.pcie_gen_mask >> CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT); + pcie_width_mask = adev->pm.pcie_mlw_mask & + (adev->pm.pcie_mlw_mask >> CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT); dev_info->pcie_gen = fls(pcie_gen_mask); dev_info->pcie_num_lanes = - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 ? 
2 : 1; + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1; dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size; dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 59ec20b07a6a..32b27a1658e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1610,10 +1610,12 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); } - r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED, + "%s", fw_name); if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mes.bin", ucode_prefix); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index c6f93cbd6739..2df2444ee892 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -40,7 +40,7 @@ #define AMDGPU_MES_VERSION_MASK 0x00000fff #define AMDGPU_MES_API_VERSION_MASK 0x00fff000 #define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000 -#define AMDGPU_MES_MSCRATCH_SIZE 0x8000 +#define AMDGPU_MES_MSCRATCH_SIZE 0x40000 enum amdgpu_mes_priority_level { AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index fc94b8b9b86d..96f4b8904e9a 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -41,6 +41,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_vram_mgr.h" #include "amdgpu_vm.h" +#include "amdgpu_dma_buf.h" /** * DOC: amdgpu_object @@ -324,6 +325,9 @@ error_free: * * Allocates and pins a BO for kernel internal use. * + * This function is exported to allow the V4L2 isp device + * external to drm device to create and access the kernel BO. + * * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. * * Returns: @@ -347,6 +351,76 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, return 0; } +EXPORT_SYMBOL(amdgpu_bo_create_kernel); + +/** + * amdgpu_bo_create_isp_user - create user BO for isp + * + * @adev: amdgpu device object + * @dma_buf: DMABUF handle for isp buffer + * @domain: where to place it + * @bo: used to initialize BOs in structures + * @gpu_addr: GPU addr of the pinned BO + * + * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does + * GART alloc to generate gpu_addr for BO to make it accessible through the + * GART aperture for ISP HW. + * + * This function is exported to allow the V4L2 isp device external to drm device + * to create and access the isp user BO. + * + * Returns: + * 0 on success, negative error code otherwise. 
+ */ +int amdgpu_bo_create_isp_user(struct amdgpu_device *adev, + struct dma_buf *dma_buf, u32 domain, struct amdgpu_bo **bo, + u64 *gpu_addr) + +{ + struct drm_gem_object *gem_obj; + int r; + + gem_obj = amdgpu_gem_prime_import(&adev->ddev, dma_buf); + *bo = gem_to_amdgpu_bo(gem_obj); + if (!(*bo)) { + dev_err(adev->dev, "failed to get valid isp user bo\n"); + return -EINVAL; + } + + r = amdgpu_bo_reserve(*bo, false); + if (r) { + dev_err(adev->dev, "(%d) failed to reserve isp user bo\n", r); + return r; + } + + r = amdgpu_bo_pin(*bo, domain); + if (r) { + dev_err(adev->dev, "(%d) isp user bo pin failed\n", r); + goto error_unreserve; + } + + r = amdgpu_ttm_alloc_gart(&(*bo)->tbo); + if (r) { + dev_err(adev->dev, "%p bind failed\n", *bo); + goto error_unpin; + } + + if (!WARN_ON(!gpu_addr)) + *gpu_addr = amdgpu_bo_gpu_offset(*bo); + + amdgpu_bo_unreserve(*bo); + + return 0; + +error_unpin: + amdgpu_bo_unpin(*bo); +error_unreserve: + amdgpu_bo_unreserve(*bo); + amdgpu_bo_unref(bo); + + return r; +} +EXPORT_SYMBOL(amdgpu_bo_create_isp_user); /** * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location @@ -423,6 +497,9 @@ error: * @cpu_addr: pointer to where the BO's CPU memory space address was stored * * unmaps and unpin a BO for kernel internal use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the kernel BO. */ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr) @@ -447,6 +524,30 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, if (cpu_addr) *cpu_addr = NULL; } +EXPORT_SYMBOL(amdgpu_bo_free_kernel); + +/** + * amdgpu_bo_free_isp_user - free BO for isp use + * + * @bo: amdgpu isp user BO to free + * + * unpin and unref BO for isp internal use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the isp user BO. 
+ */ +void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo) +{ + if (bo == NULL) + return; + + if (amdgpu_bo_reserve(bo, true) == 0) { + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); + } + amdgpu_bo_unref(&bo); +} +EXPORT_SYMBOL(amdgpu_bo_free_isp_user); /* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 23d2c6ab9d62..375448627f7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -260,6 +260,10 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr); +int amdgpu_bo_create_isp_user(struct amdgpu_device *adev, + struct dma_buf *dbuf, u32 domain, + struct amdgpu_bo **bo, + u64 *gpu_addr); int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, uint64_t offset, uint64_t size, struct amdgpu_bo **bo_ptr, void **cpu_addr); @@ -271,6 +275,7 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, struct amdgpu_bo_vm **ubo_ptr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); +void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); void *amdgpu_bo_kptr(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); @@ -335,8 +340,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, struct drm_suballoc **sa_bo, unsigned int size); -void amdgpu_sa_bo_free(struct amdgpu_device *adev, - struct drm_suballoc **sa_bo, +void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence); #if defined(CONFIG_DEBUG_FS) void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
index 448f9e742983..babe94ade247 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -208,6 +208,7 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block) psp->boot_time_tmr = false; fallthrough; case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = false; @@ -359,6 +360,7 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev, int i; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) return false; @@ -870,6 +872,7 @@ static bool psp_skip_tmr(struct psp_context *psp) case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): return true; default: @@ -2264,7 +2267,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return -EINVAL; if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA && - ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC) + ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC && + ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2) return -EINVAL; ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context); @@ -2385,6 +2389,15 @@ static int psp_hw_start(struct psp_context *psp) } } + if ((is_psp_fw_valid(psp->spdm_drv)) && + (psp->funcs->bootloader_load_spdm_drv != NULL)) { + ret = psp_bootloader_load_spdm_drv(psp); + if (ret) { + dev_err(adev->dev, "PSP load spdm_drv failed!\n"); + return ret; + } + } + if ((is_psp_fw_valid(psp->sos)) && (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); @@ -3007,10 +3020,7 @@ static int psp_hw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; mutex_lock(&adev->firmware.mutex); - /* - * This sequence is just used on hw_init only 
once, no need on - * resume. - */ + ret = amdgpu_ucode_init_bo(adev); if (ret) goto failed; @@ -3135,6 +3145,10 @@ static int psp_resume(struct amdgpu_ip_block *ip_block) mutex_lock(&adev->firmware.mutex); + ret = amdgpu_ucode_init_bo(adev); + if (ret) + goto failed; + ret = psp_hw_start(psp); if (ret) goto failed; @@ -3289,7 +3303,8 @@ int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_0 *asd_hdr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, "amdgpu/%s_asd.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_asd.bin", chip_name); if (err) goto out; @@ -3311,7 +3326,8 @@ int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_0 *toc_hdr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, "amdgpu/%s_toc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_toc.bin", chip_name); if (err) goto out; @@ -3407,6 +3423,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->ipkeymgr_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_SPDM_DRV: + psp->spdm_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->spdm_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->spdm_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->spdm_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); break; @@ -3474,7 +3496,8 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ 
-3750,7 +3773,8 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) struct amdgpu_device *adev = psp->adev; int err; - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta.bin", chip_name); if (err) return err; @@ -3785,7 +3809,8 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name) return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, "amdgpu/%s_cap.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, AMDGPU_UCODE_OPTIONAL, + "amdgpu/%s_cap.bin", chip_name); if (err) { if (err == -ENODEV) { dev_warn(adev->dev, "cap microcode does not exist, skip\n"); @@ -3849,13 +3874,13 @@ int psp_config_sq_perfmon(struct psp_context *psp, return ret; } -static int psp_set_clockgating_state(void *handle, +static int psp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int psp_set_powergating_state(void *handle, +static int psp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -3867,10 +3892,12 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); + struct amdgpu_ip_block *ip_block; uint32_t fw_ver; int ret; - if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP); + if (!ip_block || !ip_block->status.late_initialized) { dev_info(adev->dev, "PSP block is not ready yet\n."); return -EBUSY; } @@ -3899,8 +3926,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, struct amdgpu_bo *fw_buf_bo = NULL; uint64_t fw_pri_mc_addr; void *fw_pri_cpu_addr; + struct amdgpu_ip_block *ip_block; - if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { + 
ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP); + if (!ip_block || !ip_block->status.late_initialized) { dev_err(adev->dev, "PSP block is not ready yet."); return -EBUSY; } @@ -3908,7 +3937,8 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, if (!drm_dev_enter(ddev, &idx)) return -ENODEV; - ret = amdgpu_ucode_request(adev, &usbc_pd_fw, "amdgpu/%s", buf); + ret = amdgpu_ucode_request(adev, &usbc_pd_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s", buf); if (ret) goto fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 567cb1f924ca..8d5acc415d38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -80,6 +80,7 @@ enum psp_bootloader_cmd { PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, + PSP_BL__LOAD_SPDMDRV = 0x20000000, }; enum psp_ring_type { @@ -120,6 +121,7 @@ struct psp_funcs { int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_ras_drv)(struct psp_context *psp); int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp); + int (*bootloader_load_spdm_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -343,6 +345,7 @@ struct psp_context { struct psp_bin_desc dbg_drv; struct psp_bin_desc ras_drv; struct psp_bin_desc ipkeymgr_drv; + struct psp_bin_desc spdm_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -434,6 +437,9 @@ struct amdgpu_psp_funcs { #define psp_bootloader_load_ipkeymgr_drv(psp) \ ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \ (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0) +#define psp_bootloader_load_spdm_drv(psp) \ + ((psp)->funcs->bootloader_load_spdm_drv ? \ + (psp)->funcs->bootloader_load_spdm_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? 
(psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4c9fa24dd972..f0924aa3f4e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -36,6 +36,7 @@ #include "amdgpu_xgmi.h" #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include "nbio_v4_3.h" +#include "nbif_v6_3_1.h" #include "nbio_v7_9.h" #include "atom.h" #include "amdgpu_reset.h" @@ -192,7 +193,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, - err_data.err_addr_cnt); + err_data.err_addr_cnt, false); amdgpu_ras_save_bad_pages(adev, NULL); } @@ -2015,6 +2016,7 @@ static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): ret = true; break; @@ -2156,6 +2158,16 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) /* Fatal error events are handled on host side */ if (amdgpu_sriov_vf(adev)) return; + /** + * If the current interrupt is caused by a non-fatal RAS error, skip + * check for fatal error. For fatal errors, FED status of all devices + * in XGMI hive gets set when the first device gets fatal error + * interrupt. The error gets propagated to other devices as well, so + * make sure to ack the interrupt regardless of FED status. 
+ */ + if (!amdgpu_ras_get_fed_status(adev) && + amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY)) + return; if (adev->nbio.ras && adev->nbio.ras->handle_ras_controller_intr_no_bifring) @@ -2185,6 +2197,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * if (ret) return; + amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block); /* both query_poison_status and handle_poison_consumption are optional, * but at least one of them should be implemented if we need poison * consumption handler @@ -2717,40 +2730,203 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, return 0; } +static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev, + struct eeprom_table_record *bps, + struct ras_err_data *err_data) +{ + struct ta_ras_query_address_input addr_in; + uint32_t socket = 0; + int ret = 0; + + if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) + socket = adev->smuio.funcs->get_socket_id(adev); + + /* reinit err_data */ + err_data->err_addr_cnt = 0; + err_data->err_addr_len = adev->umc.retire_unit; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = bps->address; + addr_in.ma.socket_id = socket; + addr_in.ma.ch_inst = bps->mem_channel; + /* tell RAS TA the node instance is not used */ + addr_in.ma.node_inst = TA_RAS_INV_NODE; + + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + ret = adev->umc.ras->convert_ras_err_addr(adev, err_data, + &addr_in, NULL, false); + + return ret; +} + +static int amdgpu_ras_mca2pa(struct amdgpu_device *adev, + struct eeprom_table_record *bps, + struct ras_err_data *err_data) +{ + struct ta_ras_query_address_input addr_in; + uint32_t die_id, socket = 0; + + if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) + socket = adev->smuio.funcs->get_socket_id(adev); + + /* although die id is gotten from PA in nps1 mode, the id is + * fitable for any nps mode + */ + if (adev->umc.ras && adev->umc.ras->get_die_id_from_pa) + die_id = 
adev->umc.ras->get_die_id_from_pa(adev, bps->address, + bps->retired_page << AMDGPU_GPU_PAGE_SHIFT); + else + return -EINVAL; + + /* reinit err_data */ + err_data->err_addr_cnt = 0; + err_data->err_addr_len = adev->umc.retire_unit; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = bps->address; + addr_in.ma.ch_inst = bps->mem_channel; + addr_in.ma.umc_inst = bps->mcumc_id; + addr_in.ma.node_inst = die_id; + addr_in.ma.socket_id = socket; + + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + return adev->umc.ras->convert_ras_err_addr(adev, err_data, + &addr_in, NULL, false); + else + return -EINVAL; +} + /* it deal with vram only. */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - struct eeprom_table_record *bps, int pages) + struct eeprom_table_record *bps, int pages, bool from_rom) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; + struct ras_err_data err_data; + struct eeprom_table_record *err_rec; + struct amdgpu_ras_eeprom_control *control = + &adev->psp.ras_context.ras->eeprom_control; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; int ret = 0; - uint32_t i; + uint32_t i, j, loop_cnt = 1; + bool find_pages_per_pa = false; if (!con || !con->eh_data || !bps || pages <= 0) return 0; + if (from_rom) { + err_data.err_addr = + kcalloc(adev->umc.retire_unit, + sizeof(struct eeprom_table_record), GFP_KERNEL); + if (!err_data.err_addr) { + dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n"); + ret = -ENOMEM; + goto out; + } + + err_rec = err_data.err_addr; + loop_cnt = adev->umc.retire_unit; + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + } + mutex_lock(&con->recovery_lock); data = con->eh_data; - if (!data) - goto out; + if (!data) { + /* Returning 0 as the absence of eh_data is acceptable */ + goto free; + } for (i = 0; i < pages; i++) { - if 
(amdgpu_ras_check_bad_page_unlock(con, - bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) - continue; + if (from_rom && + control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA) { + if (!find_pages_per_pa) { + if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) { + if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) { + /* may use old RAS TA, use PA to find pages in + * one row + */ + if (amdgpu_umc_pages_in_a_row(adev, &err_data, + bps[i].retired_page << + AMDGPU_GPU_PAGE_SHIFT)) { + ret = -EINVAL; + goto free; + } else { + find_pages_per_pa = true; + } + } else { + /* unsupported cases */ + ret = -EOPNOTSUPP; + goto free; + } + } + } else { + if (amdgpu_umc_pages_in_a_row(adev, &err_data, + bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) { + ret = -EINVAL; + goto free; + } + } + } else { + if (from_rom && !find_pages_per_pa) { + if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) { + /* bad page in any NPS mode in eeprom */ + if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) { + ret = -EINVAL; + goto free; + } + } else { + /* legacy bad page in eeprom, generated only in + * NPS1 mode + */ + if (amdgpu_ras_mca2pa(adev, &bps[i], &err_data)) { + /* old RAS TA or ASICs which don't support to + * convert addrss via mca address + */ + if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) { + find_pages_per_pa = true; + err_rec = &bps[i]; + loop_cnt = 1; + } else { + /* non-nps1 mode, old RAS TA + * can't support it + */ + ret = -EOPNOTSUPP; + goto free; + } + } + } - if (!data->space_left && - amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { - ret = -ENOMEM; - goto out; + if (!find_pages_per_pa) + i += (adev->umc.retire_unit - 1); + } else { + err_rec = &bps[i]; + } } - amdgpu_ras_reserve_page(adev, bps[i].retired_page); + for (j = 0; j < loop_cnt; j++) { + if (amdgpu_ras_check_bad_page_unlock(con, + err_rec[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + continue; + + if (!data->space_left && + amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { + ret = -ENOMEM; + goto free; + } - 
memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps)); - data->count++; - data->space_left--; + amdgpu_ras_reserve_page(adev, err_rec[j].retired_page); + + memcpy(&data->bps[data->count], &(err_rec[j]), + sizeof(struct eeprom_table_record)); + data->count++; + data->space_left--; + } } + +free: + if (from_rom) + kfree(err_data.err_addr); out: mutex_unlock(&con->recovery_lock); @@ -2768,7 +2944,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; struct amdgpu_ras_eeprom_control *control; - int save_count; + int save_count, unit_num, bad_page_num, i; if (!con || !con->eh_data) { if (new_cnt) @@ -2780,19 +2956,32 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; - save_count = data->count - control->ras_num_recs; + bad_page_num = control->ras_num_bad_pages; + save_count = data->count - bad_page_num; mutex_unlock(&con->recovery_lock); + unit_num = save_count / adev->umc.retire_unit; if (new_cnt) - *new_cnt = save_count / adev->umc.retire_unit; + *new_cnt = unit_num; /* only new entries are saved */ if (save_count > 0) { - if (amdgpu_ras_eeprom_append(control, - &data->bps[control->ras_num_recs], - save_count)) { - dev_err(adev->dev, "Failed to save EEPROM table data!"); - return -EIO; + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[control->ras_num_recs], + save_count)) { + dev_err(adev->dev, "Failed to save EEPROM table data!"); + return -EIO; + } + } else { + for (i = 0; i < unit_num; i++) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[bad_page_num + i * adev->umc.retire_unit], + 1)) { + dev_err(adev->dev, "Failed to save EEPROM table data!"); + return -EIO; + } + } } dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); @@ -2821,11 +3010,32 @@ static int 
amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) return -ENOMEM; ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs); - if (ret) + if (ret) { dev_err(adev->dev, "Failed to load EEPROM table records!"); - else - ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs); + } else { + if (control->ras_num_recs > 1 && + adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { + if ((bps[0].address == bps[1].address) && + (bps[0].mem_channel == bps[1].mem_channel)) + control->rec_type = AMDGPU_RAS_EEPROM_REC_PA; + else + control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA; + } + + ret = amdgpu_ras_eeprom_check(control); + if (ret) + goto out; + + /* HW not usable */ + if (amdgpu_ras_is_rma(adev)) { + ret = -EHWPOISON; + goto out; + } + ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true); + } + +out: kfree(bps); return ret; } @@ -3205,31 +3415,36 @@ static int amdgpu_ras_page_retirement_thread(void *param) int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control; int ret; if (!con || amdgpu_sriov_vf(adev)) return 0; - ret = amdgpu_ras_eeprom_init(&con->eeprom_control); - + control = &con->eeprom_control; + ret = amdgpu_ras_eeprom_init(control); if (ret) return ret; - /* HW not usable */ - if (amdgpu_ras_is_rma(adev)) - return -EHWPOISON; + if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr) + control->rec_type = AMDGPU_RAS_EEPROM_REC_PA; + + /* default status is MCA storage */ + if (control->ras_num_recs <= 1 && + adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA; - if (con->eeprom_control.ras_num_recs) { + if (control->ras_num_recs) { ret = amdgpu_ras_load_bad_pages(adev); if (ret) return ret; amdgpu_dpm_send_hbm_bad_pages_num( - adev, con->eeprom_control.ras_num_recs); + adev, control->ras_num_bad_pages); if (con->update_channel_flag == true) { 
amdgpu_dpm_send_hbm_bad_channel_flag( - adev, con->eeprom_control.bad_channel_bitmap); + adev, control->bad_channel_bitmap); con->update_channel_flag = false; } } @@ -3366,6 +3581,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): return true; default: @@ -3378,7 +3594,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): + case IP_VERSION(14, 0, 3): return true; default: return false; @@ -3629,6 +3847,7 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE; break; @@ -3704,7 +3923,19 @@ int amdgpu_ras_init(struct amdgpu_device *adev) * check DF RAS */ adev->nbio.ras = &nbio_v4_3_ras; break; + case IP_VERSION(6, 3, 1): + if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF)) + /* unlike other generation of nbio ras, + * nbif v6_3_1 only support fatal error interrupt + * to inform software that DF is freezed due to + * system fatal error event. driver should not + * enable nbio ras in such case. 
Instead, + * check DF RAS + */ + adev->nbio.ras = &nbif_v6_3_1_ras; + break; case IP_VERSION(7, 9, 0): + case IP_VERSION(7, 9, 1): if (!adev->gmc.is_app_apu) adev->nbio.ras = &nbio_v7_9_ras; break; @@ -4083,7 +4314,7 @@ bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev) if (!ras) return false; - return atomic_read(&ras->fed); + return test_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); } void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) @@ -4091,8 +4322,48 @@ void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) struct amdgpu_ras *ras; ras = amdgpu_ras_get_context(adev); + if (ras) { + if (status) + set_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); + else + clear_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); + } +} + +void amdgpu_ras_clear_err_state(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) + ras->ras_err_state = 0; +} + +void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); if (ras) - atomic_set(&ras->fed, !!status); + set_bit(block, &ras->ras_err_state); +} + +bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) { + if (block == AMDGPU_RAS_BLOCK__ANY) + return (ras->ras_err_state != 0); + else + return test_bit(block, &ras->ras_err_state) || + test_bit(AMDGPU_RAS_BLOCK__LAST, + &ras->ras_err_state); + } + + return false; } static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 6db772ecfee4..82db986c36a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -99,7 +99,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__IH, AMDGPU_RAS_BLOCK__MPIO, - AMDGPU_RAS_BLOCK__LAST + 
AMDGPU_RAS_BLOCK__LAST, + AMDGPU_RAS_BLOCK__ANY = -1 }; enum amdgpu_ras_mca_block { @@ -482,6 +483,8 @@ struct ras_ecc_err { uint64_t ipid; uint64_t addr; uint64_t pa_pfn; + /* save global channel index across all UMC instances */ + uint32_t channel_idx; struct ras_err_pages err_pages; }; @@ -558,8 +561,8 @@ struct amdgpu_ras { struct ras_ecc_log_info umc_ecc_log; struct delayed_work page_retirement_dwork; - /* Fatal error detected flag */ - atomic_t fed; + /* ras errors detected */ + unsigned long ras_err_state; /* RAS event manager */ struct ras_event_manager __event_mgr; @@ -750,7 +753,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - struct eeprom_table_record *bps, int pages); + struct eeprom_table_record *bps, int pages, bool from_rom); int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, unsigned long *new_cnt); @@ -952,6 +955,10 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); +void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, + enum amdgpu_ras_block block); +void amdgpu_ras_clear_err_state(struct amdgpu_device *adev); +bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block); u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index f28f6b4ba765..52c16bfeccaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -470,9 +470,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) res = __write_table_ras_info(control); control->ras_num_recs = 0; + 
control->ras_num_bad_pages = 0; control->ras_fri = 0; - amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs); + amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages); control->bad_channel_bitmap = 0; amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap); @@ -559,7 +560,7 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) { if (amdgpu_bad_page_threshold == -1) { dev_warn(adev->dev, "RAS records:%d exceed threshold:%d", - con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold); + con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold); dev_warn(adev->dev, "But GPU can be operated due to bad_page_threshold = -1.\n"); return false; @@ -621,6 +622,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control)); + struct amdgpu_device *adev = to_amdgpu_device(control); u32 a, b, i; u8 *buf, *pp; int res; @@ -723,6 +725,12 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, control->ras_num_recs = 1 + (control->ras_max_record_count + b - control->ras_fri) % control->ras_max_record_count; + + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) + control->ras_num_bad_pages = control->ras_num_recs; + else + control->ras_num_bad_pages = + control->ras_num_recs * adev->umc.retire_unit; Out: kfree(buf); return res; @@ -740,10 +748,10 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) /* Modify the header if it exceeds. 
*/ if (amdgpu_bad_page_threshold != 0 && - control->ras_num_recs >= ras->bad_page_cnt_threshold) { + control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) { dev_warn(adev->dev, "Saved bad pages %d reaches threshold value %d\n", - control->ras_num_recs, ras->bad_page_cnt_threshold); + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); control->tbl_hdr.header = RAS_TABLE_HDR_BAD; if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) { control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; @@ -798,9 +806,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) */ if (amdgpu_bad_page_threshold != 0 && control->tbl_hdr.version == RAS_TABLE_VER_V2_1 && - control->ras_num_recs < ras->bad_page_cnt_threshold) + control->ras_num_bad_pages < ras->bad_page_cnt_threshold) control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold - - control->ras_num_recs) * 100) / + control->ras_num_bad_pages) * 100) / ras->bad_page_cnt_threshold; /* Recalc the checksum. 
@@ -841,7 +849,7 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_device *adev = to_amdgpu_device(control); - int res; + int res, i; if (!__is_ras_eeprom_supported(adev)) return 0; @@ -855,6 +863,10 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, return -EINVAL; } + /* set the new channel index flag */ + for (i = 0; i < num; i++) + record[i].retired_page |= UMC_CHANNEL_IDX_V2; + mutex_lock(&control->ras_tbl_mutex); res = amdgpu_ras_eeprom_append_table(control, record, num); @@ -864,6 +876,11 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, amdgpu_ras_debugfs_set_ret_size(control); mutex_unlock(&control->ras_tbl_mutex); + + /* clear channel index flag, the flag is only saved on eeprom */ + for (i = 0; i < num; i++) + record[i].retired_page &= ~UMC_CHANNEL_IDX_V2; + return res; } @@ -1373,9 +1390,35 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) } control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); + return 0; +} + +int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int res; + + if (!__is_ras_eeprom_supported(adev)) + return 0; + + /* Verify i2c adapter is initialized */ + if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo) + return -ENOENT; + + if (!__get_eeprom_i2c_addr(adev, control)) + return -EINVAL; + + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) + control->ras_num_bad_pages = control->ras_num_recs; + else + control->ras_num_bad_pages = + control->ras_num_recs * adev->umc.retire_unit; + if (hdr->header == RAS_TABLE_HDR_VAL) { DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", - control->ras_num_recs); + control->ras_num_bad_pages); if (hdr->version == 
RAS_TABLE_VER_V2_1) { res = __read_table_ras_info(control); @@ -1390,9 +1433,9 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) /* Warn if we are at 90% of the threshold or above */ - if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold) + if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold) dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d", - control->ras_num_recs, + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); } else if (hdr->header == RAS_TABLE_HDR_BAD && amdgpu_bad_page_threshold != 0) { @@ -1403,10 +1446,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) } res = __verify_ras_table_checksum(control); - if (res) - DRM_ERROR("RAS Table incorrect checksum or error:%d\n", + if (res) { + dev_err(adev->dev, "RAS Table incorrect checksum or error:%d\n", res); - if (ras->bad_page_cnt_threshold > control->ras_num_recs) { + return -EINVAL; + } + if (ras->bad_page_cnt_threshold > control->ras_num_bad_pages) { /* This means that, the threshold was increased since * the last time the system was booted, and now, * ras->bad_page_cnt_threshold - control->num_recs > 0, @@ -1416,13 +1461,13 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) dev_info(adev->dev, "records:%d threshold:%d, resetting " "RAS table header signature", - control->ras_num_recs, + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); res = amdgpu_ras_eeprom_correct_header_tag(control, RAS_TABLE_HDR_VAL); } else { dev_err(adev->dev, "RAS records:%d exceed threshold:%d", - control->ras_num_recs, ras->bad_page_cnt_threshold); + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); if (amdgpu_bad_page_threshold == -1) { dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1."); res = 0; @@ -1431,7 +1476,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) dev_err(adev->dev, "RAS records:%d exceed threshold:%d, " "GPU will not be 
initialized. Replace this GPU or increase the threshold", - control->ras_num_recs, ras->bad_page_cnt_threshold); + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); } } } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index b9ebda577797..81d55cb7b397 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -43,6 +43,19 @@ enum amdgpu_ras_eeprom_err_type { AMDGPU_RAS_EEPROM_ERR_COUNT, }; +/* + * one UMC MCA address could map to multiply physical address (PA), + * such as 1:16, we use eeprom_table_record.address to store MCA + * address and use eeprom_table_record.retired_page to save PA. + * + * AMDGPU_RAS_EEPROM_REC_PA: one record store one PA + * AMDGPU_RAS_EEPROM_REC_MCA: one record store one MCA address + */ +enum amdgpu_ras_eeprom_rec_type { + AMDGPU_RAS_EEPROM_REC_PA, + AMDGPU_RAS_EEPROM_REC_MCA, +}; + struct amdgpu_ras_eeprom_table_header { uint32_t header; uint32_t version; @@ -82,6 +95,11 @@ struct amdgpu_ras_eeprom_control { */ u32 ras_num_recs; + /* the bad page number is ras_num_recs or + * ras_num_recs * umc.retire_unit + */ + u32 ras_num_bad_pages; + /* First record index to read, 0-based. * Range is [0, num_recs-1]. 
This is * an absolute index, starting right after @@ -102,6 +120,7 @@ struct amdgpu_ras_eeprom_control { /* Record channel info which occurred bad pages */ u32 bad_channel_bitmap; + enum amdgpu_ras_eeprom_rec_type rec_type; }; /* @@ -145,6 +164,8 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); +int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control); + extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops; extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index a0acb65f4b40..dabfbdf6f1ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -183,6 +183,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): ret = aldebaran_reset_init(adev); break; @@ -206,6 +207,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): ret = aldebaran_reset_fini(adev); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 36fc9578c53c..dee5a1b4e572 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -462,8 +462,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, enum amdgpu_ib_pool_type pool, struct amdgpu_ib *ib); -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f); +void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f); int amdgpu_ib_schedule(struct amdgpu_ring 
*ring, unsigned num_ibs, struct amdgpu_ib *ibs, struct amdgpu_job *job, struct dma_fence **f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 10df731998b2..39070b2a4c04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -93,8 +93,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, return 0; } -void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo, - struct dma_fence *fence) +void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence) { if (sa_bo == NULL || *sa_bo == NULL) { return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 113f0d242618..174badca27e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -219,9 +219,11 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix)); if (instance == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s.bin", ucode_prefix); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s%d.bin", ucode_prefix, instance); if (err) goto out; @@ -261,6 +263,8 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 4, 4) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) && adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && @@ -358,13 +362,13 @@ static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val) if (!adev) return -ENODEV; - mask = (1 << adev->sdma.num_instances) - 1; + mask = BIT_ULL(adev->sdma.num_instances) - 1; if ((val & mask) == 0) return -EINVAL; for (i = 0; i < adev->sdma.num_instances; ++i) { ring = 
&adev->sdma.instance[i].ring; - if (val & (1 << i)) + if (val & BIT_ULL(i)) ring->sched.ready = true; else ring->sched.ready = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 2db58b5812a8..5f60736051d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -107,6 +107,7 @@ struct amdgpu_sdma { struct amdgpu_irq_src doorbell_invalid_irq; struct amdgpu_irq_src pool_timeout_irq; struct amdgpu_irq_src srbm_write_irq; + struct amdgpu_irq_src ctxt_empty_irq; int num_instances; uint32_t sdma_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c8180cad0abd..ff286940ab43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1762,7 +1762,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) if (!adev->bios && (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))) reserve_size = max(reserve_size, (uint32_t)280 << 20); else if (!reserve_size) reserve_size = DISCOVERY_TMR_OFFSET; @@ -2065,6 +2066,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL); ttm_device_fini(&adev->mman.bdev); adev->mman.initialized = false; DRM_INFO("amdgpu: ttm finalized\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 4c7b53648a50..cf700824b960 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1434,6 +1434,7 @@ void amdgpu_ucode_ip_version_decode(struct 
amdgpu_device *adev, int block_type, * * @adev: amdgpu device * @fw: pointer to load firmware to + * @required: whether the firmware is required * @fmt: firmware name format string * @...: variable arguments * @@ -1442,7 +1443,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, * the error code to -ENODEV, so that early_init functions will fail to load. */ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, - const char *fmt, ...) + enum amdgpu_ucode_required required, const char *fmt, ...) { char fname[AMDGPU_UCODE_NAME_MAX]; va_list ap; @@ -1456,16 +1457,24 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, return -EOVERFLOW; } - r = request_firmware(fw, fname, adev->dev); + if (required == AMDGPU_UCODE_REQUIRED) + r = request_firmware(fw, fname, adev->dev); + else { + r = firmware_request_nowarn(fw, fname, adev->dev); + if (r) + drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fname); + } if (r) return -ENODEV; r = amdgpu_ucode_validate(*fw); - if (r) { + if (r) + /* + * The amdgpu_ucode_request() should be paired with amdgpu_ucode_release() + * regardless of success/failure, and the amdgpu_ucode_release() takes care of + * firmware release and need to avoid redundant release FW operation here. 
+ */ dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname); - release_firmware(*fw); - *fw = NULL; - } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4150ec0aa10d..4eedd92f000b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -126,6 +126,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_DBG_DRV, PSP_FW_TYPE_PSP_RAS_DRV, PSP_FW_TYPE_PSP_IPKEYMGR_DRV, + PSP_FW_TYPE_PSP_SPDM_DRV, PSP_FW_TYPE_MAX_INDEX, }; @@ -551,6 +552,11 @@ enum amdgpu_firmware_load_type { AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO, }; +enum amdgpu_ucode_required { + AMDGPU_UCODE_OPTIONAL, + AMDGPU_UCODE_REQUIRED, +}; + /* conform to smu_ucode_xfer_cz.h */ #define AMDGPU_SDMA0_UCODE_LOADED 0x00000001 #define AMDGPU_SDMA1_UCODE_LOADED 0x00000002 @@ -604,9 +610,9 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); -__printf(3, 4) +__printf(4, 5) int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, - const char *fmt, ...); + enum amdgpu_ucode_required required, const char *fmt, ...); void amdgpu_ucode_release(const struct firmware **fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, uint16_t hdr_major, uint16_t hdr_minor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 896f3609b0ee..eafe20d8fe0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -78,7 +78,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, - err_data.err_addr_cnt); + err_data.err_addr_cnt, false); 
amdgpu_ras_save_bad_pages(adev, NULL); } @@ -166,10 +166,11 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if ((amdgpu_bad_page_threshold != 0) && err_data->err_addr_cnt) { amdgpu_ras_add_bad_pages(adev, err_data->err_addr, - err_data->err_addr_cnt); + err_data->err_addr_cnt, false); amdgpu_ras_save_bad_pages(adev, &err_count); - amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); + amdgpu_dpm_send_hbm_bad_pages_num(adev, + con->eeprom_control.ras_num_bad_pages); if (con->update_channel_flag == true) { amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap); @@ -444,3 +445,77 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, return ret; } + +int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t pa_addr) +{ + struct ta_ras_query_address_output addr_out; + + /* reinit err_data */ + err_data->err_addr_cnt = 0; + err_data->err_addr_len = adev->umc.retire_unit; + + addr_out.pa.pa = pa_addr; + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL, + &addr_out, false); + else + return -EINVAL; +} + +int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len) +{ + int i, ret; + struct ras_err_data err_data; + + err_data.err_addr = kcalloc(adev->umc.retire_unit, + sizeof(struct eeprom_table_record), GFP_KERNEL); + if (!err_data.err_addr) { + dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n"); + return 0; + } + + ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr); + if (ret) + goto out; + + for (i = 0; i < adev->umc.retire_unit; i++) { + if (i >= len) + goto out; + + pfns[i] = err_data.err_addr[i].retired_page; + } + ret = i; + +out: + kfree(err_data.err_addr); + return ret; +} + +int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t 
socket, + struct ta_ras_query_address_output *addr_out, bool dump_addr) +{ + struct ta_ras_query_address_input addr_in; + int ret; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch; + addr_in.ma.umc_inst = umc; + addr_in.ma.node_inst = node; + addr_in.ma.socket_id = socket; + + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { + ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in, + addr_out, dump_addr); + if (ret) + return ret; + } else { + return 0; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index ce4179db2a6d..a4a7e61817aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -54,6 +54,22 @@ /* Page retirement tag */ #define UMC_ECC_NEW_DETECTED_TAG 0x1 +/* + * a flag to indicate v2 of channel index stored in eeprom + * + * v1 (legacy way): store channel index within a umc instance in eeprom + * range in UMC v12: 0 ~ 7 + * v2: store global channel index in eeprom + * range in UMC v12: 0 ~ 127 + * + * NOTE: it's better to store it in eeprom_table_record.mem_channel, + * but there is only 8 bits in mem_channel, and the channel number may + * increase in the future, we decide to save it in + * eeprom_table_record.retired_page. retired_page is useless in v2, + * we depend on eeprom_table_record.address instead of retired_page in v2. + * Only 48 bits are saved on eeprom, use bit 47 here. 
+ */ +#define UMC_CHANNEL_IDX_V2 BIT_ULL(47) typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data); @@ -70,6 +86,13 @@ struct amdgpu_umc_ras { enum amdgpu_mca_error_type type, void *ras_error_status); int (*update_ecc_status)(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr); + int (*convert_ras_err_addr)(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out, + bool dump_addr); + uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev, + uint64_t mca_addr, uint64_t retired_page); }; struct amdgpu_umc_funcs { @@ -134,4 +157,12 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, void *ras_error_status); +int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t pa_addr); +int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len); +int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t socket, + struct ta_ras_query_address_output *addr_out, bool dump_addr); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index bd2d3863c3ed..dde15c6a96e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -587,7 +587,8 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) break; } - r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED, + "%s", fw_name); if (r) { release_firmware(adev->umsch_mm.fw); adev->umsch_mm.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 
31fd30dcd593..74758b5ffc6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->uvd.fw, "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->uvd.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n", fw_name); @@ -551,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo) for (i = 0; i < abo->placement.num_placement; ++i) { abo->placements[i].fpfn = 0 >> PAGE_SHIFT; abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; + if (abo->placements[i].mem_type == TTM_PL_VRAM) + abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 599d3ca4e0ef..b9060bcd4806 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->vce.fw, "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", fw_name); @@ -503,7 +503,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[i] = 0x0; r = amdgpu_job_submit_direct(job, ring, &f); - amdgpu_ib_free(ring->adev, &ib_msg, f); + amdgpu_ib_free(&ib_msg, f); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 3e94c3ba1ba2..83faf6e6788a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2024 Advanced Micro Devices, Inc. 
* All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -62,6 +62,7 @@ #define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin" #define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" #define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" +#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -88,6 +89,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_5); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); +MODULE_FIRMWARE(FIRMWARE_VCN5_0_1); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -99,11 +101,15 @@ int amdgpu_vcn_early_init(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (i == 1 && amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6)) - r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s_%d.bin", ucode_prefix, i); + r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_%d.bin", ucode_prefix, i); else - r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s.bin", ucode_prefix); + r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (r) { - amdgpu_ucode_release(&adev->vcn.fw[i]); + amdgpu_ucode_release(&adev->vcn.inst[i].fw); return r; } } @@ -151,7 +157,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) adev->vcn.using_unified_queue = amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); - hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[0].fw->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); /* Bit 20-23, it is encode major and non-zero for new naming convention. 
@@ -270,7 +276,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_enc_rings; ++i) amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); - amdgpu_ucode_release(&adev->vcn.fw[j]); + amdgpu_ucode_release(&adev->vcn.inst[j].fw); } mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround); @@ -282,7 +288,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; - int vcn_config = adev->vcn.vcn_config[vcn_instance]; + int vcn_config = adev->vcn.inst[vcn_instance].vcn_config; if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) ret = true; @@ -362,12 +368,12 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) const struct common_firmware_header *hdr; unsigned int offset; - hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(adev_to_drm(adev), &idx)) { memcpy_toio(adev->vcn.inst[i].cpu_addr, - adev->vcn.fw[i]->data + offset, + adev->vcn.inst[i].fw->data + offset, le32_to_cpu(hdr->ucode_size_bytes)); drm_dev_exit(idx); } @@ -580,7 +586,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, if (r) goto err_free; - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); if (fence) *fence = dma_fence_get(f); @@ -591,7 +597,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, err_free: amdgpu_job_free(job); err: - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); return r; } @@ -773,7 +779,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, if (r) goto err_free; - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); if (fence) *fence = dma_fence_get(f); @@ -784,7 +790,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring 
*ring, err_free: amdgpu_job_free(job); err: - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); return r; } @@ -1014,7 +1020,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = 0; error: - amdgpu_ib_free(adev, &ib, fence); + amdgpu_ib_free(&ib, fence); dma_fence_put(fence); return r; @@ -1025,7 +1031,8 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; long r; - if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) { + if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) && + (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) { r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); if (r) goto error; @@ -1063,7 +1070,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; /* currently only support 2 FW instances */ if (i >= 2) { dev_info(adev->dev, "More then 2 VCN FW instances!\n"); @@ -1071,12 +1078,14 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) } idx = AMDGPU_UCODE_ID_VCN + i; adev->firmware.ucode[idx].ucode_id = idx; - adev->firmware.ucode[idx].fw = adev->vcn.fw[i]; + adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); if (amdgpu_ip_version(adev, UVD_HWIP, 0) == - IP_VERSION(4, 0, 3)) + IP_VERSION(4, 0, 3) || + amdgpu_ip_version(adev, UVD_HWIP, 0) == + IP_VERSION(5, 0, 1)) break; } } @@ -1320,3 +1329,71 @@ void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_vcn_reset_mask); } } + +/* + * debugfs to enable/disable vcn job submission to specific core or + * instance. It is created only if the queue type is unified. 
+ */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1ULL << adev->vcn.num_vcn_inst) - 1; + if ((val & mask) == 0) + return -EINVAL; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + if (val & (1ULL << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + if (ring->sched.ready) + mask |= 1ULL << i; + } + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops, + amdgpu_debugfs_vcn_sched_mask_get, + amdgpu_debugfs_vcn_sched_mask_set, "%llx\n"); +#endif + +void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.using_unified_queue) + return; + sprintf(name, "amdgpu_vcn_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_vcn_sched_mask_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1e32311c1dff..adaf4388ad28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -163,20 +163,30 @@ #define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \ ({ \ uint32_t internal_reg_offset, addr; \ - bool video_range, aon_range; \ + bool video_range, video1_range, aon_range, aon1_range; \ \ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ addr <<= 2; \ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \ + video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS + 0x2600))))); \ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \ + aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS + 0x600))))); \ if (video_range) \ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \ (VCN_VID_IP_ADDRESS)); \ else if (aon_range) \ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \ (VCN_AON_IP_ADDRESS)); \ + else if (video1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS) + \ + (VCN_VID_IP_ADDRESS)); \ + else if (aon1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS) + \ + (VCN_AON_IP_ADDRESS)); \ else \ internal_reg_offset = (0xFFFFF & addr); \ \ @@ -297,6 +307,9 @@ struct amdgpu_vcn_inst { atomic_t dpg_enc_submission_cnt; struct amdgpu_vcn_fw_shared fw_shared; uint8_t aid_id; + const struct firmware *fw; /* VCN firmware */ + uint8_t vcn_config; + uint32_t vcn_codec_disable_mask; }; struct amdgpu_vcn_ras { @@ -306,15 +319,12 @@ struct amdgpu_vcn_ras { struct amdgpu_vcn { unsigned fw_version; struct delayed_work idle_work; - const struct firmware *fw[AMDGPU_MAX_VCN_INSTANCES]; /* VCN firmware */ unsigned num_enc_rings; enum amd_powergating_state cur_state; 
bool indirect_sram; uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; - uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES]; - uint32_t vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES]; struct amdgpu_vcn_reg internal; struct mutex vcn_pg_lock; struct mutex vcn1_jpeg1_workaround; @@ -523,5 +533,6 @@ int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev); int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev); void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev); +void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index c704e9803e11..0af469ec6fcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1263,12 +1263,10 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev, if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) return 0; - tmp = kmalloc(used_size, GFP_KERNEL); + tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL); if (!tmp) return -ENOMEM; - memcpy(tmp, &host_telemetry->body.error_count, used_size); - if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0)) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 8bf28d336807..03308261f894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -632,13 +632,13 @@ static bool amdgpu_vkms_is_idle(void *handle) return true; } -static int amdgpu_vkms_set_clockgating_state(void *handle, +static int amdgpu_vkms_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int amdgpu_vkms_set_powergating_state(void *handle, +static int amdgpu_vkms_set_powergating_state(struct amdgpu_ip_block *ip_block, enum 
amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 40a51bd128c7..5c07777d3239 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -785,12 +785,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - if (adev->gfx.enable_cleaner_shader && - ring->funcs->emit_cleaner_shader && - job->enforce_isolation) - ring->funcs->emit_cleaner_shader(ring); - - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && + !(job->enforce_isolation && !job->vmid)) return 0; amdgpu_ring_ib_begin(ring); @@ -801,6 +797,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); + if (adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && + job->enforce_isolation) + ring->funcs->emit_cleaner_shader(ring); + if (vm_flush_needed) { trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); @@ -1334,10 +1335,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, * next command submission. 
*/ if (amdgpu_vm_is_bo_always_valid(vm, bo)) { - uint32_t mem_type = bo->tbo.resource->mem_type; - - if (!(bo->preferred_domains & - amdgpu_mem_type_to_domain(mem_type))) + if (bo->tbo.resource && + !(bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))) amdgpu_vm_bo_evicted(&bo_va->base); else amdgpu_vm_bo_idle(&bo_va->base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 110b120d7375..121ee17b522b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -236,7 +236,8 @@ int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe) int ret; amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix)); - ret = amdgpu_ucode_request(adev, &adev->vpe.fw, "amdgpu/%s.bin", fw_prefix); + ret = amdgpu_ucode_request(adev, &adev->vpe.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", fw_prefix); if (ret) goto out; @@ -646,16 +647,16 @@ static int vpe_ring_preempt_ib(struct amdgpu_ring *ring) return r; } -static int vpe_set_clockgating_state(void *handle, +static int vpe_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int vpe_set_powergating_state(void *handle, +static int vpe_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; if (!adev->pm.dpm_enabled) @@ -833,7 +834,7 @@ static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout) ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 
0 : -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 7d26a962f811..ff5e52025266 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -567,7 +567,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, else remaining_size -= size; } - mutex_unlock(&mgr->lock); if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { struct drm_buddy_block *dcc_block; @@ -584,6 +583,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, (u64)vres->base.size, &vres->blocks); } + mutex_unlock(&mgr->lock); vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index e209b5e101df..23b6f7a4aa4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -427,7 +427,7 @@ void amdgpu_xcp_release_sched(struct amdgpu_device *adev, return; sched = entity->entity.rq->sched; - if (sched->ready) { + if (drm_sched_wqueue_ready(sched)) { ring = to_amdgpu_ring(entity->entity.rq->sched); atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index e2cb1f080e88..08d6787893b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2161,13 +2161,13 @@ static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int cik_common_set_clockgating_state(void *handle, +static int cik_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int cik_common_set_powergating_state(void *handle, +static int cik_common_set_powergating_state(struct 
amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 1da17755ad53..444563486769 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -402,13 +402,13 @@ static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int cik_ih_set_clockgating_state(void *handle, +static int cik_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int cik_ih_set_powergating_state(void *handle, +static int cik_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ede1a028d48d..d9bd8f3f17e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -133,9 +133,11 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; @@ -696,7 +698,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1189,11 +1191,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int cik_sdma_set_clockgating_state(void *handle, +static int cik_sdma_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = 
ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -1204,7 +1206,7 @@ static int cik_sdma_set_clockgating_state(void *handle, return 0; } -static int cik_sdma_set_powergating_state(void *handle, +static int cik_sdma_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index d72973bd570d..82586b76aeda 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -398,14 +398,14 @@ static int cz_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int cz_ih_set_clockgating_state(void *handle, +static int cz_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { // TODO return 0; } -static int cz_ih_set_powergating_state(void *handle, +static int cz_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { // TODO diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5098c50d54c8..c5e3d2251b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2687,6 +2687,32 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v10_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); + +} + +static const struct drm_plane_helper_funcs dce_v10_0_drm_primary_plane_helper_funcs = { + 
.get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v10_0_panic_flush, +}; + static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2734,6 +2760,7 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v10_0_drm_primary_plane_helper_funcs); return 0; } @@ -3302,13 +3329,13 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev, return 0; } -static int dce_v10_0_set_clockgating_state(void *handle, +static int dce_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v10_0_set_powergating_state(void *handle, +static int dce_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index c5680ff4ab9f..ea42a4472bf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2800,6 +2800,32 @@ static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v11_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); + +} + +static const struct drm_plane_helper_funcs dce_v11_0_drm_primary_plane_helper_funcs = { + 
.get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v11_0_panic_flush, +}; + static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2847,6 +2873,7 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v11_0_drm_primary_plane_helper_funcs); return 0; } @@ -3434,13 +3461,13 @@ static int dce_v11_0_hpd_irq(struct amdgpu_device *adev, return 0; } -static int dce_v11_0_set_clockgating_state(void *handle, +static int dce_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v11_0_set_powergating_state(void *handle, +static int dce_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index eb7de9122d99..915804a6a1d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2602,6 +2602,32 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v6_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_ARRAY_MODE(0x7); + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); + +} + +static const struct drm_plane_helper_funcs dce_v6_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = 
amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v6_0_panic_flush, +}; + static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2629,6 +2655,7 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v6_0_drm_primary_plane_helper_funcs); return 0; } @@ -3124,13 +3151,13 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, } -static int dce_v6_0_set_clockgating_state(void *handle, +static int dce_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v6_0_set_powergating_state(void *handle, +static int dce_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 04b79ff87f75..f2edc0fece5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2613,6 +2613,31 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v8_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); +} + +static const struct drm_plane_helper_funcs dce_v8_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + 
.panic_flush = dce_v8_0_panic_flush, +}; + static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2640,6 +2665,7 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v8_0_drm_primary_plane_helper_funcs); return 0; } @@ -3212,13 +3238,13 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, } -static int dce_v8_0_set_clockgating_state(void *handle, +static int dce_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v8_0_set_powergating_state(void *handle, +static int dce_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 24dce803a829..5ba263fe5512 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -45,6 +45,7 @@ #include "clearstate_gfx10.h" #include "v10_structs.h" #include "gfx_v10_0.h" +#include "gfx_v10_0_cleaner_shader.h" #include "nbio_v2_3.h" /* @@ -3673,7 +3674,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); -static int gfx_v10_0_set_powergating_state(void *handle, +static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -4036,7 +4037,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ 
-4138,18 +4139,21 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce%s.bin", ucode_prefix, wks); if (err) goto out; @@ -4173,6 +4177,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec%s.bin", ucode_prefix, wks); if (err) goto out; @@ -4180,6 +4185,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2%s.bin", ucode_prefix, wks); if (!err) { amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); @@ -4733,6 +4739,23 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) break; } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 64 && + adev->gfx.pfp_fw_version >= 100 && + adev->gfx.mec_fw_version >= 122) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize 
cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -5952,7 +5975,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) else WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); - if (adev->job_hang && !enable) + if (amdgpu_in_reset(adev) && !enable) return 0; for (i = 0; i < adev->usec_timeout; i++) { @@ -6599,17 +6622,13 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80); break; default: tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80); break; } } @@ -7457,7 +7476,7 @@ static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block) * otherwise the gfxoff disallowing will be failed to set. 
*/ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1)) - gfx_v10_0_set_powergating_state(ip_block->adev, AMD_PG_STATE_UNGATE); + gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -8345,10 +8364,10 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = { .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, }; -static int gfx_v10_0_set_powergating_state(void *handle, +static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -8383,10 +8402,10 @@ static int gfx_v10_0_set_powergating_state(void *handle, return 0; } -static int gfx_v10_0_set_clockgating_state(void *handle, +static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h new file mode 100644 index 000000000000..663c2572d440 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* Define the cleaner shader gfx_10_3_0 */ +static const u32 gfx_10_3_0_cleaner_shader_hex[] = { + 0xb0804004, 0xbf8a0000, + 0xbe8203b8, 0xbefc0380, + 0x7e008480, 0x7e028480, + 0x7e048480, 0x7e068480, + 0x7e088480, 0x7e0a8480, + 0x7e0c8480, 0x7e0e8480, + 0xbefc0302, 0x80828802, + 0xbf84fff5, 0xbe8203ff, + 0x80000000, 0x87020002, + 0xbf840012, 0xbefe03c1, + 0xbeff03c1, 0xd7650001, + 0x0001007f, 0xd7660001, + 0x0002027e, 0x16020288, + 0xbe8203bf, 0xbefc03c1, + 0xd9382000, 0x00020201, + 0xd9386040, 0x00040401, + 0xd70f6a01, 0x000202ff, + 0x00000400, 0x80828102, + 0xbf84fff7, 0xbefc03ff, + 0x00000068, 0xbe803080, + 0xbe813080, 0xbe823080, + 0xbe833080, 0x80fc847c, + 0xbf84fffa, 0xbeea0480, + 0xbeec0480, 0xbeee0480, + 0xbef00480, 0xbef20480, + 0xbef40480, 0xbef60480, + 0xbef80480, 0xbefa0480, + 0xbf810000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm new file mode 100644 index 000000000000..0e1c246166c0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. +//To turn this shader program on for complitaion change this to main and lower shader main to main_1 + +// GFX10.3 : Clear SGPRs, VGPRs and LDS +// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot +// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD +// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS) +// It takes 2 workgroups to use all of LDS: one on each CU of the WGP +// Each wave clears SGPRs 0 - 107 +// Each wave clears VGPRs 0 - 63 +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. + + +shader main + asic(GFX10) + type(CS) + wave_size(32) +// Note: original source code from SQ team + +// +// Create 32 waves in a threadgroup (CS waves) +// Each allocates 64 VGPRs +// The workgroup allocates all of LDS (64kbytes) +// +// Takes about 2500 clocks to run. 
+// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks) +// + S_BARRIER + s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance) + s_mov_b32 m0, 0 + // + // CLEAR VGPRs + // +label_0005: + v_movreld_b32 v0, 0 + v_movreld_b32 v1, 0 + v_movreld_b32 v2, 0 + v_movreld_b32 v3, 0 + v_movreld_b32 v4, 0 + v_movreld_b32 v5, 0 + v_movreld_b32 v6, 0 + v_movreld_b32 v7, 0 + s_mov_b32 m0, s2 + s_sub_u32 s2, s2, 8 + s_cbranch_scc0 label_0005 + // + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iterations + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + + // + // CLEAR SGPRs + // +label_0023: + s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0 //clear vcc + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 
0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 + + s_endpgm + +end + + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 2ae058a224f4..56c06b72a70a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -615,7 +615,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: if (!ring->is_mes_queue) - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: if (!ring->is_mes_queue) @@ -639,6 +639,7 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char * int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_toc.bin", ucode_prefix); if (err) goto out; @@ -688,6 +689,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; @@ -705,6 +707,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", ucode_prefix); if (err) goto out; @@ -720,9 +723,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && adev->pdev->revision == 0xCE) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/gc_11_0_0_rlc_1.bin"); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; @@ -735,6 +740,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } err = 
amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", ucode_prefix); if (err) goto out; @@ -1885,6 +1891,7 @@ static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) { + u32 rb_bitmap_per_sa; u32 rb_bitmap_width_per_sa; u32 max_sa; u32 active_sa_bitmap; @@ -1902,9 +1909,11 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / adev->gfx.config.max_sh_per_se; + rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); + for (i = 0; i < max_sa; i++) { if (active_sa_bitmap & (1 << i)) - active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); + active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); } active_rb_bitmap &= global_active_rb_bitmap; @@ -3918,9 +3927,7 @@ static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) @@ -5458,10 +5465,10 @@ static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } -static int gfx_v11_0_set_powergating_state(void *handle, +static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -5494,10 +5501,10 @@ static int gfx_v11_0_set_powergating_state(void *handle, return 0; } -static int gfx_v11_0_set_clockgating_state(void *handle, +static 
int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -6646,30 +6653,14 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int i, r = 0; + int r = 0; if (amdgpu_sriov_vf(adev)) return -EINVAL; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); - WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); - - /* make sure dequeue is complete*/ - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - if (i >= adev->usec_timeout) - r = -ETIMEDOUT; - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "fail to wait on hqd deactivate\n"); + dev_err(adev->dev, "reset via MMIO failed %d\n", r); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index fe7c48f2fb2a..2523221a2519 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -513,7 +513,7 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: if (!ring->is_mes_queue) - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: if (!ring->is_mes_queue) @@ -537,6 +537,7 @@ static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char * int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + AMDGPU_UCODE_REQUIRED, 
"amdgpu/%s_toc.bin", ucode_prefix); if (err) goto out; @@ -566,6 +567,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; @@ -573,6 +575,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", ucode_prefix); if (err) goto out; @@ -581,6 +584,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; @@ -593,6 +597,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", ucode_prefix); if (err) goto out; @@ -1347,6 +1352,14 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->gfx.me_fw_version >= 2480 && + adev->gfx.pfp_fw_version >= 2530 && + adev->gfx.mec_fw_version >= 2680 && + adev->mes.fw_version[0] >= 100) + adev->gfx.enable_cleaner_shader = true; + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -1437,11 +1450,19 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) } } - /* TODO: Add queue reset mask when FW fully supports it */ adev->gfx.gfx_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 
1): + if ((adev->gfx.me_fw_version >= 2660) && + (adev->gfx.mec_fw_version >= 2920)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } + } if (!adev->enable_mes_kiq) { r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0); @@ -1610,6 +1631,7 @@ static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev) static void gfx_v12_0_setup_rb(struct amdgpu_device *adev) { + u32 rb_bitmap_per_sa; u32 rb_bitmap_width_per_sa; u32 max_sa; u32 active_sa_bitmap; @@ -1627,12 +1649,14 @@ static void gfx_v12_0_setup_rb(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / adev->gfx.config.max_sh_per_se; + rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); + for (i = 0; i < max_sa; i++) { if (active_sa_bitmap & (1 << i)) - active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); + active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } @@ -2832,9 +2856,7 @@ static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev) @@ -3864,10 +3886,10 @@ static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable) } #endif -static int gfx_v12_0_set_powergating_state(void *handle, +static int gfx_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device 
*adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -3999,17 +4021,6 @@ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade if (def != data) WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); - - data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); - data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - - /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ - if (adev->sdma.num_instances > 1) { - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); - } } } @@ -4115,15 +4126,15 @@ static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static int gfx_v12_0_set_clockgating_state(void *handle, +static int gfx_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): gfx_v12_0_update_gfx_clock_gating(adev, @@ -5233,24 +5244,16 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int r, i; + int r; if (amdgpu_sriov_vf(adev)) return -EINVAL; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); - soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); - WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32_SOC15(GC, 
0, regCP_HQD_ACTIVE) & 1)) - break; - udelay(1); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); + if (r) { + dev_err(adev->dev, "reset via MMIO failed %d\n", r); + return r; } - soc24_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h index bcc9c72ccbde..f7184b2dc4e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h @@ -26,4 +26,6 @@ extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block; +int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 41f50bf380c4..f26e2cdec07a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -337,6 +337,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; @@ -345,6 +346,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); if (err) goto out; @@ -353,6 +355,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); if (err) goto out; @@ -361,6 +364,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + 
AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -1906,7 +1910,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; error: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; } @@ -3373,11 +3377,11 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v6_0_set_clockgating_state(void *handle, +static int gfx_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -3395,11 +3399,11 @@ static int gfx_v6_0_set_clockgating_state(void *handle, return 0; } -static int gfx_v6_0_set_powergating_state(void *handle, +static int gfx_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) gate = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 824d5913103b..84745b2453ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -934,33 +934,39 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); if (err) goto out; err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); if (err) goto out; err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); if (err) goto out; if (adev->asic_type 
== CHIP_KAVERI) { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); if (err) goto out; } err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); out: if (err) { @@ -2324,7 +2330,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; error: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; } @@ -4846,11 +4852,11 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v7_0_set_clockgating_state(void *handle, +static int gfx_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -4869,11 +4875,11 @@ static int gfx_v7_0_set_clockgating_state(void *handle, return 0; } -static int gfx_v7_0_set_powergating_state(void *handle, +static int gfx_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) gate = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b7006c41e270..6a025438f9d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -914,7 +914,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -982,13 +982,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = 
amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_pfp_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); } if (err) @@ -999,13 +1002,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_me_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); } if (err) @@ -1017,13 +1023,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_ce_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); } if (err) @@ -1044,6 +1053,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->virt.chained_ib_support = false; err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -1093,13 +1103,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_mec_2.bin", chip_name); if (err == -ENODEV) { err = 
amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); } if (err) @@ -1112,13 +1125,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) (adev->asic_type != CHIP_TOPAZ)) { if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_mec2_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); } if (!err) { @@ -1640,7 +1656,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) RREG32(sec_ded_counter_registers[i]); fail: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; @@ -4304,9 +4320,7 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32(mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32(mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32(mmRLC_CP_SCHEDULERS, tmp); + WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80); } static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) @@ -5321,7 +5335,7 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade (adev->asic_type == CHIP_POLARIS12) || (adev->asic_type == CHIP_VEGAM)) /* Send msg to SMU via Powerplay */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0); WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 
1 : 0); } @@ -5367,10 +5381,10 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, } } -static int gfx_v8_0_set_powergating_state(void *handle, +static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -5625,8 +5639,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t temp, data; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { @@ -5720,8 +5732,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5731,8 +5741,6 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; @@ -5813,12 +5821,12 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev } gfx_v8_0_wait_for_rlc_serdes(adev); - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + if (enable) { /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) * === MGCG + MGLS + TS(CG/LS) === @@ -5832,6 +5840,8 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 
gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); gfx_v8_0_update_medium_grain_clock_gating(adev, enable); } + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5982,10 +5992,10 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static int gfx_v8_0_set_clockgating_state(void *handle, +static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0b6f09f2cc9b..fa572b40989e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1243,7 +1243,7 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -1429,18 +1429,21 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, int err; err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); if (err) goto out; @@ -1476,6 +1479,7 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc_am4.bin", 
chip_name); else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && (smu_version >= 0x41e2b)) @@ -1483,9 +1487,11 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. */ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_kicker_rlc.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -1518,9 +1524,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, - "amdgpu/%s_sjt_mec.bin", chip_name); + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sjt_mec.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); if (err) goto out; @@ -1531,9 +1539,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sjt_mec2.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); if (!err) { amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); @@ -3488,9 +3498,7 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd 
*mqd) @@ -4780,7 +4788,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) } fail: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; @@ -4956,8 +4964,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t data, def; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - RLC_CGTT_MGCG_OVERRIDE */ @@ -5022,8 +5028,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, @@ -5034,8 +5038,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, if (!adev->gfx.num_gfx_rings) return; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - /* Enable 3D CGCG/CGLS */ if (enable) { /* write cmd to clear cgcg/cgls ov */ @@ -5077,8 +5079,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5086,8 +5086,6 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev { uint32_t def, data; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ @@ -5129,13 +5127,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable) { 
/* CGCG/CGLS should be enabled after MGCG/MGLS * === MGCG + MGLS === @@ -5155,6 +5152,7 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, /* === MGCG + MGLS === */ gfx_v9_0_update_medium_grain_clock_gating(adev, enable); } + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5232,10 +5230,10 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, }; -static int gfx_v9_0_set_powergating_state(void *handle, +static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -5277,10 +5275,10 @@ static int gfx_v9_0_set_powergating_state(void *handle, return 0; } -static int gfx_v9_0_set_clockgating_state(void *handle, +static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 3f4fd2f08163..d81449f9d822 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -412,7 +412,7 @@ static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev, r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr); if (r) { dev_err(adev->dev, "ib submit failed (%d).\n", r); - amdgpu_ib_free(adev, ib, NULL); + amdgpu_ib_free(ib, NULL); } return r; } @@ -611,16 +611,16 @@ static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev) } disp2_failed: - amdgpu_ib_free(adev, &disp_ibs[2], NULL); + amdgpu_ib_free(&disp_ibs[2], NULL); dma_fence_put(fences[2]); disp1_failed: - amdgpu_ib_free(adev, &disp_ibs[1], NULL); + 
amdgpu_ib_free(&disp_ibs[1], NULL); dma_fence_put(fences[1]); disp0_failed: - amdgpu_ib_free(adev, &disp_ibs[0], NULL); + amdgpu_ib_free(&disp_ibs[0], NULL); dma_fence_put(fences[0]); pro_end: - amdgpu_ib_free(adev, &wb_ib, NULL); + amdgpu_ib_free(&wb_ib, NULL); if (r) dev_info(adev->dev, "Init SGPRS Failed\n"); @@ -687,10 +687,10 @@ static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev) } disp_failed: - amdgpu_ib_free(adev, &disp_ib, NULL); + amdgpu_ib_free(&disp_ib, NULL); dma_fence_put(fence); pro_end: - amdgpu_ib_free(adev, &wb_ib, NULL); + amdgpu_ib_free(&wb_ib, NULL); if (r) dev_info(adev->dev, "Init VGPRS Failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index e2b3dda57030..2ba185875baa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -43,8 +43,12 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_5_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_5_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin"); #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -52,10 +56,6 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); #define GOLDEN_GB_ADDR_CONFIG 0x2a114042 #define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301 -#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */ -#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ -#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ - #define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */ #define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */ #define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */ @@ -349,13 +349,17 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) WREG32_SOC15(GC, dev_inst, 
regGB_ADDR_CONFIG, GOLDEN_GB_ADDR_CONFIG); - /* Golden settings applied by driver for ASIC with rev_id 0 */ - if (adev->rev_id == 0) { - WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1, - REDUCE_FIFO_DEPTH_BY_2, 2); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) { + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, SPARE, 0x1); } else { - WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, - SPARE, 0x1); + /* Golden settings applied by driver for ASIC with rev_id 0 */ + if (adev->rev_id == 0) { + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1, + REDUCE_FIFO_DEPTH_BY_2, 2); + } else { + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, + SPARE, 0x1); + } } } } @@ -499,7 +503,7 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -543,6 +547,7 @@ static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -574,8 +579,19 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, { int err; - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, - "amdgpu/%s_mec.bin", chip_name); + if (amdgpu_sriov_vf(adev)) { + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sjt_mec.bin", chip_name); + + if (err) + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mec.bin", chip_name); + } else + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mec.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); @@ -929,6 +945,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case 
IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1779,9 +1796,7 @@ static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id) tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) @@ -2764,16 +2779,16 @@ static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range, }; -static int gfx_v9_4_3_set_powergating_state(void *handle, +static int gfx_v9_4_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static int gfx_v9_4_3_set_clockgating_state(void *handle, +static int gfx_v9_4_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, num_xcc; if (amdgpu_sriov_vf(adev)) @@ -4653,7 +4668,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) num_xcc = NUM_XCC(adev->gfx.xcc_mask); - amdgpu_gfx_off_ctrl(adev, false); for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { xcc_offset = xcc_id * reg_count; for (i = 0; i < reg_count; i++) @@ -4661,7 +4675,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i], GET_INST(GC, xcc_id))); } - amdgpu_gfx_off_ctrl(adev, true); /* dump compute queue registers for all instances */ if (!adev->gfx.ip_dump_compute_queues) @@ 
-4670,7 +4683,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); - amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->srbm_mutex); for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { xcc_offset = xcc_id * reg_count * num_inst; @@ -4697,7 +4709,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) } soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_off_ctrl(adev, true); } static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring) @@ -4860,6 +4871,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): /* 9.4.3 removed all the GDS internal memory, * only support GWS opcode in kernel, like barrier * semaphore.etc */ @@ -4873,6 +4885,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): /* deprecated for 9.4.3, no usage at all */ adev->gds.gds_compute_max_wave_id = 0; break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index ed8e130c7d19..5470cef7e9bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -368,7 +368,9 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 4)); + IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 5, 0)); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), diff --git 
a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 697599c46240..9bedca9a79c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1088,11 +1088,11 @@ static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v10_0_set_clockgating_state(void *handle, +static int gmc_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled @@ -1131,7 +1131,7 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) athub_v2_0_get_clockgating(adev, flags); } -static int gmc_v10_0_set_powergating_state(void *handle, +static int gmc_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index f893ab4c14df..72751ab4c766 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -996,11 +996,11 @@ static int gmc_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v11_0_set_clockgating_state(void *handle, +static int gmc_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = adev->mmhub.funcs->set_clockgating(adev, state); if (r) @@ -1018,7 +1018,7 @@ static void gmc_v11_0_get_clockgating_state(void *handle, u64 *flags) athub_v3_0_get_clockgating(adev, flags); } -static int gmc_v11_0_set_powergating_state(void *handle, +static int gmc_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index d22b027fd0bb..b749f1c3f6a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -40,7 +40,7 @@ #include "gfxhub_v12_0.h" #include "mmhub_v4_1_0.h" #include "athub_v4_1_0.h" - +#include "umc_v8_14.h" static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, @@ -581,6 +581,18 @@ static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { + case IP_VERSION(8, 14, 0): + adev->umc.channel_inst_num = UMC_V8_14_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V8_14_UMC_INSTANCE_NUM(adev); + adev->umc.node_inst_num = 0; + adev->umc.max_ras_err_cnt_per_query = UMC_V8_14_TOTAL_CHANNEL_NUM(adev); + adev->umc.channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET; + adev->umc.ras = &umc_v8_14_ras; + break; + default: + break; + } } @@ -829,6 +841,10 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_vm_manager_init(adev); + r = amdgpu_gmc_ras_sw_init(adev); + if (r) + return r; + return 0; } @@ -980,11 +996,11 @@ static int gmc_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v12_0_set_clockgating_state(void *handle, +static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = adev->mmhub.funcs->set_clockgating(adev, state); if (r) @@ -1002,7 +1018,7 @@ static void gmc_v12_0_get_clockgating_state(void *handle, u64 *flags) athub_v4_1_0_get_clockgating(adev, flags); } -static int gmc_v12_0_set_powergating_state(void *handle, +static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git 
a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index ca000b3d1afc..2245dda92021 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -131,7 +131,8 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58) chip_name = "si58"; - err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mc.bin", chip_name); if (err) { dev_err(adev->dev, "si_mc: Failed to load firmware \"%s_mc.bin\"\n", @@ -1094,13 +1095,13 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int gmc_v6_0_set_clockgating_state(void *handle, +static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int gmc_v6_0_set_powergating_state(void *handle, +static int gmc_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index b6016f11956e..9aac4b1101e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -157,7 +157,8 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mc.bin", chip_name); if (err) { pr_err("cik_mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name); amdgpu_ucode_release(&adev->gmc.fw); @@ -1317,11 +1318,11 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int gmc_v7_0_set_clockgating_state(void *handle, +static int gmc_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) 
{ bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -1337,7 +1338,7 @@ static int gmc_v7_0_set_clockgating_state(void *handle, return 0; } -static int gmc_v7_0_set_powergating_state(void *handle, +static int gmc_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 12d5967ecd45..d06585207c33 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -259,7 +259,8 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mc.bin", chip_name); if (err) { pr_err("mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name); amdgpu_ucode_release(&adev->gmc.fw); @@ -1658,10 +1659,10 @@ static void fiji_update_mc_light_sleep(struct amdgpu_device *adev, } } -static int gmc_v8_0_set_clockgating_state(void *handle, +static int gmc_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1679,7 +1680,7 @@ static int gmc_v8_0_set_clockgating_state(void *handle, return 0; } -static int gmc_v8_0_set_powergating_state(void *handle, +static int gmc_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 50c5da3020cb..291549765c38 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -623,6 +623,9 @@ static 
int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, } } + if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault)) + return 1; + if (!printk_ratelimit()) return 0; @@ -645,7 +648,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, soc15_ih_clientid_name[entry->client_id]); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : ""); @@ -795,7 +799,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) return false; return ((vmhub == AMDGPU_MMHUB0(0) || @@ -1138,12 +1143,13 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; + uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); bool snoop = false; bool is_local; dma_resv_assert_held(bo->tbo.base.resv); - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + switch (gc_ip_version) { case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): if (is_vram) { @@ -1157,10 +1163,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* FIXME: is this still needed? Or does * amdgpu_ttm_tt_pde_flags already handle this? 
*/ - if ((amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 3)) && + if (gc_ip_version == IP_VERSION(9, 4, 2) && adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { @@ -1184,6 +1187,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): /* Only local VRAM BOs or system memory on non-NUMA APUs * can be assumed to be local in their entirety. Choose * MTYPE_NC as safe fallback for all system memory BOs on @@ -1208,7 +1212,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - if (adev->rev_id) + if (gc_ip_version == IP_VERSION(9, 5, 0) || adev->rev_id) mtype = is_local ? MTYPE_CC : MTYPE_UC; else mtype = MTYPE_UC; @@ -1218,10 +1222,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* dGPU */ if (is_local) mtype = mtype_local; - else if (is_vram) - mtype = MTYPE_NC; - else + else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram) mtype = MTYPE_UC; + else + mtype = MTYPE_NC; } break; @@ -1275,7 +1279,8 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, * memory can use more efficient MTYPEs. 
*/ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 5, 0)) return; /* Only direct-mapped memory allows us to determine the NUMA node from @@ -1540,6 +1545,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) adev->mmhub.ras = &mmhub_v1_7_ras; break; case IP_VERSION(1, 8, 0): + case IP_VERSION(1, 8, 1): adev->mmhub.ras = &mmhub_v1_8_ras; break; default: @@ -1551,7 +1557,8 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) adev->gfxhub.funcs = &gfxhub_v1_2_funcs; else adev->gfxhub.funcs = &gfxhub_v1_0_funcs; @@ -1619,7 +1626,8 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) adev->gmc.xgmi.supported = true; if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) { @@ -1792,6 +1800,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): default: adev->gmc.gart_size = 512ULL << 20; break; @@ -2070,7 +2079,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) 
spin_lock_init(&adev->gmc.invalidate_lock); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) { gmc_v9_4_3_init_vram_info(adev); } else if (!adev->bios) { if (adev->flags & AMD_IS_APU) { @@ -2154,6 +2164,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), NUM_XCC(adev->gfx.xcc_mask)); @@ -2220,7 +2231,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_gmc_get_vbios_allocations(adev); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) { r = gmc_v9_0_init_mem_ranges(adev); if (r) return r; @@ -2250,7 +2262,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) ? + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) ? 
3 : 8; @@ -2263,7 +2276,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) return r; if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) amdgpu_gmc_sysfs_init(adev); return 0; @@ -2274,7 +2288,8 @@ static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) amdgpu_gmc_sysfs_fini(adev); amdgpu_gmc_ras_fini(adev); @@ -2544,10 +2559,10 @@ static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v9_0_set_clockgating_state(void *handle, +static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->mmhub.funcs->set_clockgating(adev, state); @@ -2565,7 +2580,7 @@ static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags) athub_v1_0_get_clockgating(adev, flags); } -static int gmc_v9_0_set_powergating_state(void *handle, +static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 7f45e93c0397..8ac3d3282268 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -392,13 +392,13 @@ static int iceland_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int iceland_ih_set_clockgating_state(void *handle, +static int 
iceland_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int iceland_ih_set_powergating_state(void *handle, +static int iceland_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 38f953fd65d9..f8a485164437 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -693,10 +693,10 @@ static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev, } } -static int ih_v6_0_set_clockgating_state(void *handle, +static int ih_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v6_0_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -756,10 +756,10 @@ static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev, WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); } -static int ih_v6_0_set_powergating_state(void *handle, +static int ih_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 61381e0c3795..dd0042efceec 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -674,10 +674,10 @@ static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev, return; } -static int ih_v6_1_set_clockgating_state(void *handle, +static int ih_v6_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct 
amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v6_1_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -737,10 +737,10 @@ static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev, WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); } -static int ih_v6_1_set_powergating_state(void *handle, +static int ih_v6_1_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index d2428cf5d385..8f9b15c171f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -664,10 +664,10 @@ static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev, return; } -static int ih_v7_0_set_clockgating_state(void *handle, +static int ih_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v7_0_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -727,10 +727,10 @@ static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev, WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); } -static int ih_v7_0_set_powergating_state(void *handle, +static int ih_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 
d4f72e47ae9e..aeca5c08ea2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -50,7 +50,8 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c index 1341f0292031..df898dbb746e 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -47,7 +47,8 @@ static int imu_v12_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 6e29b69894a5..7c9251c03815 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -35,7 +35,7 @@ static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v2_0_set_powergating_state(void *handle, +static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); /** @@ -154,7 +154,7 @@ static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) - jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v2_0_set_powergating_state(ip_block, 
AMD_PG_STATE_GATE); return 0; } @@ -675,14 +675,14 @@ static int jpeg_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int jpeg_v2_0_set_clockgating_state(void *handle, +static int jpeg_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (enable) { - if (!jpeg_v2_0_is_idle(handle)) + if (!jpeg_v2_0_is_idle(adev)) return -EBUSY; jpeg_v2_0_enable_clock_gating(adev); } else { @@ -692,10 +692,10 @@ static int jpeg_v2_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v2_0_set_powergating_state(void *handle, +static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->jpeg.cur_state) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 9ac421486f05..11f6af2646e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -38,7 +38,7 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v2_5_set_powergating_state(void *handle, +static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev); @@ -219,7 +219,7 @@ static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS)) - jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) 
amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0); @@ -518,10 +518,10 @@ static int jpeg_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int jpeg_v2_5_set_clockgating_state(void *handle, +static int jpeg_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); int i; @@ -530,7 +530,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle, continue; if (enable) { - if (!jpeg_v2_5_is_idle(handle)) + if (!jpeg_v2_5_is_idle(adev)) return -EBUSY; jpeg_v2_5_enable_clock_gating(adev, i); } else { @@ -541,10 +541,10 @@ static int jpeg_v2_5_set_clockgating_state(void *handle, return 0; } -static int jpeg_v2_5_set_powergating_state(void *handle, +static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->jpeg.cur_state) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index e0df6800502c..4eca65ea9053 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -36,7 +36,7 @@ static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v3_0_set_powergating_state(void *handle, +static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); /** @@ -168,7 +168,7 @@ static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) - jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); 
return 0; } @@ -466,14 +466,14 @@ static int jpeg_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v3_0_set_clockgating_state(void *handle, +static int jpeg_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; if (enable) { - if (!jpeg_v3_0_is_idle(handle)) + if (!jpeg_v3_0_is_idle(adev)) return -EBUSY; jpeg_v3_0_enable_clock_gating(adev); } else { @@ -483,10 +483,10 @@ static int jpeg_v3_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v3_0_set_powergating_state(void *handle, +static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if(state == adev->jpeg.cur_state) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index eca1963c33b6..0aef1f64afd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -39,7 +39,7 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev); static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v4_0_set_powergating_state(void *handle, +static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev); @@ -206,7 +206,7 @@ static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + 
jpeg_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); @@ -635,14 +635,14 @@ static int jpeg_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v4_0_set_clockgating_state(void *handle, +static int jpeg_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; if (enable) { - if (!jpeg_v4_0_is_idle(handle)) + if (!jpeg_v4_0_is_idle(adev)) return -EBUSY; jpeg_v4_0_enable_clock_gating(adev); } else { @@ -652,10 +652,10 @@ static int jpeg_v4_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v4_0_set_powergating_state(void *handle, +static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 67b51bcbacd1..88f9771c1686 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -43,7 +43,7 @@ enum jpeg_engin_status { static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v4_0_3_set_powergating_state(void *handle, +static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring); @@ -76,7 +76,7 @@ static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) { 
struct amdgpu_device *adev = ip_block->adev; - adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; jpeg_v4_0_3_set_dec_ring_funcs(adev); jpeg_v4_0_3_set_irq_funcs(adev); @@ -321,7 +321,7 @@ static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; ring->wptr = 0; @@ -379,7 +379,7 @@ static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) - ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } return ret; @@ -949,16 +949,16 @@ static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int jpeg_v4_0_3_set_clockgating_state(void *handle, +static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (enable) { - if (!jpeg_v4_0_3_is_idle(handle)) + if (!jpeg_v4_0_3_is_idle(adev)) return -EBUSY; jpeg_v4_0_3_enable_clock_gating(adev, i); } else { @@ -968,10 +968,10 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle, return 0; } -static int jpeg_v4_0_3_set_powergating_state(void *handle, +static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) { @@ -1231,9 +1231,95 @@ static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops 
= { .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count, }; +static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int jpeg_v4_0_3_err_codes[] = { + 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */ + 24, 25, 26, 27, 28, 29, 30, 31 +}; + +static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + jpeg_v4_0_3_err_codes, + ARRAY_SIZE(jpeg_v4_0_3_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = { + .aca_bank_parser = jpeg_v4_0_3_aca_bank_parser, + .aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid, +}; + +static const struct aca_info jpeg_v4_0_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &jpeg_v4_0_3_aca_bank_ops, +}; + +static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG, + &jpeg_v4_0_3_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + 
amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = { .ras_block = { .hw_ops = &jpeg_v4_0_3_ras_hw_ops, + .ras_late_init = jpeg_v4_0_3_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 1d9e3b101c3a..6b3656984957 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -48,7 +48,7 @@ static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v4_0_5_set_powergating_state(void *handle, +static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring); @@ -236,7 +236,7 @@ static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS)) - jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } return 0; @@ -660,10 +660,10 @@ static int jpeg_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int jpeg_v4_0_5_set_clockgating_state(void *handle, +static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; int i; @@ -672,7 +672,7 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle, continue; if (enable) { - if (!jpeg_v4_0_5_is_idle(handle)) + if (!jpeg_v4_0_5_is_idle(adev)) return -EBUSY; jpeg_v4_0_5_enable_clock_gating(adev, i); @@ -684,10 +684,10 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle, return 0; } -static int jpeg_v4_0_5_set_powergating_state(void *handle, +static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 58fb1e5fa89c..d5cf0f2799d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -31,12 +31,12 @@ #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" -#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" #include "jpeg_v5_0_0.h" static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v5_0_0_set_powergating_state(void *handle, +static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); /** @@ -74,7 +74,7 @@ static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) /* JPEG TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); if (r) return r; @@ -172,7 +172,7 @@ static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); 
return 0; } @@ -560,14 +560,14 @@ static int jpeg_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v5_0_0_set_clockgating_state(void *handle, +static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; if (enable) { - if (!jpeg_v5_0_0_is_idle(handle)) + if (!jpeg_v5_0_0_is_idle(adev)) return -EBUSY; jpeg_v5_0_0_enable_clock_gating(adev); } else { @@ -577,10 +577,10 @@ static int jpeg_v5_0_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v5_0_0_set_powergating_state(void *handle, +static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->jpeg.cur_state) @@ -612,7 +612,7 @@ static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev, DRM_DEBUG("IH: JPEG TRAP\n"); switch (entry->src_id) { - case VCN_4_0__SRCID__JPEG_DECODE: + case VCN_5_0__SRCID__JPEG_DECODE: amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c new file mode 100644 index 000000000000..40d4c32a8c2a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -0,0 +1,708 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2014-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v4_0_3.h" +#include "jpeg_v5_0_1.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" + +static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state); +static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring); + +static int amdgpu_ih_srcid_jpeg[] = { + VCN_5_0__SRCID__JPEG_DECODE, + VCN_5_0__SRCID__JPEG1_DECODE, + VCN_5_0__SRCID__JPEG2_DECODE, + VCN_5_0__SRCID__JPEG3_DECODE, + VCN_5_0__SRCID__JPEG4_DECODE, + VCN_5_0__SRCID__JPEG5_DECODE, + VCN_5_0__SRCID__JPEG6_DECODE, + VCN_5_0__SRCID__JPEG7_DECODE, + VCN_5_0__SRCID__JPEG8_DECODE, + VCN_5_0__SRCID__JPEG9_DECODE, +}; + +static int jpeg_v5_0_1_core_reg_offset(u32 pipe) +{ + if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3) + return ((0x40 * pipe) - 0xc80); + else + return ((0x40 * pipe) - 0x440); +} + +/** + * jpeg_v5_0_1_early_init - set function pointers + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Set ring and irq function pointers + */ +static int jpeg_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + if (!adev->jpeg.num_jpeg_inst || adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES) + return -ENOENT; + + adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + jpeg_v5_0_1_set_dec_ring_funcs(adev); + jpeg_v5_0_1_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v5_0_1_sw_init - sw init for JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Load firmware and sw initialization + */ +static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, j, r, jpeg_inst; + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq); + if (r) + return r; + } + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + ring->use_doorbell = false; + ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); + if (!amdgpu_sriov_vf(adev)) { + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 11 * jpeg_inst; + } else { + if (j < 4) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 4 + j + 32 * jpeg_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 8 + j + 32 * jpeg_inst; + } + sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[j] = + regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[j] = + SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0, + (j ? jpeg_v5_0_1_core_reg_offset(j) : 0)); + } + } + + return 0; +} + +/** + * jpeg_v5_0_1_sw_fini - sw fini for JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v5_0_1_hw_init - start and test JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + */ +static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, j, r, jpeg_inst; + + if (amdgpu_sriov_vf(adev)) { + /* jpeg_v5_0_1_start_sriov(adev); */ + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + ring->wptr = 0; + ring->wptr_old = 0; + jpeg_v5_0_1_dec_ring_set_wptr(ring); + ring->sched.ready = true; + } + } + return 0; + } + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + ring = adev->jpeg.inst[i].ring_dec; + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst, + adev->jpeg.inst[i].aid_id); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + if (ring->use_doorbell) + WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL, + (ring->pipe ? (ring->pipe - 0x15) : 0), + ring->doorbell_index << + VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + } + + return 0; +} + +/** + * jpeg_v5_0_1_hw_fini - stop the hardware block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret = 0; + + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE); + + return ret; +} + +/** + * jpeg_v5_0_1_suspend - suspend JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * HW fini and suspend JPEG block + */ +static int jpeg_v5_0_1_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = jpeg_v5_0_1_hw_fini(ip_block); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v5_0_1_resume - resume JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v5_0_1_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v5_0_1_hw_init(ip_block); + + return r; +} + +static int jpeg_v5_0_1_disable_antihang(struct amdgpu_device *adev, int inst_idx) +{ + int jpeg_inst; + + jpeg_inst = GET_INST(JPEG, inst_idx); + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* keep the JPEG in static PG mode */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); + + return 0; +} + +static int jpeg_v5_0_1_enable_antihang(struct amdgpu_device *adev, int inst_idx) +{ + int jpeg_inst; + + jpeg_inst = GET_INST(JPEG, inst_idx); + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + 
~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + return 0; +} + +/** + * jpeg_v5_0_1_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v5_0_1_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int i, j, jpeg_inst, r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + + /* disable antihang */ + r = jpeg_v5_0_1_disable_antihang(adev, i); + if (r) + return r; + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0); + u32 reg, data, mask; + + ring = &adev->jpeg.inst[i].ring_dec[j]; + + /* enable System Interrupt for JRBC */ + reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN); + if (j < AMDGPU_MAX_JPEG_RINGS_4_0_3) { + data = JPEG_SYS_INT_EN__DJRBC0_MASK << j; + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j); + WREG32_P(reg, data, mask); + } else { + data = JPEG_SYS_INT_EN__DJRBC0_MASK << (j+12); + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (j+12)); + WREG32_P(reg, data, mask); + } + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_VMID, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_CNTL, + reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + reg_offset, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + reg_offset, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_RPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_WPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + 
regUVD_JRBC_RB_CNTL, + reg_offset, 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_SIZE, + reg_offset, ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR, + reg_offset); + } + } + + return 0; +} + +/** + * jpeg_v5_0_1_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v5_0_1_stop(struct amdgpu_device *adev) +{ + int i, jpeg_inst, r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable antihang */ + r = jpeg_v5_0_1_enable_antihang(adev, i); + if (r) + return r; + } + + return 0; +} + +/** + * jpeg_v5_0_1_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v5_0_1_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR, + ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0); +} + +/** + * jpeg_v5_0_1_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v5_0_1_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR, + ring->pipe ? 
jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring->use_doorbell) {
+		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+	} else {
+		WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+				    regUVD_JRBC_RB_WPTR,
+				    (ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0),
+				    lower_32_bits(ring->wptr));
+	}
+}
+
+/**
+ * jpeg_v5_0_1_is_idle - check whether all JPEG rings are idle
+ *
+ * @handle: amdgpu_device pointer, passed as an opaque handle
+ *
+ * Reads UVD_JRBC_STATUS for every ring of every JPEG instance.
+ *
+ * Returns true only if RB_JOB_DONE is set for all of them.
+ */
+static bool jpeg_v5_0_1_is_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	/*
+	 * Seed with true: the result is AND-ed with each ring's state, so a
+	 * false seed would report "busy" no matter what the hardware says.
+	 */
+	bool ret = true;
+	int i, j;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+			ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+				regUVD_JRBC_STATUS, reg_offset) &
+				UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+				UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * jpeg_v5_0_1_wait_for_idle - wait for all JPEG rings to become idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Waits on UVD_JRBC_STATUS of every ring of every JPEG instance until
+ * RB_JOB_DONE is set.
+ *
+ * Returns 0 if every wait succeeded, otherwise the first non-zero result of
+ * SOC15_WAIT_ON_RREG_OFFSET (presumably a negative error code; confirm
+ * against the macro definition).
+ */
+static int jpeg_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+	int i, j;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+			/*
+			 * Propagate the failure: AND-ing into a zero
+			 * accumulator would always yield 0 and hide it.
+			 */
+			ret = SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+				regUVD_JRBC_STATUS, reg_offset,
+				UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+				UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool enable = (state == AMD_CG_STATE_GATE) ?
true : false; + + int i; + + if (!enable) + return 0; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (!jpeg_v5_0_1_is_idle(adev)) + return -EBUSY; + } + + return 0; +} + +static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v5_0_1_stop(adev); + else + ret = jpeg_v5_0_1_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v5_0_1_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v5_0_1_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u32 i, inst; + + i = node_id_to_phys_map[entry->node_id]; + DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) + if (adev->jpeg.inst[inst].aid_id == i) + break; + + if (inst >= adev->jpeg.num_jpeg_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown JPEG instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_5_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]); + break; + case VCN_5_0__SRCID__JPEG1_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]); + break; + case VCN_5_0__SRCID__JPEG2_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]); + break; + case VCN_5_0__SRCID__JPEG3_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]); + break; + case VCN_5_0__SRCID__JPEG4_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]); + break; + case VCN_5_0__SRCID__JPEG5_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]); + break; + case VCN_5_0__SRCID__JPEG6_DECODE: + 
amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]); + break; + case VCN_5_0__SRCID__JPEG7_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]); + break; + case VCN_5_0__SRCID__JPEG8_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[8]); + break; + case VCN_5_0__SRCID__JPEG9_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[9]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { + .name = "jpeg_v5_0_1", + .early_init = jpeg_v5_0_1_early_init, + .late_init = NULL, + .sw_init = jpeg_v5_0_1_sw_init, + .sw_fini = jpeg_v5_0_1_sw_fini, + .hw_init = jpeg_v5_0_1_hw_init, + .hw_fini = jpeg_v5_0_1_hw_fini, + .suspend = jpeg_v5_0_1_suspend, + .resume = jpeg_v5_0_1_resume, + .is_idle = jpeg_v5_0_1_is_idle, + .wait_for_idle = jpeg_v5_0_1_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v5_0_1_set_clockgating_state, + .set_powergating_state = jpeg_v5_0_1_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, +}; + +static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, + .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, + .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v5_0_1_dec_ring_emit_vm_flush */ + 22 + 22 + /* jpeg_v5_0_1_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v5_0_1_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = 
amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i, j, jpeg_inst; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_1_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec[j].me = i; + adev->jpeg.inst[i].ring_dec[j].pipe = j; + } + jpeg_inst = GET_INST(JPEG, i); + adev->jpeg.inst[i].aid_id = + jpeg_inst / adev->jpeg.num_inst_per_aid; + } +} + +static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_irq_funcs = { + .set = jpeg_v5_0_1_set_interrupt_state, + .process = jpeg_v5_0_1_process_interrupt, +}; + +static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) + adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; + + adev->jpeg.inst->irq.funcs = &jpeg_v5_0_1_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 5, + .minor = 0, + .rev = 1, + .funcs = &jpeg_v5_0_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h new file mode 100644 index 000000000000..8ce146c00bb6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h @@ -0,0 +1,29 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __JPEG_V5_0_1_H__ +#define __JPEG_V5_0_1_H__ + +extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block; + +#endif /* __JPEG_V5_0_1_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 9c905b9e9376..65f389eb65e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1505,9 +1505,7 @@ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void mes_v11_0_kiq_clear(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 9ecc5d61e49b..901e924e69ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include <linux/module.h> #include "amdgpu.h" +#include "gfx_v12_0.h" #include "soc15_common.h" #include "soc21.h" #include "gc/gc_12_0_0_offset.h" @@ -350,6 +351,132 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req) +{ + u32 i, tmp, val; + + for (i = 0; i < adev->usec_timeout; i++) { + /* Request with MeId=2, PipeId=0 */ + tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); + WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); + + val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); + if (req) { + if (val == tmp) + break; + } else { + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, + REQUEST, 1); + + /* unlocked or locked by firmware */ + if (val != tmp) + break; + } + udelay(1); + 
} + + if (i >= adev->usec_timeout) + return -EINVAL; + + return 0; +} + +static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, + uint32_t queue_id, uint32_t vmid) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t value, reg; + int i, r = 0; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + + if (queue_type == AMDGPU_RING_TYPE_GFX) { + dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n", + me_id, pipe_id, queue_id, vmid); + + mutex_lock(&adev->gfx.reset_sem_mutex); + gfx_v12_0_request_gfx_index_mutex(adev, true); + /* all se allow writes */ + WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, + (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + if (pipe_id == 0) + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id); + else + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id); + WREG32_SOC15(GC, 0, regCP_VMID_RESET, value); + gfx_v12_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); + + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n"); + r = -ETIMEDOUT; + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { 
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on hqd deactivate\n"); + r = -ETIMEDOUT; + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { + dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + switch (me_id) { + case 1: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ); + break; + case 0: + default: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ); + break; + } + + value = 1 << queue_id; + WREG32(reg, value); + /* wait for queue reset done */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(reg) & value)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on sdma queue reset done\n"); + r = -ETIMEDOUT; + } + } + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, struct mes_reset_queue_input *input) { @@ -629,7 +756,8 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + + pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); } if (enforce_isolation) @@ -721,6 +849,11 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, union MESAPI__RESET mes_reset_queue_pkt; int pipe; + if (input->use_mmio) + return mes_v12_0_reset_queue_mmio(mes, input->queue_type, + input->me_id, input->pipe_id, + input->queue_id, input->vmid); + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; @@ -851,29 +984,50 @@ static void mes_v12_0_enable(struct amdgpu_device 
*adev, bool enable) uint32_t pipe, data = 0; if (enable) { - data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); - WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); - mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { soc21_grbm_select(adev, 3, pipe, 0, 0); + if (amdgpu_mes_log_enable) { + u32 log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE; + /* In case uni mes is not enabled, only program for pipe 0 */ + if (adev->mes.event_log_size >= (pipe + 1) * log_size) { + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO, + lower_32_bits(adev->mes.event_log_gpu_addr + + pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE)); + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI, + upper_32_bits(adev->mes.event_log_gpu_addr + + pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE)); + dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n", + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI), + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO)); + } + } + + data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); + if (pipe == 0) + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + else + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); + WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START, lower_32_bits(ucode_addr)); WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI, upper_32_bits(ucode_addr)); + + /* unhalt MES and activate one pipe each loop */ + data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); + if (pipe) + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); + dev_info(adev->dev, "program CP_MES_CNTL : 0x%x\n", data); + + WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); + } soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - /* unhalt MES and activate pipe0 */ - data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 
1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); - WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); - if (amdgpu_emu_mode) msleep(100); else if (adev->enable_uni_mes) @@ -1347,8 +1501,9 @@ static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; adev->mes.enable_legacy_queue_map = true; - adev->mes.event_log_size = adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE) : AMDGPU_MES_LOG_BUFFER_SIZE; - + adev->mes.event_log_size = adev->enable_uni_mes ? + (AMDGPU_MAX_MES_PIPES * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE)) : + (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); r = amdgpu_mes_init(adev); if (r) return r; @@ -1455,9 +1610,7 @@ static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index e9a6f33ca710..243eabda0607 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -356,7 +356,7 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, - enable); + enable, 0); } static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index b01bb759d0f4..e646e5cef0a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -33,7 +33,6 @@ #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define 
regVM_L2_CNTL4_DEFAULT 0x000000c1 -#define mmSMNAID_AID0_MCA_SMU 0x03b30400 static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 0fbc3be81f14..f2ab5001b492 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -108,7 +108,7 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n", status); - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(4, 1, 0): mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw]; break; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 0820ed62e2e8..62cdfe10e6f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -434,9 +434,8 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, * this should allow us to catch up. 
*/ tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow " - "(0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); + dev_warn(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp); ih->rptr = tmp; tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); @@ -667,17 +666,17 @@ static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev, } } -static int navi10_ih_set_clockgating_state(void *handle, +static int navi10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; navi10_ih_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); return 0; } -static int navi10_ih_set_powergating_state(void *handle, +static int navi10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c index 39919e0892c1..c92875ceb31f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -28,6 +28,7 @@ #include "nbif/nbif_6_3_1_sh_mask.h" #include "pcie/pcie_6_1_0_offset.h" #include "pcie/pcie_6_1_0_sh_mask.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include <uapi/linux/kfd_ioctl.h> static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) @@ -518,3 +519,83 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = { .get_rom_offset = nbif_v6_3_1_get_rom_offset, .set_reg_remap = nbif_v6_3_1_set_reg_remap, }; + +static int nbif_v6_3_1_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* The ras_controller_irq enablement should be done in psp bl when it + * tries to enable ras feature. 
Driver only need to set the correct interrupt + * vector for bare-metal and sriov use case respectively + */ + uint32_t bif_doorbell_int_cntl; + + bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1); + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl); + + return 0; +} + +static int nbif_v6_3_1_process_err_event_athub_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for err_event_athub_irq should be written + * to bif ring. since bif ring is not enabled, just leave process callback + * as a dummy one. + */ + return 0; +} + +static const struct amdgpu_irq_src_funcs nbif_v6_3_1_ras_err_event_athub_irq_funcs = { + .set = nbif_v6_3_1_set_ras_err_event_athub_irq_state, + .process = nbif_v6_3_1_process_err_event_athub_irq, +}; + +static void nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_int_cntl; + + bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_int_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl); + amdgpu_ras_global_ras_isr(adev); + } +} + +static int nbif_v6_3_1_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev) +{ + int r; + + /* init the irq funcs */ + adev->nbio.ras_err_event_athub_irq.funcs = + &nbif_v6_3_1_ras_err_event_athub_irq_funcs; + adev->nbio.ras_err_event_athub_irq.num_types 
= 1; + + /* register ras err event athub interrupt + * nbif v6_3_1 uses the same irq source as nbio v7_4 + */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT, + &adev->nbio.ras_err_event_athub_irq); + + return r; +} + +struct amdgpu_nbio_ras nbif_v6_3_1_ras = { + .handle_ras_err_event_athub_intr_no_bifring = + nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring, + .init_ras_err_event_athub_interrupt = + nbif_v6_3_1_init_ras_err_event_athub_interrupt, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h index b7f2e0d88905..9ac4831d39e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h @@ -29,5 +29,6 @@ extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs; extern const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs; +extern struct amdgpu_nbio_ras nbif_v6_3_1_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index b1b57dcc5a73..d1032e9992b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -271,8 +271,19 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; +#define regRCC_DEV0_EPF6_STRAP4 0xd304 +#define regRCC_DEV0_EPF6_STRAP4_BASE_IDX 5 + static void nbio_v7_0_init_registers(struct amdgpu_device *adev) { + uint32_t data; + + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { + case IP_VERSION(2, 5, 0): + data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4) & ~BIT(23); + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4, data); + break; + } } #define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 814ab59fdd4a..41421da63a08 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -275,7 +275,7 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 11, 0): case IP_VERSION(7, 11, 1): case IP_VERSION(7, 11, 2): diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index 1ac730328516..3fb6d2aa7e3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -247,7 +247,7 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data); - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 7, 0): data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23); WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 3bad565ded73..47db483c3516 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1039,10 +1039,10 @@ static bool nv_common_is_idle(void *handle) return true; } -static int nv_common_set_clockgating_state(void *handle, +static int nv_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1070,7 +1070,7 @@ static int nv_common_set_clockgating_state(void *handle, return 0; } -static int nv_common_set_powergating_state(void *handle, +static int nv_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* TODO */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 
c4b775aaee9f..cc621064610f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -51,6 +51,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_12_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_12_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_14_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_14_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin"); @@ -122,6 +124,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): err = psp_init_sos_microcode(psp, ucode_prefix); if (err) @@ -177,6 +180,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) retry_cnt = ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))) ? 
PSP_VMBX_POLLING_LIMIT : 10; @@ -203,6 +207,7 @@ static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp) int ret; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) { ret = psp_v13_0_wait_for_vmbx_ready(psp); if (ret) @@ -288,6 +293,11 @@ static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp) return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); } +static int psp_v13_0_bootloader_load_spdm_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->spdm_drv, PSP_BL__LOAD_SPDMDRV); +} + static inline void psp_v13_0_init_sos_version(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -798,6 +808,7 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp) return false; if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) && (!(adev->flags & AMD_IS_APU))) { reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127); @@ -857,6 +868,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv, .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv, .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv, + .bootloader_load_spdm_drv = psp_v13_0_bootloader_load_spdm_drv, .bootloader_load_sos = psp_v13_0_bootloader_load_sos, .ring_create = psp_v13_0_ring_create, .ring_stop = psp_v13_0_ring_stop, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 7948d74f8722..135c5099bfb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -145,9 +145,11 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) 
for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; @@ -631,7 +633,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1080,14 +1082,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int sdma_v2_4_set_clockgating_state(void *handle, +static int sdma_v2_4_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { /* XXX handled via the smc on VI */ return 0; } -static int sdma_v2_4_set_powergating_state(void *handle, +static int sdma_v2_4_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 9a3d729545a7..c611328671ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -305,9 +305,11 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; @@ -904,7 +906,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1483,10 +1485,10 @@ static void 
sdma_v3_0_update_sdma_medium_grain_light_sleep( } } -static int sdma_v3_0_set_clockgating_state(void *handle, +static int sdma_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1506,7 +1508,7 @@ static int sdma_v3_0_set_clockgating_state(void *handle, return 0; } -static int sdma_v3_0_set_powergating_state(void *handle, +static int sdma_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index c1f98f6cf20d..b48d9c0b2e1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1565,7 +1565,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1956,7 +1956,7 @@ static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (adev->flags & AMD_IS_APU) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0); if (!amdgpu_sriov_vf(adev)) sdma_v4_0_init_golden_registers(adev); @@ -1983,7 +1983,7 @@ static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v4_0_enable(adev, false); if (adev->flags & AMD_IS_APU) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true, 0); return 0; } @@ -2297,10 +2297,10 @@ static void sdma_v4_0_update_medium_grain_light_sleep( } } -static int sdma_v4_0_set_clockgating_state(void *handle, +static int sdma_v4_0_set_clockgating_state(struct amdgpu_ip_block 
*ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -2312,10 +2312,10 @@ static int sdma_v4_0_set_clockgating_state(void *handle, return 0; } -static int sdma_v4_0_set_powergating_state(void *handle, +static int sdma_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 1, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index a38553f38fdc..5e0066cd6c51 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -189,6 +189,7 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) || amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) { ret = amdgpu_sdma_init_microcode(adev, 0, true); break; @@ -667,11 +668,12 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) * * @adev: amdgpu_device pointer * @i: instance to resume + * @restore: used to restore wptr when restart * * Set up the gfx DMA ring buffers and enable them. * Returns 0 for success, error for failure. 
*/ -static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) +static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -698,16 +700,24 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8); WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40); - ring->wptr = 0; + if (!restore) + ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1); /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + if (restore) { + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(ring->wptr << 2)); + } else { + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + } doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); @@ -755,11 +765,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) * * @adev: amdgpu_device pointer * @i: instance to resume + * @restore: boolean to say restore needed or not * * Set up the page DMA ring buffers and enable them. * Returns 0 for success, error for failure. 
*/ -static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) +static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].page; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -775,10 +786,17 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0); + if (restore) { + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(ring->wptr << 2)); + } else { + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0); + } /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI, @@ -792,7 +810,8 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8); WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40); - ring->wptr = 0; + if (!restore) + ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1); @@ -911,12 +930,13 @@ static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @inst_mask: mask of dma engine instances to be enabled + * @restore: boolean to say restore needed or not * * Set up the DMA engines and enable them. * Returns 0 for success, error for failure.
*/ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, - uint32_t inst_mask) + uint32_t inst_mask, bool restore) { struct amdgpu_ring *ring; uint32_t tmp_mask; @@ -927,7 +947,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, sdma_v4_4_2_inst_enable(adev, false, inst_mask); } else { /* bypass sdma microcode loading on Gopher */ - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP && + if (!restore && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP && adev->sdma.instance[0].fw) { r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask); if (r) @@ -946,17 +966,19 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, uint32_t temp; WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); - sdma_v4_4_2_gfx_resume(adev, i); + sdma_v4_4_2_gfx_resume(adev, i, restore); if (adev->sdma.has_page_queue) - sdma_v4_4_2_page_resume(adev, i); + sdma_v4_4_2_page_resume(adev, i, restore); /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); - /* enable context empty interrupt during initialization */ - temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1); - WREG32_SDMA(i, regSDMA_CNTL, temp); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { + /* enable context empty interrupt during initialization */ + temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1); + WREG32_SDMA(i, regSDMA_CNTL, temp); + } if (!amdgpu_sriov_vf(adev)) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* unhalt engine */ @@ -1110,7 +1132,7 @@ static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1384,6 +1406,12 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) &adev->sdma.srbm_write_irq); if (r) return r; + + r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i), + 
SDMA0_4_0__SRCID__SDMA_CTXEMPTY, + &adev->sdma.ctxt_empty_irq); + if (r) + return r; } for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1466,6 +1494,7 @@ static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_sdma_sysfs_reset_mask_fini(adev); if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) || amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) amdgpu_sdma_destroy_inst_ctx(adev, true); else @@ -1486,7 +1515,7 @@ static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); - r = sdma_v4_4_2_inst_start(adev, inst_mask); + r = sdma_v4_4_2_inst_start(adev, inst_mask, false); return r; } @@ -1514,7 +1543,7 @@ static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block) return 0; } -static int sdma_v4_4_2_set_clockgating_state(void *handle, +static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block) @@ -1522,7 +1551,7 @@ static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (amdgpu_in_reset(adev)) - sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + sdma_v4_4_2_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE); return sdma_v4_4_2_hw_fini(ip_block); } @@ -1573,6 +1602,42 @@ static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, r; + u32 inst_mask; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + /* stop queue */ + inst_mask = 1 << ring->me; + sdma_v4_4_2_inst_gfx_stop(adev, inst_mask); + if (adev->sdma.has_page_queue) + sdma_v4_4_2_inst_page_stop(adev, inst_mask); + + r = amdgpu_dpm_reset_sdma(adev, 1 << 
GET_INST(SDMA0, ring->me)); + if (r) + return r; + + udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SDMA(ring->me, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT)) + break; + udelay(1); + } + + if (i == adev->usec_timeout) { + dev_err(adev->dev, "timed out waiting for SDMA%d unhalt after reset\n", + ring->me); + return -ETIMEDOUT; + } + + return sdma_v4_4_2_inst_start(adev, inst_mask, true); +} + static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -1755,6 +1820,16 @@ static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v4_4_2_process_ctxt_empty_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* There is nothing useful to be done here, only kept for debug */ + dev_dbg_ratelimited(adev->dev, "SDMA context empty interrupt"); + sdma_v4_4_2_print_iv_entry(adev, entry); + return 0; +} + static void sdma_v4_4_2_inst_update_medium_grain_light_sleep( struct amdgpu_device *adev, bool enable, uint32_t inst_mask) { @@ -1821,10 +1896,10 @@ static void sdma_v4_4_2_inst_update_medium_grain_clock_gating( } } -static int sdma_v4_4_2_set_clockgating_state(void *handle, +static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t inst_mask; if (amdgpu_sriov_vf(adev)) @@ -1839,7 +1914,7 @@ static int sdma_v4_4_2_set_clockgating_state(void *handle, return 0; } -static int sdma_v4_4_2_set_powergating_state(void *handle, +static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -1895,7 +1970,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block) if (!adev->sdma.ip_dump) return; - amdgpu_gfx_off_ctrl(adev, false); for (i = 0; 
i < adev->sdma.num_instances; i++) { instance_offset = i * reg_count; for (j = 0; j < reg_count; j++) @@ -1903,7 +1977,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block) RREG32(sdma_v4_4_2_get_reg_offset(adev, i, sdma_reg_list_4_4_2[j].reg_offset)); } - amdgpu_gfx_off_ctrl(adev, true); } const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { @@ -1955,6 +2028,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { .emit_wreg = sdma_v4_4_2_ring_emit_wreg, .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = sdma_v4_4_2_reset_queue, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { @@ -2038,6 +2112,10 @@ static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = { .process = sdma_v4_4_2_process_srbm_write_irq, }; +static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ctxt_empty_irq_funcs = { + .process = sdma_v4_4_2_process_ctxt_empty_irq, +}; + static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) { adev->sdma.trap_irq.num_types = adev->sdma.num_instances; @@ -2046,6 +2124,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances; adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances; adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances; + adev->sdma.ctxt_empty_irq.num_types = adev->sdma.num_instances; adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs; @@ -2054,6 +2133,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs; adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs; adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs; + adev->sdma.ctxt_empty_irq.funcs = &sdma_v4_4_2_ctxt_empty_irq_funcs; 
} /** @@ -2167,7 +2247,7 @@ static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask) if (!amdgpu_sriov_vf(adev)) sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); - r = sdma_v4_4_2_inst_start(adev, inst_mask); + r = sdma_v4_4_2_inst_start(adev, inst_mask, false); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index fa9b40934957..b764550834a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1194,7 +1194,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1853,10 +1853,10 @@ static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev } } -static int sdma_v5_0_set_clockgating_state(void *handle, +static int sdma_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1877,7 +1877,7 @@ static int sdma_v5_0_set_clockgating_state(void *handle, return 0; } -static int sdma_v5_0_set_powergating_state(void *handle, +static int sdma_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index ba5160399ab2..b1818e87889a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1050,7 +1050,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1812,10 +1812,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device 
*adev } } -static int sdma_v5_2_set_clockgating_state(void *handle, +static int sdma_v5_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1841,7 +1841,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle, return 0; } -static int sdma_v5_2_set_powergating_state(void *handle, +static int sdma_v5_2_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index d46128b0ec92..1a023b45f0be 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1063,7 +1063,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1601,13 +1601,13 @@ static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int sdma_v6_0_set_clockgating_state(void *handle, +static int sdma_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int sdma_v6_0_set_powergating_state(void *handle, +static int sdma_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index d2ce6b6a7ff6..9c17df2cf37b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -490,162 +490,185 @@ static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable) } /** - * sdma_v7_0_gfx_resume - setup and start the async dma engines + * sdma_v7_0_gfx_resume_instance - start/restart a certain sdma engine * * @adev: 
amdgpu_device pointer + * @i: instance + * @restore: used to restore wptr when restart * - * Set up the gfx DMA ring buffers and enable them. - * Returns 0 for success, error for failure. + * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr. + * Return 0 for success. */ -static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev) +static int sdma_v7_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore) { struct amdgpu_ring *ring; u32 rb_cntl, ib_cntl; u32 rb_bufsz; u32 doorbell; u32 doorbell_offset; - u32 tmp; + u32 temp; u64 wptr_gpu_addr; - int i, r; - - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; + int r; - //if (!amdgpu_sriov_vf(adev)) - // WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + ring = &adev->sdma.instance[i].ring; - /* Set ring buffer size in dwords */ - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, - RPTR_WRITEBACK_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ + 
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + if (restore) { + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + } else { WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0); WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0); WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0); WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0); + } + /* setup the wptr shadow polling */ + wptr_gpu_addr = ring->wptr_gpu_addr; + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + if (amdgpu_sriov_vf(adev)) + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); + else + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, 
WPTR_POLL_ENABLE, 0); - /* setup the wptr shadow polling */ - wptr_gpu_addr = ring->wptr_gpu_addr; - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - - /* set the wb address whether it's enabled or not */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - if (amdgpu_sriov_vf(adev)) - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); - else - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + if (!restore) ring->wptr = 0; - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1); + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1); - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal 
use register write for wptr */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - } + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + } - doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL)); - doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET)); + doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL)); + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET)); - if (ring->use_doorbell) { - doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1); - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET, - OFFSET, ring->doorbell_index); - } else { - doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0); - } - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - - if (i == 0) - adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances); - - if (amdgpu_sriov_vf(adev)) - sdma_v7_0_ring_set_wptr(ring); - - /* set minor_ptr_update to 0 after wptr programed */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); - - /* Set up sdma hang watchdog */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, 
regSDMA0_WATCHDOG_CNTL)); - /* 100ms per unit */ - tmp = REG_SET_FIELD(tmp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, - max(adev->usec_timeout/100000, 1)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), tmp); - - /* Set up RESP_MODE to non-copy addresses */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); - tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); - tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), tmp); - - /* program default cache read and write policy */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE)); - /* clean read policy and write policy bits */ - tmp &= 0xFF0FFF; - tmp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | - (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), tmp); - - if (!amdgpu_sriov_vf(adev)) { - /* unhalt engine */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL)); - tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, HALT, 0); - tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, RESET, 0); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp); - } + if (ring->use_doorbell) { + doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0); + } + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - /* enable DMA RB */ - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + if (i == 0) + 
adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances); - ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1); + if (amdgpu_sriov_vf(adev)) + sdma_v7_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); + + /* Set up sdma hang watchdog */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); + /* 100ms per unit */ + temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, + max(adev->usec_timeout/100000, 1)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | + (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, HALT, 0); + temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, RESET, 0); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), temp); + } + + /* enable DMA RB */ + rb_cntl = 
REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1); #endif - /* enable DMA IBs */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); + /* enable DMA IBs */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); + ring->sched.ready = true; - ring->sched.ready = true; + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v7_0_ctx_switch_enable(adev, true); + sdma_v7_0_enable(adev, true); + } - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v7_0_ctx_switch_enable(adev, true); - sdma_v7_0_enable(adev, true); - } + r = amdgpu_ring_test_helper(ring); + if (r) + ring->sched.ready = false; - r = amdgpu_ring_test_helper(ring); - if (r) { - ring->sched.ready = false; - return r; - } + return r; +} + +/** + * sdma_v7_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them. + * Returns 0 for success, error for failure. 
+ */ +static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev) +{ + int i, r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = sdma_v7_0_gfx_resume_instance(adev, i, false); + if (r) + return r; } return 0; + } /** @@ -806,6 +829,31 @@ static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block) return false; } +static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring) + break; + } + + if (i == adev->sdma.num_instances) { + DRM_ERROR("sdma instance not found\n"); + return -EINVAL; + } + + r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); + if (r) + return r; + + return sdma_v7_0_gfx_resume_instance(adev, i, true); +} + /** * sdma_v7_0_start - setup and start the async dma engines * @@ -1060,7 +1108,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1316,6 +1364,13 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } + adev->sdma.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + r = amdgpu_sdma_sysfs_reset_mask_init(adev); + if (r) + return r; /* Allocate memory for SDMA IP Dump buffer */ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); if (ptr) @@ -1334,6 +1389,7 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + amdgpu_sdma_sysfs_reset_mask_fini(adev); amdgpu_sdma_destroy_inst_ctx(adev, true); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) @@ -1524,13 +1580,13 @@ static int 
sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int sdma_v7_0_set_clockgating_state(void *handle, +static int sdma_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int sdma_v7_0_set_powergating_state(void *handle, +static int sdma_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -1636,6 +1692,7 @@ static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { .emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v7_0_ring_init_cond_exec, .preempt_ib = sdma_v7_0_ring_preempt_ib, + .reset = sdma_v7_0_reset_queue, }; static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 00f63d3fbea7..77ef7da2e4fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2649,13 +2649,13 @@ static bool si_common_is_idle(void *handle) return true; } -static int si_common_set_clockgating_state(void *handle, +static int si_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int si_common_set_powergating_state(void *handle, +static int si_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 47647a6083e8..dbd78d5345a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -286,7 +286,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -629,13 +629,13 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev, return 0; } -static int 
si_dma_set_clockgating_state(void *handle, +static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { u32 orig, data, offset; int i; bool enable; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; enable = (state == AMD_CG_STATE_GATE); @@ -672,12 +672,12 @@ static int si_dma_set_clockgating_state(void *handle, return 0; } -static int si_dma_set_powergating_state(void *handle, +static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; WREG32(DMA_PGFSM_WRITE, 0x00002000); WREG32(DMA_PGFSM_CONFIG, 0x100010ff); diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 2ec1ebe4db11..a32b6243c1f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -263,13 +263,13 @@ static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int si_ih_set_clockgating_state(void *handle, +static int si_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int si_ih_set_powergating_state(void *handle, +static int si_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ede072758dab..a59b4c36cad7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -171,6 +171,24 @@ static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = { .codec_array = NULL, }; +static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_encode_vcn0 = { + .codec_count = 0, + .codec_array = NULL, +}; + +static const struct amdgpu_video_codec_info vcn_5_0_1_video_codecs_decode_array_vcn0[] = { + 
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_decode_vcn0 = { + .codec_count = ARRAY_SIZE(vcn_5_0_1_video_codecs_decode_array_vcn0), + .codec_array = vcn_5_0_1_video_codecs_decode_array_vcn0, +}; + static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -209,6 +227,12 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, else *codecs = &vcn_4_0_3_video_codecs_decode; return 0; + case IP_VERSION(5, 0, 1): + if (encode) + *codecs = &vcn_5_0_1_video_codecs_encode_vcn0; + else + *codecs = &vcn_5_0_1_video_codecs_decode_vcn0; + return 0; default: return -EINVAL; } @@ -327,6 +351,7 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 14)) return 10000; if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) || @@ -556,6 +581,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) break; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 12): /* Use gpu_recovery param to target a reset method. * Enable triggering of GPU reset only if specified * by module parameter. 
@@ -1177,6 +1203,7 @@ static int soc15_common_early_init(struct amdgpu_ip_block *ip_block) break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): adev->asic_funcs = &aqua_vanjaram_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | @@ -1385,10 +1412,10 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data); } -static int soc15_common_set_clockgating_state(void *handle, +static int soc15_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1453,6 +1480,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 12)) && (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 14))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); @@ -1473,7 +1501,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) adev->df.funcs->get_clockgating_state(adev, flags); } -static int soc15_common_set_powergating_state(void *handle, +static int soc15_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* todo */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index d6999835918f..62ad67d0b598 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -928,10 +928,10 @@ static bool soc21_common_is_idle(void *handle) return true; } -static int soc21_common_set_clockgating_state(void *handle, +static int soc21_common_set_clockgating_state(struct 
amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(4, 3, 0): @@ -954,10 +954,10 @@ static int soc21_common_set_clockgating_state(void *handle, return 0; } -static int soc21_common_set_powergating_state(void *handle, +static int soc21_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) { case IP_VERSION(6, 0, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index be96de92b2f5..6b8e078ee7c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -444,8 +444,18 @@ static int soc24_common_late_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_get_irq(adev); + } else { + if (adev->nbio.ras && + adev->nbio.ras_err_event_athub_irq.funcs) + /* don't need to fail gpu late init + * if enabling athub_err_event interrupt failed + * nbif v6_3_1 only support fatal error hanlding + * just enable the interrupt directly + */ + amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); + } /* Enable selfring doorbell aperture late because doorbell BAR * aperture will change if resize BAR successfully in gmc sw_init. 
@@ -501,8 +511,13 @@ static int soc24_common_hw_fini(struct amdgpu_ip_block *ip_block) adev->nbio.funcs->enable_doorbell_aperture(adev, false); adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_put_irq(adev); + } else { + if (adev->nbio.ras && + adev->nbio.ras_err_event_athub_irq.funcs) + amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); + } return 0; } @@ -522,10 +537,10 @@ static bool soc24_common_is_idle(void *handle) return true; } -static int soc24_common_set_clockgating_state(void *handle, +static int soc24_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 3, 1): @@ -542,10 +557,10 @@ static int soc24_common_set_clockgating_state(void *handle, return 0; } -static int soc24_common_set_powergating_state(void *handle, +static int soc24_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) { case IP_VERSION(7, 0, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 21b71a427b1f..64891f099366 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -30,6 +30,9 @@ #define RSP_ID_MASK (1U << 31) #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) +/* invalid node instance value */ +#define TA_RAS_INV_NODE 0xffff + /* RAS related enumerations */ /**********************************************************/ enum ras_command { diff --git a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h index 
00d8bdb8254f..9ec2e03d41c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h @@ -31,10 +31,12 @@ * Secure Display Command ID */ enum ta_securedisplay_command { - /* Query whether TA is responding used only for validation purpose */ + /* Query whether TA is responding. It is used only for validation purpose */ TA_SECUREDISPLAY_COMMAND__QUERY_TA = 1, /* Send region of Interest and CRC value to I2C */ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC = 2, + /* V2 to send multiple regions of Interest and CRC value to I2C */ + TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2 = 3, /* Maximum Command ID */ TA_SECUREDISPLAY_COMMAND__MAX_ID = 0x7FFFFFFF, }; @@ -83,6 +85,8 @@ enum ta_securedisplay_ta_query_cmd_ret { enum ta_securedisplay_buffer_size { /* 15 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) */ TA_SECUREDISPLAY_I2C_BUFFER_SIZE = 15, + /* 16 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) + 1 byte(roi_idx) */ + TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE = 16, }; /** Input/output structures for Secure Display commands */ @@ -95,7 +99,15 @@ enum ta_securedisplay_buffer_size { * Physical ID to determine which DIO scratch register should be used to get ROI */ struct ta_securedisplay_send_roi_crc_input { - uint32_t phy_id; /* Physical ID */ + /* Physical ID */ + uint32_t phy_id; +}; + +struct ta_securedisplay_send_roi_crc_v2_input { + /* Physical ID */ + uint32_t phy_id; + /* Region of interest index */ + uint8_t roi_idx; }; /** @union ta_securedisplay_cmd_input @@ -104,6 +116,8 @@ struct ta_securedisplay_send_roi_crc_input { union ta_securedisplay_cmd_input { /* send ROI and CRC input buffer format */ struct ta_securedisplay_send_roi_crc_input send_roi_crc; + /* send ROI and CRC input buffer format, v2 adds a ROI index */ + struct ta_securedisplay_send_roi_crc_v2_input send_roi_crc_v2; uint32_t reserved[4]; }; @@ -128,6 +142,10 @@ struct ta_securedisplay_send_roi_crc_output { uint8_t reserved; }; +struct 
ta_securedisplay_send_roi_crc_v2_output { + uint8_t i2c_buf[TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE]; /* I2C buffer */ +}; + /** @union ta_securedisplay_cmd_output * Output buffer */ @@ -136,6 +154,8 @@ union ta_securedisplay_cmd_output { struct ta_securedisplay_query_ta_output query_ta; /* Send ROI CRC output buffer format used only for validation purpose */ struct ta_securedisplay_send_roi_crc_output send_roi_crc; + /* Send ROI CRC output buffer format used only for validation purpose */ + struct ta_securedisplay_send_roi_crc_v2_output send_roi_crc_v2; uint32_t reserved[4]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 5a04a6770138..0968e551f7b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -448,13 +448,13 @@ static int tonga_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int tonga_ih_set_clockgating_state(void *handle, +static int tonga_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int tonga_ih_set_powergating_state(void *handle, +static int tonga_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 1a8ea834efa6..a7b9c358a2d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -173,156 +173,96 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } -static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, +static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, - struct ta_ras_query_address_input *addr_in) + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out, + bool dump_addr) { - uint32_t col, row, row_xor, bank, 
channel_index; - uint64_t soc_pa, retired_page, column, err_addr; - struct ta_ras_query_address_output addr_out; + uint32_t col, col_lower, row, row_lower, bank; + uint32_t channel_index = 0, umc_inst = 0; + uint32_t i, loop_bits[UMC_V12_0_RETIRE_LOOP_BITS]; + uint64_t soc_pa, column, err_addr; + struct ta_ras_query_address_output addr_out_tmp; + struct ta_ras_query_address_output *paddr_out; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; + int ret = 0; + + if (!addr_out) + paddr_out = &addr_out_tmp; + else + paddr_out = addr_out; - err_addr = addr_in->ma.err_addr; - addr_in->addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); + err_addr = bank = 0; + if (addr_in) { + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out); + if (ret) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); - return; - } + goto out; + } - soc_pa = addr_out.pa.pa; - bank = addr_out.pa.bank; - channel_index = addr_out.pa.channel_idx; - - col = (err_addr >> 1) & 0x1fULL; - row = (err_addr >> 10) & 0x3fffULL; - row_xor = row ^ (0x1ULL << 13); - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - amdgpu_umc_fill_error_record(err_data, err_addr, - 
retired_page, channel_index, addr_in->ma.umc_inst); - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, addr_in->ma.umc_inst); + bank = paddr_out->pa.bank; + /* no need to care about umc inst if addr_in is NULL */ + umc_inst = addr_in->ma.umc_inst; } -} -static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev, - struct ta_ras_query_address_output *addr_out, - uint64_t err_addr) -{ - uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column; - - soc_pa = addr_out->pa.pa; - bank = addr_out->pa.bank; - channel_index = addr_out->pa.channel_idx; - - col = (err_addr >> 1) & 0x1fULL; - row = (err_addr >> 10) & 0x3fffULL; - row_xor = row ^ (0x1ULL << 13); - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); - } -} + loop_bits[0] = UMC_V12_0_PA_C2_BIT; + loop_bits[1] = UMC_V12_0_PA_C3_BIT; + loop_bits[2] = UMC_V12_0_PA_C4_BIT; + loop_bits[3] = UMC_V12_0_PA_R13_BIT; 
-static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, - uint64_t pa_addr, uint64_t *pfns, int len) -{ - uint64_t soc_pa, retired_page, column; - uint32_t pos = 0; - - soc_pa = pa_addr; - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - - if (pos >= len) - return 0; - pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - - if (pos >= len) - return 0; - pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS4_PARTITION_MODE) { + loop_bits[0] = UMC_V12_0_PA_CH4_BIT; + loop_bits[1] = UMC_V12_0_PA_CH5_BIT; + loop_bits[2] = UMC_V12_0_PA_B0_BIT; + loop_bits[3] = UMC_V12_0_PA_R11_BIT; } - return pos; -} - -static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev, - uint64_t err_addr, uint32_t ch, uint32_t umc, - uint32_t node, uint32_t socket, - uint64_t *addr, bool dump_addr) -{ - struct ta_ras_query_address_input addr_in; - struct ta_ras_query_address_output addr_out; - - memset(&addr_in, 0, sizeof(addr_in)); - addr_in.ma.err_addr = err_addr; - addr_in.ma.ch_inst = ch; - addr_in.ma.umc_inst = umc; - addr_in.ma.node_inst = node; - addr_in.ma.socket_id = socket; - addr_in.addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); - return -EINVAL; + soc_pa = paddr_out->pa.pa; + channel_index = 
paddr_out->pa.channel_idx; + /* clear loop bits in soc physical address */ + for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) + soc_pa &= ~BIT_ULL(loop_bits[i]); + + paddr_out->pa.pa = soc_pa; + /* get column bit 0 and 1 in mca address */ + col_lower = (err_addr >> 1) & 0x3ULL; + /* MA_R13_BIT will be handled later */ + row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL; + + if (!err_data && !dump_addr) + goto out; + + /* loop for all possibilities of retired bits */ + for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; column++) { + soc_pa = paddr_out->pa.pa; + for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) + soc_pa |= (((column >> i) & 0x1ULL) << loop_bits[i]); + + col = ((column & 0x7) << 2) | col_lower; + /* add row bit 13 */ + row = ((column >> 3) << 13) | row_lower; + + if (dump_addr) + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + soc_pa, row, col, bank, channel_index); + + if (err_data) + amdgpu_umc_fill_error_record(err_data, err_addr, + soc_pa, channel_index, umc_inst); } - if (dump_addr) - umc_v12_0_dump_addr_info(adev, &addr_out, err_addr); - - *addr = addr_out.pa.pa; - - return 0; +out: + return ret; } static int umc_v12_0_query_error_address(struct amdgpu_device *adev, @@ -374,7 +314,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, addr_in.ma.umc_inst = umc_inst; addr_in.ma.node_inst = node_inst; - umc_v12_0_convert_error_address(adev, err_data, &addr_in); + umc_v12_0_convert_error_address(adev, err_data, &addr_in, NULL, true); } /* clear umc status */ @@ -526,6 +466,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; uint64_t err_addr, pa_addr = 0; struct ras_ecc_err *ecc_err; + struct ta_ras_query_address_output addr_out; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; + uint32_t shift_bit = UMC_V12_0_PA_C4_BIT; int count, ret, i; hwid = REG_GET_FIELD(ipid, 
MCMP1_IPIDT0, HardwareID); @@ -552,10 +495,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, MCA_IPID_2_UMC_CH(ipid), err_addr); - ret = umc_v12_0_convert_mca_to_addr(adev, + ret = amdgpu_umc_mca_to_addr(adev, err_addr, MCA_IPID_2_UMC_CH(ipid), MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid), - MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true); + MCA_IPID_2_SOCKET_ID(ipid), &addr_out, true); if (ret) return ret; @@ -563,14 +506,21 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, if (!ecc_err) return -ENOMEM; + pa_addr = addr_out.pa.pa; ecc_err->status = status; ecc_err->ipid = ipid; ecc_err->addr = addr; - ecc_err->pa_pfn = UMC_V12_ADDR_MASK_BAD_COLS(pa_addr) >> AMDGPU_GPU_PAGE_SHIFT; + ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT; + ecc_err->channel_idx = addr_out.pa.channel_idx; + + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + if (nps == AMDGPU_NPS4_PARTITION_MODE) + shift_bit = UMC_V12_0_PA_B0_BIT; /* If converted pa_pfn is 0, use pa C4 pfn. 
*/ if (!ecc_err->pa_pfn) - ecc_err->pa_pfn = BIT_ULL(UMC_V12_0_PA_C4_BIT) >> AMDGPU_GPU_PAGE_SHIFT; + ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT; ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); if (ret) { @@ -586,7 +536,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, con->umc_ecc_log.de_queried_count++; memset(page_pfn, 0, sizeof(page_pfn)); - count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, pa_addr, page_pfn, ARRAY_SIZE(page_pfn)); if (count <= 0) { @@ -629,7 +579,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, return -EINVAL; memset(page_pfn, 0, sizeof(page_pfn)); - count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT, page_pfn, ARRAY_SIZE(page_pfn)); @@ -637,7 +587,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, ret = amdgpu_umc_fill_error_record(err_data, ecc_err->addr, page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT, - MCA_IPID_2_UMC_CH(ecc_err->ipid), + ecc_err->channel_idx, MCA_IPID_2_UMC_INST(ecc_err->ipid)); if (ret) break; @@ -676,6 +626,31 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, mutex_unlock(&con->umc_ecc_log.lock); } +static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev, + uint64_t mca_addr, uint64_t retired_page) +{ + uint32_t die = 0; + + /* we only calculate die id for nps1 mode right now */ + die += ((((retired_page >> 12) & 0x1ULL)^ + ((retired_page >> 20) & 0x1ULL) ^ + ((retired_page >> 27) & 0x1ULL) ^ + ((retired_page >> 34) & 0x1ULL) ^ + ((retired_page >> 41) & 0x1ULL)) << 0); + + /* the original PA_C4 and PA_R13 may be cleared in retired_page, so + * get them from mca_addr. 
+ */ + die += ((((retired_page >> 13) & 0x1ULL) ^ + ((mca_addr >> 5) & 0x1ULL) ^ + ((retired_page >> 28) & 0x1ULL) ^ + ((mca_addr >> 23) & 0x1ULL) ^ + ((retired_page >> 42) & 0x1ULL)) << 1); + die &= 3; + + return die; +} + struct amdgpu_umc_ras umc_v12_0_ras = { .ras_block = { .hw_ops = &umc_v12_0_ras_hw_ops, @@ -686,5 +661,7 @@ struct amdgpu_umc_ras umc_v12_0_ras = { .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr, .check_ecc_err_status = umc_v12_0_check_ecc_err_status, .update_ecc_status = umc_v12_0_update_ecc_status, + .convert_ras_err_addr = umc_v12_0_convert_error_address, + .get_die_id_from_pa = umc_v12_0_get_die_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index be5598d76c1d..9298018d938f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -55,12 +55,24 @@ #define UMC_V12_0_NA_MAP_PA_NUM 8 /* R13 bit shift should be considered, double the number */ #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) +/* C2, C3, C4, R13, four bits in MCA address are looped in retirement */ +#define UMC_V12_0_RETIRE_LOOP_BITS 4 /* column bits in SOC physical address */ #define UMC_V12_0_PA_C2_BIT 15 +#define UMC_V12_0_PA_C3_BIT 16 #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ +#define UMC_V12_0_PA_R0_BIT 22 +#define UMC_V12_0_PA_R11_BIT 33 #define UMC_V12_0_PA_R13_BIT 35 +/* channel bit in SOC physical address */ +#define UMC_V12_0_PA_CH4_BIT 12 +#define UMC_V12_0_PA_CH5_BIT 13 +/* bank bit in SOC physical address */ +#define UMC_V12_0_PA_B0_BIT 19 +/* row bits in MCA address */ +#define UMC_V12_0_MA_R0_BIT 10 #define MCA_UMC_HWID_V12_0 0x96 #define MCA_UMC_MCATYPE_V12_0 0x0 @@ -81,11 +93,6 @@ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) -#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \ - ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \ 
- (0x1ULL << UMC_V12_0_PA_C4_BIT) | \ - (0x1ULL << UMC_V12_0_PA_R13_BIT))) - bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c new file mode 100644 index 000000000000..eaca10a3c4a9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c @@ -0,0 +1,160 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "umc_v8_14.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_8_14_0_offset.h" +#include "umc/umc_8_14_0_sh_mask.h" + +static inline uint32_t get_umc_v8_14_reg_offset(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_V8_14_INST_DIST * umc_inst; +} + +static int umc_v8_14_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); + + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + /* clear error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V8_14_CE_CNT_INIT); + + return 0; +} + +static void umc_v8_14_clear_error_count(struct amdgpu_device *adev) +{ + amdgpu_umc_loop_channels(adev, + umc_v8_14_clear_error_count_per_channel, NULL); +} + +static void umc_v8_14_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + + /* UMC 8_14 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccErrCnt) - + UMC_V8_14_CE_CNT_INIT); +} + +static void umc_v8_14_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + /* UMC 8_14 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccUnCorrErrCnt) - + UMC_V8_14_CE_CNT_INIT); +} + +static int 
umc_v8_14_query_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); + + umc_v8_14_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_14_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + + return 0; +} + +static void umc_v8_14_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_umc_loop_channels(adev, + umc_v8_14_query_error_count_per_channel, ras_error_status); + + umc_v8_14_clear_error_count(adev); +} + +static int umc_v8_14_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_GeccErrCntSel, + GeccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_14_CE_CNT_INIT); + + return 0; +} + +static void umc_v8_14_err_cnt_init(struct amdgpu_device *adev) +{ + amdgpu_umc_loop_channels(adev, + umc_v8_14_err_cnt_init_per_channel, NULL); +} + +const struct amdgpu_ras_block_hw_ops umc_v8_14_ras_hw_ops = { + .query_ras_error_count = umc_v8_14_query_ras_error_count, +}; + +struct amdgpu_umc_ras umc_v8_14_ras = { + .ras_block = { + .hw_ops = 
&umc_v8_14_ras_hw_ops, + }, + .err_cnt_init = umc_v8_14_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h new file mode 100644 index 000000000000..20a258f0017a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h @@ -0,0 +1,51 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __UMC_V8_14_H__ +#define __UMC_V8_14_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* number of umc channel instance with memory map register access */ +#define UMC_V8_14_CHANNEL_INSTANCE_NUM 2 +/* number of umc instance with memory map register access */ +#define UMC_V8_14_UMC_INSTANCE_NUM(adev) ((adev)->umc.node_inst_num) + +/* Total channel instances for all available umc nodes */ +#define UMC_V8_14_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V8_14_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc) + +/* UMC register per channel offset */ +#define UMC_V8_14_PER_CHANNEL_OFFSET 0x400 + +#define UMC_V8_14_INST_DIST 0x40000 + +/* EccErrCnt max value */ +#define UMC_V8_14_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UMC_V8_14_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V8_14_CE_CNT_INIT (UMC_V8_14_CE_CNT_MAX - UMC_V8_14_CE_INT_THRESHOLD) + +extern struct amdgpu_umc_ras umc_v8_14_ras; +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index bdbca25d80c4..5830e799c0a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -790,13 +790,13 @@ static int uvd_v3_1_soft_reset(struct amdgpu_ip_block *ip_block) return uvd_v3_1_start(adev); } -static int uvd_v3_1_set_clockgating_state(void *handle, +static int uvd_v3_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int uvd_v3_1_set_powergating_state(void *handle, +static int uvd_v3_1_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index a836dc9cfcad..f93079e09215 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -44,7 +44,7 @@ static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v4_2_set_irq_funcs(struct 
amdgpu_device *adev); static int uvd_v4_2_start(struct amdgpu_device *adev); static void uvd_v4_2_stop(struct amdgpu_device *adev); -static int uvd_v4_2_set_clockgating_state(void *handle, +static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, bool sw_mode); @@ -708,13 +708,13 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev, return 0; } -static int uvd_v4_2_set_clockgating_state(void *handle, +static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int uvd_v4_2_set_powergating_state(void *handle, +static int uvd_v4_2_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the UVD block. @@ -724,7 +724,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) { uvd_v4_2_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index ab55fae3569e..050a0f309390 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -42,7 +42,7 @@ static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v5_0_start(struct amdgpu_device *adev); static void uvd_v5_0_stop(struct amdgpu_device *adev); -static int uvd_v5_0_set_clockgating_state(void *handle, +static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, bool enable); @@ -155,7 +155,7 @@ static int uvd_v5_0_hw_init(struct amdgpu_ip_block 
*ip_block) int r; amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); - uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + uvd_v5_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE); uvd_v5_0_enable_mgcg(adev, true); r = amdgpu_ring_test_helper(ring); @@ -790,16 +790,11 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, } } -static int uvd_v5_0_set_clockgating_state(void *handle, +static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); - struct amdgpu_ip_block *ip_block; - - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD); - if (!ip_block) - return -EINVAL; if (enable) { /* wait for STATUS to clear */ @@ -817,7 +812,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle, return 0; } -static int uvd_v5_0_set_powergating_state(void *handle, +static int uvd_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the UVD block. 
@@ -827,7 +822,7 @@ static int uvd_v5_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; if (state == AMD_PG_STATE_GATE) { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 39f8c3d3a135..d9d036ee51fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -48,7 +48,7 @@ static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v6_0_start(struct amdgpu_device *adev); static void uvd_v6_0_stop(struct amdgpu_device *adev); static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev); -static int uvd_v6_0_set_clockgating_state(void *handle, +static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, bool enable); @@ -467,7 +467,7 @@ static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block) int i, r; amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); - uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + uvd_v6_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE); uvd_v6_0_enable_mgcg(adev, true); r = amdgpu_ring_test_helper(ring); @@ -1450,17 +1450,12 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, } } -static int uvd_v6_0_set_clockgating_state(void *handle, +static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ip_block *ip_block; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD); - if (!ip_block) - return -EINVAL; - if (enable) { /* wait for STATUS to clear */ if 
(uvd_v6_0_wait_for_idle(ip_block)) @@ -1476,7 +1471,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle, return 0; } -static int uvd_v6_0_set_powergating_state(void *handle, +static int uvd_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the UVD block. @@ -1486,7 +1481,7 @@ static int uvd_v6_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 079131aeb2f7..9d237b5937fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1288,7 +1288,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); + struct amdgpu_ring *ring = amdgpu_job_ring(job); unsigned i; /* No patching necessary for the first instance */ @@ -1511,7 +1511,7 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int uvd_v7_0_set_clockgating_state(void *handle, +static int uvd_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { /* needed for driver unload*/ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index c1ed91b39415..c633b7ff2943 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -578,13 +578,13 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int vce_v2_0_set_clockgating_state(void *handle, +static int vce_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum 
amd_clockgating_state state) { bool gate = false; bool sw_cg = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) { gate = true; @@ -596,7 +596,7 @@ static int vce_v2_0_set_clockgating_state(void *handle, return 0; } -static int vce_v2_0_set_powergating_state(void *handle, +static int vce_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCE block. @@ -606,7 +606,7 @@ static int vce_v2_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) return vce_v2_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6bb318a06f19..f8bddcd19b68 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -65,7 +65,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx); static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev); static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block); -static int vce_v3_0_set_clockgating_state(void *handle, +static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); /** * vce_v3_0_ring_get_rptr - get read pointer @@ -497,7 +497,7 @@ static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) return r; vce_v3_0_stop(adev); - return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE); + return vce_v3_0_set_clockgating_state(ip_block, AMD_CG_STATE_GATE); } static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block) @@ -760,10 +760,10 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev, return 0; } 
-static int vce_v3_0_set_clockgating_state(void *handle, +static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); int i; @@ -801,7 +801,7 @@ static int vce_v3_0_set_clockgating_state(void *handle, return 0; } -static int vce_v3_0_set_powergating_state(void *handle, +static int vce_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCE block. @@ -811,7 +811,7 @@ static int vce_v3_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; if (state == AMD_PG_STATE_GATE) { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 79ee555768a5..335bda64ff5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -684,14 +684,14 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev) ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); } -static int vce_v4_0_set_clockgating_state(void *handle, +static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { /* needed for driver unload*/ return 0; } -static int vce_v4_0_set_powergating_state(void *handle, +static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCE block. 
@@ -701,7 +701,7 @@ static int vce_v4_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) return vce_v4_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 10e99c926fb8..5ea96c983517 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -85,7 +85,8 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state); static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -281,7 +282,7 @@ static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, 0, mmUVD_STATUS))) { - vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v1_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } return 0; @@ -303,7 +304,7 @@ static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block) idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); if (idle_work_unexecuted) { if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, 0); } r = vcn_v1_0_hw_fini(ip_block); @@ -344,7 +345,7 @@ static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block) */ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) { - uint32_t size = 
AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -411,7 +412,7 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1394,15 +1395,15 @@ static int vcn_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v1_0_set_clockgating_state(void *handle, +static int vcn_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ - if (!vcn_v1_0_is_idle(handle)) + if (!vcn_v1_0_is_idle(adev)) return -EBUSY; vcn_v1_0_enable_clock_gating(adev); } else { @@ -1799,7 +1800,7 @@ static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun } } -static int vcn_v1_0_set_powergating_state(void *handle, +static int vcn_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCN block. 
@@ -1810,7 +1811,7 @@ static int vcn_v1_0_set_powergating_state(void *handle, * the smc and the hw blocks */ int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == adev->vcn.cur_state) return 0; @@ -1856,7 +1857,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, 0); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -1886,7 +1887,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks) if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, 0); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e0322cbca3ec..e42cfc731ad8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -92,7 +92,7 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = { static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v2_0_set_powergating_state(void *handle, +static int vcn_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -318,7 +318,7 @@ static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, 0, mmUVD_STATUS))) - vcn_v2_0_set_powergating_state(adev, 
AMD_PG_STATE_GATE); + vcn_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); return 0; } @@ -372,7 +372,7 @@ static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block) */ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; if (amdgpu_sriov_vf(adev)) @@ -428,7 +428,7 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -978,7 +978,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) int i, j, r; if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, 0); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram); @@ -1235,7 +1235,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev) power_off: if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, 0); return 0; } @@ -1335,10 +1335,10 @@ static int vcn_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v2_0_set_clockgating_state(void *handle, +static int vcn_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -1346,7 +1346,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle, if (enable) { /* wait for STATUS to clear */ - if (!vcn_v2_0_is_idle(handle)) + if (!vcn_v2_0_is_idle(adev)) return -EBUSY; vcn_v2_0_enable_clock_gating(adev); } else { @@ -1796,7 +1796,7 @@ 
int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) } -static int vcn_v2_0_set_powergating_state(void *handle, +static int vcn_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCN block. @@ -1807,7 +1807,7 @@ static int vcn_v2_0_set_powergating_state(void *handle, * the smc and the hw blocks */ int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) { adev->vcn.cur_state = AMD_PG_STATE_UNGATE; @@ -1920,7 +1920,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) init_table += header->vcn_table_offset; - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT( SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 6aa08281d094..b518202955ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -95,7 +95,7 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = { static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v2_5_set_powergating_state(void *handle, +static int vcn_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -399,7 +399,7 @@ static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, mmUVD_STATUS))) - vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + 
vcn_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0); @@ -465,7 +465,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, @@ -514,7 +514,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1012,8 +1012,10 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) uint32_t rb_bufsz, tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1285,7 +1287,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V1_0_INSERT_DIRECT_WT( @@ -1485,8 +1487,10 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + 
amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -1778,6 +1782,7 @@ static bool vcn_v2_5_is_idle(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); } @@ -1801,17 +1806,17 @@ static int vcn_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v2_5_set_clockgating_state(void *handle, +static int vcn_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (amdgpu_sriov_vf(adev)) return 0; if (enable) { - if (!vcn_v2_5_is_idle(handle)) + if (!vcn_v2_5_is_idle(adev)) return -EBUSY; vcn_v2_5_enable_clock_gating(adev); } else { @@ -1821,10 +1826,10 @@ static int vcn_v2_5_set_clockgating_state(void *handle, return 0; } -static int vcn_v2_5_set_powergating_state(void *handle, +static int vcn_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 6732ad7f16f5..63ddd4cca910 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -105,7 +105,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev); static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v3_0_set_powergating_state(void *handle, +static int vcn_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int 
vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -430,9 +430,9 @@ static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, mmUVD_STATUS))) { - vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) { + vcn_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } } @@ -490,7 +490,7 @@ static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block) */ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -540,7 +540,7 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1141,8 +1141,10 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) uint32_t rb_bufsz, tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1373,7 +1375,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); if (adev->firmware.load_type == 
AMDGPU_FW_LOAD_PSP) { MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, @@ -1632,8 +1634,10 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev) vcn_v3_0_enable_static_power_gating(adev, i); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -2132,10 +2136,10 @@ static int vcn_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v3_0_set_clockgating_state(void *handle, +static int vcn_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; @@ -2155,10 +2159,10 @@ static int vcn_v3_0_set_clockgating_state(void *handle, return 0; } -static int vcn_v3_0_set_powergating_state(void *handle, +static int vcn_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; /* for SRIOV, guest should not control VCN Power-gating diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index fcc8511e91ee..00551d6f0370 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -96,7 +96,7 @@ static int amdgpu_ih_clientid_vcns[] = { static int vcn_v4_0_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v4_0_set_powergating_state(void *handle, +static int vcn_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct 
dpg_pause_state *new_state); @@ -366,9 +366,9 @@ static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) continue; if (!amdgpu_sriov_vf(adev)) { if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, regUVD_STATUS))) { - vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, regUVD_STATUS))) { + vcn_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) @@ -431,7 +431,7 @@ static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -491,7 +491,7 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx { uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -1097,8 +1097,10 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) uint32_t tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1341,7 +1343,7 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + cache_size = 
AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, @@ -1623,8 +1625,10 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) vcn_v4_0_enable_static_power_gating(adev, i); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -2007,14 +2011,15 @@ static int vcn_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) /** * vcn_v4_0_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) +static int vcn_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; @@ -2037,14 +2042,15 @@ static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_sta /** * vcn_v4_0_set_powergating_state - set VCN block powergating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: power gating state * * Set VCN block powergating state */ -static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state) +static int vcn_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; /* for SRIOV, guest should not control VCN Power-gating diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3f69b9b2bcd0..ecdc027f8220 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -87,7 +87,7 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v4_0_3_set_powergating_state(void *handle, +static int vcn_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -349,7 +349,7 @@ static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) cancel_delayed_work_sync(&adev->vcn.idle_work); if (adev->vcn.cur_state != AMD_PG_STATE_GATE) - vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE); return 0; } @@ -407,7 +407,7 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx) uint32_t offset, size, vcn_inst; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); vcn_inst = GET_INST(VCN, inst_idx); @@ -482,7 +482,7 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -957,6 +957,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { vcn_inst = GET_INST(VCN, i); + 
vcn_v4_0_3_fw_shared_init(adev, vcn_inst); + memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header)); header.version = MMSCH_VERSION; header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; @@ -969,7 +971,7 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, @@ -1121,8 +1123,10 @@ static int vcn_v4_0_3_start(struct amdgpu_device *adev) int i, j, k, r, vcn_inst; uint32_t tmp; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1395,8 +1399,10 @@ static int vcn_v4_0_3_stop(struct amdgpu_device *adev) vcn_v4_0_3_enable_clock_gating(adev, i); } Done: - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -1616,15 +1622,15 @@ static int vcn_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block) /* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v4_0_3_set_clockgating_state(void *handle, +static int vcn_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == 
AMD_CG_STATE_GATE; int i; @@ -1644,15 +1650,15 @@ static int vcn_v4_0_3_set_clockgating_state(void *handle, /** * vcn_v4_0_3_set_powergating_state - set VCN block powergating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: power gating state * * Set VCN block powergating state */ -static int vcn_v4_0_3_set_powergating_state(void *handle, +static int vcn_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; /* for SRIOV, guest should not control VCN Power-gating @@ -1911,9 +1917,94 @@ static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = { .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count, }; +static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int vcn_v4_0_3_err_codes[] = { + 14, 15, /* VCN */ +}; + +static bool vcn_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + vcn_v4_0_3_err_codes, + ARRAY_SIZE(vcn_v4_0_3_err_codes))) + return false; 
+ + return true; +} + +static const struct aca_bank_ops vcn_v4_0_3_aca_bank_ops = { + .aca_bank_parser = vcn_v4_0_3_aca_bank_parser, + .aca_bank_is_valid = vcn_v4_0_3_aca_bank_is_valid, +}; + +static const struct aca_info vcn_v4_0_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &vcn_v4_0_3_aca_bank_ops, +}; + +static int vcn_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN, + &vcn_v4_0_3_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + static struct amdgpu_vcn_ras vcn_v4_0_3_ras = { .ras_block = { .hw_ops = &vcn_v4_0_3_ras_hw_ops, + .ras_late_init = vcn_v4_0_3_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 71961fb3f7ff..23d3c16c9d9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -95,7 +95,7 @@ static int amdgpu_ih_clientid_vcns[] = { static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v4_0_5_set_powergating_state(void *handle, +static int vcn_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -309,7 +309,7 @@ static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, regUVD_STATUS))) { - vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } } @@ -370,7 +370,7 @@ static void 
vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -431,7 +431,7 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -1000,8 +1000,10 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev) uint32_t tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1277,8 +1279,10 @@ static int vcn_v4_0_5_stop(struct amdgpu_device *adev) vcn_v4_0_5_enable_static_power_gating(adev, i); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -1492,14 +1496,15 @@ static int vcn_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block) /** * vcn_v4_0_5_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v4_0_5_set_clockgating_state(void *handle, enum amd_clockgating_state state) +static int vcn_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum 
amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; int i; @@ -1522,14 +1527,15 @@ static int vcn_v4_0_5_set_clockgating_state(void *handle, enum amd_clockgating_s /** * vcn_v4_0_5_set_powergating_state - set VCN block powergating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: power gating state * * Set VCN block powergating state */ -static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_state state) +static int vcn_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->vcn.cur_state) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index bd3d2bbdc16b..b6d78381ebfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -32,7 +32,7 @@ #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" -#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" #include "vcn_v5_0_0.h" #include <drm/drm_drv.h> @@ -78,7 +78,7 @@ static int amdgpu_ih_clientid_vcns[] = { static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v5_0_0_set_powergating_state(void *handle, +static int vcn_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -105,6 +105,21 @@ static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block) return amdgpu_vcn_early_init(adev); } +void vcn_v5_0_0_alloc_ip_dump(struct 
amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; + + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } +} + /** * vcn_v5_0_0_sw_init - sw init for VCN block * @@ -117,8 +132,6 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; struct amdgpu_device *adev = ip_block->adev; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -140,13 +153,13 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); + VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); if (r) return r; /* VCN POISON TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); + VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); if (r) return r; @@ -177,14 +190,7 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + vcn_v5_0_0_alloc_ip_dump(adev); r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -283,7 +289,7 @@ static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, regUVD_STATUS))) { - 
vcn_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } } @@ -344,7 +350,7 @@ static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -405,7 +411,7 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -771,8 +777,10 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev) uint32_t tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1018,8 +1026,10 @@ static int vcn_v5_0_0_stop(struct amdgpu_device *adev) vcn_v5_0_0_enable_static_power_gating(adev, i); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -1229,14 +1239,15 @@ static int vcn_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block) /** * vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int 
vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) +static int vcn_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; int i; @@ -1259,14 +1270,15 @@ static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_s /** * vcn_v5_0_0_set_powergating_state - set VCN block powergating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: power gating state * * Set VCN block powergating state */ -static int vcn_v5_0_0_set_powergating_state(void *handle, enum amd_powergating_state state) +static int vcn_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->vcn.cur_state) @@ -1312,10 +1324,10 @@ static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgp DRM_DEBUG("IH: VCN TRAP\n"); switch (entry->src_id) { - case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; - case VCN_4_0__SRCID_UVD_POISON: + case VCN_5_0__SRCID_UVD_POISON: amdgpu_vcn_process_poison_irq(adev, source, entry); break; default: @@ -1351,7 +1363,8 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, + struct drm_printer *p) { struct amdgpu_device *adev = ip_block->adev; int i, j; @@ -1383,7 +1396,7 @@ static void vcn_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm } } -static void 
vcn_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block) +void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; int i, j; @@ -1424,8 +1437,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .wait_for_idle = vcn_v5_0_0_wait_for_idle, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = vcn_v5_0_dump_ip_state, - .print_ip_state = vcn_v5_0_print_ip_state, + .dump_ip_state = vcn_v5_0_0_dump_ip_state, + .print_ip_state = vcn_v5_0_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h index 51bbccd4360f..b8927652bc50 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h @@ -32,6 +32,11 @@ #define VCN_VID_IP_ADDRESS 0x0 #define VCN_AON_IP_ADDRESS 0x30000 +void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev); +void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, + struct drm_printer *p); +void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block); + extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; #endif /* __VCN_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c new file mode 100644 index 000000000000..8b463c977d08 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -0,0 +1,1118 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" +#include "vcn_v5_0_0.h" +#include "vcn_v5_0_1.h" + +#include <drm/drm_drv.h> + +static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state); +static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * vcn_v5_0_1_early_init - set function pointers and load microcode + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Set ring and irq function pointers + * Load microcode from filesystem + */ +static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; + + vcn_v5_0_1_set_unified_ring_funcs(adev); + vcn_v5_0_1_set_irq_funcs(adev); + + return amdgpu_vcn_early_init(adev); +} + +/** + * vcn_v5_0_1_sw_init - sw init for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Load firmware and sw initialization + */ +static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev); + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + /* VCN UNIFIED TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + + vcn_inst = GET_INST(VCN, i); + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst; + + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); + sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); + + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); + if (r) + return r; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = true; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + } + + /* TODO: Add queue reset mask when FW fully supports it */ + adev->vcn.supported_reset = + 
amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + vcn_v5_0_0_alloc_ip_dump(adev); + + return amdgpu_vcn_sysfs_reset_mask_init(adev); +} + +/** + * vcn_v5_0_1_sw_fini - sw fini for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * VCN suspend and free up sw allocation + */ +static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, r, idx; + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } + + drm_dev_exit(idx); + } + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + amdgpu_vcn_sysfs_reset_mask_fini(adev); + + kfree(adev->vcn.ip_dump); + + return r; +} + +/** + * vcn_v5_0_1_hw_init - start and test VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst), + adev->vcn.inst[i].aid_id); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + return 0; +} + +/** + * vcn_v5_0_1_hw_fini - stop the hardware block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + return 0; +} + +/** + * vcn_v5_0_1_suspend - suspend VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * HW fini and suspend VCN block + */ +static int vcn_v5_0_1_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = vcn_v5_0_1_hw_fini(ip_block); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v5_0_1_resume - resume VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Resume firmware and hw init VCN block + */ +static int vcn_v5_0_1_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v5_0_1_hw_init(ip_block); + + return r; +} + +/** + * vcn_v5_0_1_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v5_0_1_mc_resume(struct amdgpu_device *adev, int inst) +{ + uint32_t offset, size, vcn_inst; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + vcn_inst = GET_INST(VCN, inst); + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 
0); + offset = 0; + } else { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); +} + +/** + * vcn_v5_0_1_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write 
sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v5_0_1_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + 
VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, 
regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v5_0_1_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable clock gating for VCN block + */ +static void vcn_v5_0_1_disable_clock_gating(struct amdgpu_device *adev, int inst) +{ +} + +/** + * vcn_v5_0_1_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable clock gating for VCN block + */ +static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_device *adev, int inst) +{ +} + +/** + * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared = + adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + int vcn_inst; + uint32_t tmp; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* enable dynamic 
power gating mode */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); + + if (indirect) { + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */ + WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF, + adev->vcn.inst[inst_idx].aid_id, 0, true); + } + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect); + + vcn_v5_0_1_mc_resume_dpg_mode(adev, inst_idx, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + 
amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t)); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + /* Read DB_CTRL to flush the write DB_CTRL command. 
*/ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + return 0; +} + +/** + * vcn_v5_0_1_start - VCN start + * + * @adev: amdgpu_device pointer + * + * Start VCN block + */ +static int vcn_v5_0_1_start(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_ring *ring; + uint32_t tmp; + int i, j, k, r, vcn_inst; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v5_0_1_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } + + vcn_inst = GET_INST(VCN, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + vcn_v5_0_1_mc_resume(adev, i); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register 
access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + if (status & 2) + break; + mdelay(100); + if (amdgpu_emu_mode == 1) + msleep(20); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { + r = 0; + break; + } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + } + + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. 
*/ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + } + + return 0; +} + +/** + * vcn_v5_0_1_stop_dpg_mode - VCN stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop VCN block with dpg mode + */ +static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t tmp; + int vcn_inst; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); +} + +/** + * vcn_v5_0_1_stop - VCN stop + * + * @adev: amdgpu_device pointer + * + * Stop VCN block + */ +static int vcn_v5_0_1_stop(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared 
*fw_shared; + uint32_t tmp; + int i, r = 0, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v5_0_1_stop_dpg_mode(adev, i); + continue; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); + } + + if 
(adev->pm.dpm_enabled)
+		amdgpu_dpm_enable_uvd(adev, false);
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified read pointer, read from the
+ * regUVD_RB_RPTR register of instance GET_INST(VCN, @ring->me).
+ *
+ * If @ring is not ring_enc[0] of instance @ring->me an error is logged, but
+ * the register is still read and its value returned.
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified write pointer.
+ *
+ * When @ring->use_doorbell is set the value is taken from
+ * *@ring->wptr_cpu_addr; otherwise it is read from the regUVD_RB_WPTR
+ * register of instance GET_INST(VCN, @ring->me).
+ *
+ * If @ring is not ring_enc[0] of instance @ring->me an error is logged, but
+ * the lookup still proceeds.
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	if (ring->use_doorbell)
+		return *ring->wptr_cpu_addr;
+	else
+		return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware.
+ *
+ * Only the lower 32 bits of @ring->wptr are written. With doorbells enabled
+ * the value is first stored to *@ring->wptr_cpu_addr and then written to the
+ * doorbell at @ring->doorbell_index; otherwise it is written to the
+ * regUVD_RB_WPTR register of instance GET_INST(VCN, @ring->me).
+ *
+ * If @ring is not ring_enc[0] of instance @ring->me an error is logged, but
+ * the write still happens.
+ */
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	if (ring->use_doorbell) {
+		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+	} else {
+		WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+			lower_32_bits(ring->wptr));
+	}
+}
+
+static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { /*
+	 * Ring callback table that vcn_v5_0_1_set_unified_ring_funcs() installs
+	 * on ring_enc[0] of every VCN instance. Only the rptr/wptr accessors
+	 * are defined in this file; the remaining hooks point at
+	 * vcn_v2_0_enc_* and generic amdgpu_* helpers.
+	 */
+	.type = AMDGPU_RING_TYPE_VCN_ENC,
+	.align_mask = 0x3f,
+	.nop = VCN_ENC_CMD_NO_OP,
+	.get_rptr = vcn_v5_0_1_unified_ring_get_rptr,
+	.get_wptr = vcn_v5_0_1_unified_ring_get_wptr,
+	.set_wptr = vcn_v5_0_1_unified_ring_set_wptr,
+	.emit_frame_size = /* sum of the per-helper sizes annotated below */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+		1, /* vcn_v2_0_enc_ring_insert_end */
+	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
+	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
+	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+	.test_ring = amdgpu_vcn_enc_ring_test_ring,
+	.test_ib = amdgpu_vcn_unified_ring_test_ib,
+	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_end = vcn_v2_0_enc_ring_insert_end,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_vcn_ring_begin_use,
+	.end_use = amdgpu_vcn_ring_end_use,
+	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v5_0_1_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set unified ring functions.
+ *
+ * For each of the @adev->vcn.num_vcn_inst instances this points
+ * ring_enc[0].funcs at vcn_v5_0_1_unified_ring_vm_funcs, records the instance
+ * index in ring_enc[0].me, and derives the instance's aid_id as
+ * GET_INST(VCN, i) / @adev->vcn.num_inst_per_aid.
+ */
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+	int i, vcn_inst;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_1_unified_ring_vm_funcs;
+		adev->vcn.inst[i].ring_enc[0].me = i;
+		vcn_inst = GET_INST(VCN, i);
+		adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid;
+	}
+}
+
+/**
+ * vcn_v5_0_1_is_idle - check VCN block is idle
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Check whether VCN block is idle.
+ *
+ * Return: true only if regUVD_STATUS of every instance reads
+ * UVD_STATUS__IDLE. The accumulator starts at 1, so the result is also true
+ * when there are no instances to check.
+ */
+static bool vcn_v5_0_1_is_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int i, ret = 1;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+		ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE);
+
+	return ret;
+}
+
+/**
+ * vcn_v5_0_1_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Wait for VCN block idle.
+ *
+ * Instances are waited on in index order via SOC15_WAIT_ON_RREG on
+ * regUVD_STATUS, with UVD_STATUS__IDLE passed as both trailing arguments.
+ * NOTE(review): the macro is defined elsewhere; presumably those arguments
+ * are the expected value and mask and the wait is bounded by a timeout -
+ * confirm against its definition.
+ *
+ * Return: 0 if every wait returned 0, otherwise the first non-zero value
+ * returned by the wait; later instances are then not waited on.
+ */
+static int vcn_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int i, ret = 0;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE,
+			UVD_STATUS__IDLE);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+/**
+ * vcn_v5_0_1_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: clock gating state
+ *
+ * Set VCN block clockgating state.
+ *
+ * Clock gating is enabled when @state is AMD_CG_STATE_GATE and disabled for
+ * any other value. An instance is only gated if its regUVD_STATUS reads
+ * UVD_STATUS__IDLE.
+ *
+ * Return: 0 on success, or -EBUSY at the first non-idle instance found while
+ * gating. Instances already gated earlier in the loop are not reverted.
+ */
+static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					  enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool enable = state == AMD_CG_STATE_GATE;
+	int i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (enable) {
+			if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE)
+				return -EBUSY;
+			vcn_v5_0_1_enable_clock_gating(adev, i);
+		} else {
+			vcn_v5_0_1_disable_clock_gating(adev, i);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_set_powergating_state - set VCN block powergating state
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: power gating state
+ *
+ * Set VCN block powergating state.
+ *
+ * Nothing is done if @state already equals adev->vcn.cur_state.
+ * AMD_PG_STATE_GATE calls vcn_v5_0_1_stop(); any other state calls
+ * vcn_v5_0_1_start(). cur_state is updated only when that call returns 0.
+ *
+ * Return: 0 if the state is unchanged, otherwise the stop/start result.
+ */
+static int vcn_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					  enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	if (state == adev->vcn.cur_state)
+		return 0;
+
+	if (state == AMD_PG_STATE_GATE)
+		ret = vcn_v5_0_1_stop(adev);
+	else
+		ret = vcn_v5_0_1_start(adev);
+
+	if (!ret)
+		adev->vcn.cur_state = state;
+
+	return ret;
+}
+
+/**
+ * vcn_v5_0_1_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources (not used by this handler)
+ * @entry: interrupt entry from clients and sources
+ *
+ * Process VCN block interrupt.
+ *
+ * @entry->node_id is translated through node_id_to_phys_map and compared with
+ * each instance's aid_id; the first matching instance is used. If none
+ * matches, a one-time warning is emitted and the interrupt is dropped.
+ *
+ * Only VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE is handled, by running fence
+ * processing on that instance's ring_enc[0]. Any other src_id is logged as an
+ * error.
+ *
+ * Return: always 0.
+ */
+static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+	struct amdgpu_iv_entry *entry)
+{
+	uint32_t i, inst;
+
+	i = node_id_to_phys_map[entry->node_id];
+
+	DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+		if (adev->vcn.inst[inst].aid_id == i)
+			break;
+	if (inst >= adev->vcn.num_vcn_inst) {
+		dev_WARN_ONCE(adev->dev, 1,
+			"Interrupt received for unknown VCN instance %d",
+			entry->node_id);
+		return 0;
+	}
+
+	switch (entry->src_id) {
+	case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+		amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
+		break;
+	default:
+		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+			entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_irq_funcs = {
+	.process = vcn_v5_0_1_process_interrupt, /* the only hook set in this table */
+};
+
+/**
+ * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set VCN block interrupt irq functions.
+ *
+ * adev->vcn.inst->irq is the irq source of the first instance (inst is
+ * indexed as an array elsewhere in this file). Its num_types is incremented
+ * once per VCN instance and its funcs pointer is set once; the irq sources of
+ * the other instances are not touched here.
+ * NOTE(review): this looks like a deliberately shared interrupt source -
+ * confirm against the sw_init code that registers it.
+ */
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+	int i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+		adev->vcn.inst->irq.num_types++;
+	adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs;
+}
+
+static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { /* IP-level callbacks for the VCN 5.0.1 block */
+	.name = "vcn_v5_0_1",
+	.early_init = vcn_v5_0_1_early_init,
+	.late_init = NULL, /* no late-init hook */
+	.sw_init = vcn_v5_0_1_sw_init,
+	.sw_fini = vcn_v5_0_1_sw_fini,
+	.hw_init = vcn_v5_0_1_hw_init,
+	.hw_fini = vcn_v5_0_1_hw_fini,
+	.suspend = vcn_v5_0_1_suspend,
+	.resume = vcn_v5_0_1_resume,
+	.is_idle = vcn_v5_0_1_is_idle,
+	.wait_for_idle = vcn_v5_0_1_wait_for_idle,
+	.check_soft_reset = NULL, /* all four soft-reset hooks are left unset */
+	.pre_soft_reset = NULL,
+	.soft_reset = NULL,
+	.post_soft_reset = NULL,
+	.set_clockgating_state = vcn_v5_0_1_set_clockgating_state,
+	.set_powergating_state = vcn_v5_0_1_set_powergating_state,
+	.dump_ip_state = vcn_v5_0_0_dump_ip_state, /* NOTE(review): vcn_v5_0_0_* helpers are not defined in the visible part of this file; presumably shared with the VCN 5.0.0 code - confirm */
+	.print_ip_state = vcn_v5_0_0_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = { /* non-static: declared extern in vcn_v5_0_1.h */
+	.type = AMD_IP_BLOCK_TYPE_VCN,
+	.major = 5, /* major.minor.rev = 5.0.1 */
+	.minor = 0,
+	.rev = 1,
+	.funcs = &vcn_v5_0_1_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
new file mode 100644
index 000000000000..82ac709f44bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_v5_0_1_H__ +#define __VCN_v5_0_1_H__ + +extern const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block; + +#endif /* __VCN_v5_0_1_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 0fedadd0a6a4..98fc6941159e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -364,9 +364,8 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, * this should allow us to catchup. */ tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow " - "(0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); + dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp); ih->rptr = tmp; tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); @@ -605,10 +604,10 @@ static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev, } } -static int vega10_ih_set_clockgating_state(void *handle, +static int vega10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; vega10_ih_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -616,7 +615,7 @@ static int vega10_ih_set_clockgating_state(void *handle, } -static int vega10_ih_set_powergating_state(void *handle, +static int vega10_ih_set_powergating_state(struct amdgpu_ip_block 
*ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 1c9aff742e43..e9e3b2ed4b7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -366,6 +366,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) /* Enable IH Retry CAM */ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0) || amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 4) || amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5)) WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN, ENABLE, 1); @@ -443,9 +444,8 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, * this should allow us to catchup. */ tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow " - "(0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); + dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp); ih->rptr = tmp; tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); @@ -697,10 +697,10 @@ static void vega20_ih_update_clockgating_state(struct amdgpu_device *adev, } } -static int vega20_ih_set_clockgating_state(void *handle, +static int vega20_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; vega20_ih_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -708,7 +708,7 @@ static int vega20_ih_set_clockgating_state(void *handle, } -static int vega20_ih_set_powergating_state(void *handle, +static int vega20_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 
a83505815d39..06615f160331 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1945,10 +1945,10 @@ static int vi_common_set_clockgating_state_by_smu(void *handle, return 0; } -static int vi_common_set_clockgating_state(void *handle, +static int vi_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1988,7 +1988,7 @@ static int vi_common_set_clockgating_state(void *handle, return 0; } -static int vi_common_set_powergating_state(void *handle, +static int vi_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 02f7ba8c93cd..984f0e705078 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -274,7 +274,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf820258, + 0xbf820001, 0xbf820259, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, @@ -390,141 +390,98 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xbefe007c, 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0xb8fb1605, 0x807b817b, - 0x8e7b847b, 0x8e76827b, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747074, - 0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a7b7c, 
0xbf85ffe7, - 0xbef40172, 0xbef00080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, - 0xbef600ff, 0x01000000, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf85004d, 0xbe840080, - 0xd2890000, 0x00000900, - 0x80048104, 0xd2890001, - 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, - 0x80048104, 0xd2890003, - 0x00000900, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, + 0xbefc007e, 0xbf108080, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fb1605, + 0x807b817b, 0x8e7b847b, + 0x8e76827b, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747074, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 0xbf8cc07f, 0xc06b013a, + 0x00000010, 0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a7b7c, + 0xbf85ffe7, 0xbef40172, + 0xbef00080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf85004d, 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000902, + 0xd2890000, 0x00000901, 0x80048104, 0xd2890001, - 0x00000902, 0x80048104, - 0xd2890002, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, 0x80048104, 0xd2890003, - 0x00000902, 0x80048104, + 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000903, 0x80048104, - 0xd2890001, 0x00000903, + 
0x00000902, 0x80048104, + 0xd2890001, 0x00000902, 0x80048104, 0xd2890002, - 0x00000903, 0x80048104, - 0xd2890003, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbf820008, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb4306, 0x867bc17b, - 0xbf840063, 0xbf8a0000, - 0x867aff6f, 0x04000000, - 0xbf84005f, 0x8e7b867b, - 0x8e7b827b, 0xbef6007b, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, - 0xd2890000, 0x00000900, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, 0x80048104, 0xd2890001, - 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, 0x80048104, 0xd2890003, - 0x00000900, 0x80048104, + 0x00000903, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, - 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb2a05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 
0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xbf820008, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8fb4306, + 0x867bc17b, 0xbf840063, + 0xbf8a0000, 0x867aff6f, + 0x04000000, 0xbf84005f, + 0x8e7b867b, 0x8e7b827b, + 0xbef6007b, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -544,137 +501,181 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, + 0x807bff7b, 0x00001000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850051, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 
0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xbf8200c7, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x04000000, - 0xbf84001e, 0xbefe00c1, - 0xbeff00c1, 0xb8ef4306, - 0x866fc16f, 0xbf840019, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, - 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbefe00c1, - 0xbeff00c1, 0xbef600ff, - 0x01000000, 0xb8ef2a05, - 0x806f816f, 0x8e6f826f, - 0x806fff6f, 0x00008000, - 0xbef80080, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xbf8c0f70, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 
0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200c7, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf840019, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, + 0xbefe00c1, 0xbeff00c1, 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82a05, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xbf8c0f70, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, 0x80786e78, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xc0211bfa, 
+ 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xc0211bfa, 0x00000078, + 0x80788478, 0xc0211b3a, 0x00000078, 0x80788478, - 0xc0211b3a, 0x00000078, - 0x80788478, 0xc0211b7a, + 0xc0211b7a, 0x00000078, + 0x80788478, 0xc0211c3a, 0x00000078, 0x80788478, - 0xc0211c3a, 0x00000078, - 0x80788478, 0xc0211c7a, + 0xc0211c7a, 0x00000078, + 0x80788478, 0xc0211eba, 0x00000078, 0x80788478, - 0xc0211eba, 0x00000078, - 0x80788478, 0xc0211efa, + 0xc0211efa, 0x00000078, + 0x80788478, 0xc0211a3a, 0x00000078, 0x80788478, - 0xc0211a3a, 0x00000078, - 0x80788478, 0xc0211a7a, + 0xc0211a7a, 0x00000078, + 0x80788478, 0xc0211cfa, 0x00000078, 0x80788478, - 0xc0211cfa, 0x00000078, - 0x80788478, 0xbf8cc07f, - 0xbefc006f, 0xbefe0070, - 0xbeff0071, 0x866f7bff, - 0x000003ff, 0xb96f4803, - 0x866f7bff, 0xfffff800, - 0x8f6f8b6f, 0xb96fa2c3, - 0xb973f801, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xbf8cc07f, 0xbefc006f, + 0xbefe0070, 0xbeff0071, + 0x866f7bff, 0x000003ff, + 0xb96f4803, 0x866f7bff, + 0xfffff800, 0x8f6f8b6f, + 0xb96fa2c3, 0xb973f801, + 0xb8ee2a05, 0x806e816e, + 0x8e6e8a6e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 
0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_nv1x_hex[] = { @@ -1302,7 +1303,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { }; static const uint32_t cwsr_trap_arcturus_hex[] = { - 0xbf820001, 0xbf8202d4, + 0xbf820001, 0xbf8202d5, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, @@ -1419,99 +1420,37 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xbefe007c, 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 
0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840064, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf840060, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xbefc007e, 0xbf108080, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, + 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -1530,31 +1469,50 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 
0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8fb2a05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xb8fb4306, 0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -1574,215 +1532,259 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 
0xbf84ffee, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, + 0x807bff7b, 0x00001000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850051, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xbefc0080, - 0xbf11017c, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850059, - 
0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xbf9c0000, + 0xbefc0080, 0xbf11017c, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850059, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xbe840080, + 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, + 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 
0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffa9, 0xbf9c0000, - 0xbf820016, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffeb, - 0xbf9c0000, 0xbf8200e3, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x04000000, - 0xbf84001f, 0xbefe00c1, - 0xbeff00c1, 0xb8ef4306, - 0x866fc16f, 0xbf84001a, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x8078ff78, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x781d0000, - 0xe0510100, 0x781d0000, - 0x807cff7c, 0x00000200, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85fff6, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffa9, + 0xbf9c0000, 0xbf820016, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffeb, 0xbf9c0000, + 0xbf8200e3, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x04000000, 0xbf84001f, 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf84001a, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xb8ef2a05, 0x806f816f, - 0x8e6f826f, 0x806fff6f, - 0x00008000, 
0xbef80080, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefc0084, - 0xbf11087c, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 0xe0524300, - 0x781d0300, 0xbf8c0f70, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xbefc0080, - 0xbf11087c, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 0xe0524300, - 0x781d0300, 0xbf8c0f70, - 0xd3d94000, 0x18000100, - 0xd3d94001, 0x18000101, - 0xd3d94002, 0x18000102, - 0xd3d94003, 0x18000103, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffea, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xbf8c0f70, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbefe00c1, + 0xbeff00c1, 0xbef600ff, + 0x01000000, 0xb8ef2a05, + 0x806f816f, 0x8e6f826f, + 0x806fff6f, 0x00008000, + 0xbef80080, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 
0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe0070, 0xbeff0071, - 0x866f7bff, 0x000003ff, - 0xb96f4803, 0x866f7bff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2a05, 0x806e816e, - 0x8e6e8a6e, 0x8e6e816e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 
0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_aldebaran_hex[] = { - 0xbf820001, 0xbf8202df, + 0xbf820001, 0xbf8202e0, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, @@ -1899,99 +1901,37 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xbefe007c, 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 
0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840064, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf840060, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xbefc007e, 0xbf108080, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, 
+ 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, + 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -2010,31 +1950,50 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8fb2b05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xb8fb4306, 0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 
0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -2054,51 +2013,31 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xb8fb2985, - 0x807b817b, 0x8e7b837b, - 0xb8fa2b05, 0x807a817a, - 0x8e7a827a, 0x80fb7a7b, - 0x867b7b7b, 0xbf84007a, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2b05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, 0x807bff7b, 0x00001000, - 0xbefc0080, 0xbf11017c, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850059, 0xd3d84000, - 0x18000100, 0xd3d84001, - 
0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xbe840080, + 0xbf850051, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -2137,139 +2076,203 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0x807c847c, - 0xbf0a7b7c, 0xbf85ffa9, - 0xbf9c0000, 0xbf820016, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0xe0724000, 0x701d0000, 0xe0724100, 0x701d0100, 0xe0724200, 0x701d0200, 0xe0724300, 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, - 0xbf85ffeb, 0xbf9c0000, - 0xbf8200ee, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x866eff7f, - 0x04000000, 0xbf84001f, + 0xbf85ffef, 0xbf9c0000, + 0xb8fb2985, 0x807b817b, + 0x8e7b837b, 0xb8fa2b05, + 0x807a817a, 0x8e7a827a, + 0x80fb7a7b, 0x867b7b7b, + 0xbf84007a, 0x807bff7b, + 0x00001000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 
0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf8200ee, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x04000000, + 0xbf84001f, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf84001a, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, 0xbefe00c1, 0xbeff00c1, - 0xb8ef4306, 0x866fc16f, - 0xbf84001a, 0x8e6f866f, - 0x8e6f826f, 0xbef6006f, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbefe00c1, - 0xbeff00c1, 0xbef600ff, - 0x01000000, 0xb8ef2b05, - 0x806f816f, 0x8e6f826f, - 0x806fff6f, 0x00008000, - 0xbef80080, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 
0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xb8ef2985, 0x806f816f, - 0x8e6f836f, 0xb8f92b05, - 0x80798179, 0x8e798279, - 0x80ef796f, 0x866f6f6f, - 0xbf84001a, 0x806fff6f, - 0x00008000, 0xbefc0080, + 0xb8ef2b05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, 0x781d0300, 0xbf8c0f70, - 0xd3d94000, 0x18000100, - 0xd3d94001, 0x18000101, - 0xd3d94002, 0x18000102, - 0xd3d94003, 0x18000103, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffea, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xbf8c0f70, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbf85ffee, 0xb8ef2985, + 0x806f816f, 0x8e6f836f, + 0xb8f92b05, 0x80798179, + 0x8e798279, 0x80ef796f, + 0x866f6f6f, 0xbf84001a, + 0x806fff6f, 0x00008000, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, 
+ 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe0070, 0xbeff0071, - 0x866f7bff, 0x000003ff, - 0xb96f4803, 0x866f7bff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2985, 0x806e816e, - 0x8e6e8a6e, 0x8e6e816e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2985, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 
0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx10_hex[] = { @@ -3151,7 +3154,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { }; static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { - 0xbf820001, 0xbf8202db, + 0xbf820001, 0xbf8202dc, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, @@ -3266,99 +3269,37 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0xbefe007c, 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 
0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840064, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf840060, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xbefc007e, 0xbf108080, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, + 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 
0xd2890001, 0x00000900, 0x80048104, @@ -3377,31 +3318,50 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8fb2b05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xb8fb4306, 0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -3421,51 +3381,31 @@ static 
const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xb8fb2985, - 0x807b817b, 0x8e7b837b, - 0xb8fa2b05, 0x807a817a, - 0x8e7a827a, 0x80fb7a7b, - 0x867b7b7b, 0xbf84007a, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2b05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, 0x807bff7b, 0x00001000, - 0xbefc0080, 0xbf11017c, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850059, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xbe840080, + 0xbf850051, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -3504,139 +3444,203 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0x00000070, 0xbf8cc07f, 
0x80709070, 0xbf06c004, 0xbf84ffee, 0x807c847c, - 0xbf0a7b7c, 0xbf85ffa9, - 0xbf9c0000, 0xbf820016, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0xe0724000, 0x701d0000, 0xe0724100, 0x701d0100, 0xe0724200, 0x701d0200, 0xe0724300, 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, - 0xbf85ffeb, 0xbf9c0000, - 0xbf8200ee, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x866eff7f, - 0x04000000, 0xbf84001f, + 0xbf85ffef, 0xbf9c0000, + 0xb8fb2985, 0x807b817b, + 0x8e7b837b, 0xb8fa2b05, + 0x807a817a, 0x8e7a827a, + 0x80fb7a7b, 0x867b7b7b, + 0xbf84007a, 0x807bff7b, + 0x00001000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 
0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf8200ee, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x04000000, + 0xbf84001f, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf84001a, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, 0xbefe00c1, 0xbeff00c1, - 0xb8ef4306, 0x866fc16f, - 0xbf84001a, 0x8e6f866f, - 0x8e6f826f, 0xbef6006f, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbefe00c1, - 0xbeff00c1, 0xbef600ff, - 0x01000000, 0xb8ef2b05, - 0x806f816f, 0x8e6f826f, - 0x806fff6f, 0x00008000, - 0xbef80080, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xb8ef2985, 0x806f816f, - 0x8e6f836f, 0xb8f92b05, - 0x80798179, 0x8e798279, - 0x80ef796f, 0x866f6f6f, - 0xbf84001a, 0x806fff6f, - 0x00008000, 0xbefc0080, + 0xb8ef2b05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 
0x8078ff78, + 0x00000400, 0xbefc0084, 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, 0x781d0300, 0xbf8c0f70, - 0xd3d94000, 0x18000100, - 0xd3d94001, 0x18000101, - 0xd3d94002, 0x18000102, - 0xd3d94003, 0x18000103, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffea, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xbf8c0f70, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbf85ffee, 0xb8ef2985, + 0x806f816f, 0x8e6f836f, + 0xb8f92b05, 0x80798179, + 0x8e798279, 0x80ef796f, + 0x866f6f6f, 0xbf84001a, + 0x806fff6f, 0x00008000, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, 
+ 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe0070, 0xbeff0071, - 0x866f7bff, 0x000003ff, - 0xb96f4803, 0x866f7bff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2985, 0x806e816e, - 0x8e6e8a6e, 0x8e6e816e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b79, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2985, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b79, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 
0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx12_hex[] = { @@ -4122,3 +4126,487 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, }; + +static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { + 0xbf820001, 0xbf8202ca, + 0xb8f8f802, 0x8978ff78, + 0x00020006, 0xb8fbf803, + 0x866eff78, 0x00002000, + 0xbf840009, 0x866eff6d, + 0x00ff0000, 0xbf85001a, + 0x866eff7b, 0x00000400, + 0xbf850051, 0xbf8e0010, + 0xb8fbf803, 0xbf82fffa, + 0x866eff7b, 0x03c00900, + 0xbf850011, 0x866eff7b, + 0x000071ff, 0xbf840008, + 0x866fff7b, 0x00007080, + 0xbf840001, 0xbeee1a87, + 0xb8eff801, 0x8e6e8c6e, + 0x866e6f6e, 0xbf850006, + 0x866eff6d, 0x00ff0000, + 0xbf850003, 0x866eff7b, + 0x00000400, 0xbf85003a, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8979ff79, 0xfc000000, + 0x87797a79, 0xba7ff807, + 0x00000000, 0xb8faf812, + 0xb8fbf813, 0x8efa887a, + 0xbf0d8f7b, 0xbf840002, + 0x877bff7b, 0xffff0000, + 0xc0031bbd, 0x00000010, + 0xbf8cc07f, 0x8e6e976e, + 0x8979ff79, 0x00800000, + 0x87796e79, 0xc0071bbd, + 0x00000000, 0xbf8cc07f, + 0xc0071ebd, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0x866eff6d, 0x01ff0000, + 0xbf850005, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x866eff6d, 0x01000000, + 0xbf850002, 0x806c846c, + 0x826d806d, 0x866dff6d, + 0x0000ffff, 0x8f7a8b79, + 0x867aff7a, 0x001f8000, + 0xb97af807, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e8378, + 0xb96ee0c2, 0xbf800002, + 0xb9780002, 0xbe801f6c, + 0x866dff6d, 0x0000ffff, + 0xbefa0080, 0xb97a0283, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8979ff79, 0xfc000000, + 0x87797a79, 0xba7ff807, + 0x00000000, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2985, + 0x807a817a, 0x8e7a8a7a, + 0x8e7a817a, 0xb8fb1605, + 0x807b817b, 0x8e7b867b, + 0x807a7b7a, 0x807a7e7a, + 0x827b807f, 0x867bff7b, + 0x0000ffff, 0xc04b1c3d, + 
0x00000050, 0xbf8cc07f, + 0xc04b1d3d, 0x00000060, + 0xbf8cc07f, 0xc0431e7d, + 0x00000074, 0xbf8cc07f, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0xbef1007c, 0xbef00080, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611b7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611bfa, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbf108080, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 
+ 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf85004d, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb5306, 0x867bc17b, + 0xbf840052, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf84004e, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf85001d, + 0x24040682, 0xd86c0000, + 0x00000002, 0xbf8cc07f, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x680404ff, + 0x00000100, 
0xd0c9006a, + 0x0000f702, 0xbf87ffe5, + 0xbf820016, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x701d0002, 0x68040702, + 0xd0c9006a, 0x0000f702, + 0xbefe016a, 0xbf87fff6, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2b05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, + 0x807bff7b, 0x00001000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850051, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xbf9c0000, + 0xb8fb2985, 0x807b817b, + 0x8e7b837b, 0xb8fa2b05, + 0x807a817a, 0x8e7a827a, + 
0x80fb7a7b, 0x867b7b7b, + 0xbf84007a, 0x807bff7b, + 0x00001000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf8200f4, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x04000000, + 0xbf840025, 0xbefe00c1, + 0xbeff00c1, 0xb8ef5306, + 0x866fc16f, 0xbf840020, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, 
+ 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0xe0510200, 0x781d0000, + 0xe0510300, 0x781d0000, + 0xe0510400, 0x781d0000, + 0x807cff7c, 0x00000500, + 0x8078ff78, 0x00000500, + 0xbf0a6f7c, 0xbf85fff0, + 0xbefe00c1, 0xbeff00c1, + 0xbef600ff, 0x01000000, + 0xb8ef2b05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xb8ef2985, + 0x806f816f, 0x8e6f836f, + 0xb8f92b05, 0x80798179, + 0x8e798279, 0x80ef796f, + 0x866f6f6f, 0xbf84001a, + 0x806fff6f, 0x00008000, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, + 0x00000078, 0x80788478, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, + 0x00000078, 0x80788478, + 0xc0211c3a, 0x00000078, + 0x80788478, 
0xc0211c7a, + 0x00000078, 0x80788478, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, + 0x00000078, 0x80788478, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, + 0x00000078, 0x80788478, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2985, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b79, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 44772eec9ef4..96fbb16ceb21 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -34,41 +34,24 @@ * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3 * sp3 gfx11.sp3 -hex gfx11.hex * - * gfx12: - * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o gfx12.sp3 - * sp3 gfx12.sp3 -hex gfx12.hex */ #define CHIP_NAVI10 26 #define CHIP_SIENNA_CICHLID 30 #define CHIP_PLUM_BONITO 36 -#define CHIP_GFX12 37 #define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID) #define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID) #define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO) #define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO) -#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12) +#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO) #define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when 
ALLOW_REPLAY=0, for debugger #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised -#if ASIC_FAMILY < CHIP_GFX12 #define S_COHERENCE glc:1 #define V_COHERENCE slc:1 glc:1 #define S_WAITCNT_0 s_waitcnt 0 -#else -#define S_COHERENCE scope:SCOPE_SYS -#define V_COHERENCE scope:SCOPE_SYS -#define S_WAITCNT_0 s_wait_idle - -#define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO -#define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI -#define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC -#define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC -#define HW_REG_MODE HW_REG_WAVE_MODE -#endif -#if ASIC_FAMILY < CHIP_GFX12 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 @@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000 var S_SAVE_PC_HI_HT_MASK = 0x01000000 -#else -var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 -var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 -var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 -var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 -var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 -var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 -var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 -var SQ_WAVE_STATUS_WAVE64_SIZE = 1 -var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 -var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV -var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK -var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK -var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 -#endif var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 @@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 #endif -#if ASIC_FAMILY < CHIP_GFX12 var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF var 
SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 @@ -161,39 +128,6 @@ var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT var S_TRAPSTS_HWREG = HW_REG_TRAPSTS var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT -#else -var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF -var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 -var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 -var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 -var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 -var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 -var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 -var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 -var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 -var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 -var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 -var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 -var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 -var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 - -var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV -var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK -var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT -var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK -var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT -var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT -var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT -var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT -var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT -var BARRIER_STATE_SIGNAL_OFFSET = 16 -var 
BARRIER_STATE_VALID_OFFSET = 0 -#endif // bits [31:24] unused by SPI debug data var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 @@ -305,11 +239,7 @@ L_TRAP_NO_BARRIER: L_HALTED: // Host trap may occur while wave is halted. -#if ASIC_FAMILY < CHIP_GFX12 s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK -#else - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK -#endif s_cbranch_scc1 L_FETCH_2ND_TRAP L_CHECK_SAVE: @@ -336,7 +266,6 @@ L_NOT_HALTED: // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. // Maskable exceptions only cause the wave to enter the trap handler if // their respective bit in mode.excp_en is set. -#if ASIC_FAMILY < CHIP_GFX12 s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK s_cbranch_scc0 L_CHECK_TRAP_ID @@ -349,17 +278,6 @@ L_NOT_ADDR_WATCH: s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT s_and_b32 ttmp2, ttmp2, ttmp3 s_cbranch_scc1 L_FETCH_2ND_TRAP -#else - s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) - s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK - s_cbranch_scc0 L_NOT_ADDR_WATCH - s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK - -L_NOT_ADDR_WATCH: - s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL) - s_and_b32 ttmp2, ttmp3, ttmp2 - s_cbranch_scc1 L_FETCH_2ND_TRAP -#endif L_CHECK_TRAP_ID: // Check trap_id != 0 @@ -369,13 +287,8 @@ L_CHECK_TRAP_ID: #if SINGLE_STEP_MISSED_WORKAROUND // Prioritize single step exception over context save. // Second-level trap will halt wave and RFE, re-entering for SAVECTX. -#if ASIC_FAMILY < CHIP_GFX12 s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK -#else - // WAVE_TRAP_CTRL is already in ttmp3. - s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK -#endif s_cbranch_scc1 L_FETCH_2ND_TRAP #endif @@ -425,12 +338,7 @@ L_NO_NEXT_TRAP: s_cbranch_scc1 L_TRAP_CASE // Host trap will not cause trap re-entry. 
-#if ASIC_FAMILY < CHIP_GFX12 s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK -#else - s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK -#endif s_cbranch_scc1 L_EXIT_TRAP s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK @@ -457,16 +365,7 @@ L_EXIT_TRAP: s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 -#if ASIC_FAMILY < CHIP_GFX12 s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status -#else - // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. - // Only restore fields which the trap handler changes. - s_lshr_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_SCC_SHIFT - s_setreg_b32 hwreg(S_STATUS_HWREG, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ - SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status -#endif - s_rfe_b64 [ttmp0, ttmp1] L_SAVE: @@ -478,14 +377,6 @@ L_SAVE: s_endpgm L_HAVE_VGPRS: #endif -#if ASIC_FAMILY >= CHIP_GFX12 - s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) - s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT - s_cbranch_scc0 L_HAVE_VGPRS - s_endpgm -L_HAVE_VGPRS: -#endif - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] s_mov_b32 s_save_tmp, 0 s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit @@ -671,19 +562,6 @@ L_SAVE_HWREG: s_mov_b32 m0, 0x0 //Next lane of v2 to write to #endif -#if ASIC_FAMILY >= CHIP_GFX12 - // Ensure no further changes to barrier or LDS state. - // STATE_PRIV.BARRIER_COMPLETE may change up to this point. - s_barrier_signal -2 - s_barrier_wait -2 - - // Re-read final state of BARRIER_COMPLETE field for save. 
- s_getreg_b32 s_save_tmp, hwreg(S_STATUS_HWREG) - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK - s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK - s_or_b32 s_save_status, s_save_status, s_save_tmp -#endif - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK @@ -707,21 +585,6 @@ L_SAVE_HWREG: s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI) write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) -#if ASIC_FAMILY >= CHIP_GFX12 - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) - - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) - - s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) - write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) - - s_get_barrier_state s_save_tmp, -1 - s_wait_kmcnt (0) - write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) -#endif - #if NO_SQC_STORE // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. s_mov_b32 exec_lo, 0xFFFF @@ -814,9 +677,7 @@ L_SAVE_LDS_NORMAL: s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE -#if ASIC_FAMILY < CHIP_GFX12 s_barrier //LDS is used? wait for other waves in the same TG -#endif s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK s_cbranch_scc0 L_SAVE_LDS_DONE @@ -1081,11 +942,6 @@ L_RESTORE: s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC -#if ASIC_FAMILY >= CHIP_GFX12 - // Save s_restore_spi_init_hi for later use. 
- s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi -#endif - //determine it is wave32 or wave64 get_wave_size2(s_restore_size) @@ -1320,9 +1176,7 @@ L_RESTORE_SGPR: // s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception. // Clear DEBUG_EN before and restore MODE after the barrier. s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0 -#if ASIC_FAMILY < CHIP_GFX12 s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG -#endif /* restore HW registers */ L_RESTORE_HWREG: @@ -1334,11 +1188,6 @@ L_RESTORE_HWREG: s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes -#if ASIC_FAMILY >= CHIP_GFX12 - // Restore s_restore_spi_init_hi before the saved value gets clobbered. - s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save -#endif - read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) @@ -1358,44 +1207,6 @@ L_RESTORE_HWREG: s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch -#if ASIC_FAMILY >= CHIP_GFX12 - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) - S_WAITCNT_0 - s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp - - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) - S_WAITCNT_0 - s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp - - // Only the first wave needs to restore the workgroup barrier. 
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE - - // Skip over WAVE_STATUS, since there is no state to restore from it - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 - - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) - S_WAITCNT_0 - - s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE - - // extract the saved signal count from s_restore_tmp - s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET - - // We need to call s_barrier_signal repeatedly to restore the signal - // count of the work group barrier. The member count is already - // initialized with the number of waves in the work group. -L_BARRIER_RESTORE_LOOP: - s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE - s_barrier_signal -1 - s_add_i32 s_restore_tmp, s_restore_tmp, -1 - s_branch L_BARRIER_RESTORE_LOOP - -L_SKIP_BARRIER_RESTORE: -#endif - s_mov_b32 m0, s_restore_m0 s_mov_b32 exec_lo, s_restore_exec_lo s_mov_b32 exec_hi, s_restore_exec_hi @@ -1453,13 +1264,6 @@ L_RETURN_WITHOUT_PRIV: s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu -#if ASIC_FAMILY >= CHIP_GFX12 - // Make barrier and LDS state visible to all waves in the group. - // STATE_PRIV.BARRIER_COMPLETE may change after this point. 
- s_barrier_signal -2 - s_barrier_wait -2 -#endif - s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution L_END_PGM: @@ -1598,11 +1402,7 @@ function get_hwreg_size_bytes end function get_wave_size2(s_reg) -#if ASIC_FAMILY < CHIP_GFX12 s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) -#else - s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) -#endif s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE end diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm new file mode 100644 index 000000000000..1740e98c6719 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm @@ -0,0 +1,1126 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* To compile this assembly code: + * + * gfx12: + * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3 + * sp3 gfx12.sp3 -hex gfx12.hex + */ + +#define CHIP_GFX12 37 + +#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised + +var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 +var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 +var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 +var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 +var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 +var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 +var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 +var SQ_WAVE_STATUS_WAVE64_SIZE = 1 +var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 +var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK +var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 + +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 +var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 +var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 + +var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF +var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 +var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 +var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 +var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 +var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 +var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 +var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 +var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 +var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 +var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 +var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 +var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 +var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 + +var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ + 
SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT +var BARRIER_STATE_SIGNAL_OFFSET = 16 +var BARRIER_STATE_VALID_OFFSET = 0 + +var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 +var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 + +// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] +// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000 +var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31 + +var s_sgpr_save_num = 108 + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi +var s_save_pc_lo = ttmp0 +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_state_priv = ttmp12 +var s_save_excp_flag_priv = ttmp15 +var s_save_xnack_mask = s_save_excp_flag_priv +var s_wave_size = ttmp7 +var s_save_buf_rsrc0 = ttmp8 +var s_save_buf_rsrc1 = ttmp9 +var s_save_buf_rsrc2 = ttmp10 +var s_save_buf_rsrc3 = ttmp11 +var s_save_mem_offset = ttmp4 +var s_save_alloc_size = s_save_excp_flag_priv +var s_save_tmp = ttmp14 +var s_save_m0 = ttmp5 +var s_save_ttmps_lo = s_save_tmp +var s_save_ttmps_hi = s_save_excp_flag_priv + +var 
S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC + +var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 +var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 +var S_WAVE_SIZE = 25 + +var s_restore_spi_init_lo = exec_lo +var s_restore_spi_init_hi = exec_hi +var s_restore_mem_offset = ttmp12 +var s_restore_alloc_size = ttmp3 +var s_restore_tmp = ttmp2 +var s_restore_mem_offset_save = s_restore_tmp +var s_restore_m0 = s_restore_alloc_size +var s_restore_mode = ttmp7 +var s_restore_flat_scratch = s_restore_tmp +var s_restore_pc_lo = ttmp0 +var s_restore_pc_hi = ttmp1 +var s_restore_exec_lo = ttmp4 +var s_restore_exec_hi = ttmp5 +var s_restore_state_priv = ttmp14 +var s_restore_excp_flag_priv = ttmp15 +var s_restore_xnack_mask = ttmp13 +var s_restore_buf_rsrc0 = ttmp8 +var s_restore_buf_rsrc1 = ttmp9 +var s_restore_buf_rsrc2 = ttmp10 +var s_restore_buf_rsrc3 = ttmp11 +var s_restore_size = ttmp6 +var s_restore_ttmps_lo = s_restore_tmp +var s_restore_ttmps_hi = s_restore_alloc_size +var s_restore_spi_init_hi_save = s_restore_exec_hi + +shader main + asic(DEFAULT) + type(CS) + wave_size(32) + + s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save + +L_JUMP_TO_RESTORE: + s_branch L_RESTORE + +L_SKIP_RESTORE: + s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC + + // Clear SPI_PRIO: do not save with elevated priority. + // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. + s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK + + s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) + + s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK + s_cbranch_scc0 L_NOT_HALTED + +L_HALTED: + // Host trap may occur while wave is halted. 
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_SAVE: + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK + s_cbranch_scc1 L_SAVE + + // Wave is halted but neither host trap nor SAVECTX is raised. + // Caused by instruction fetch memory violation. + // Spin wait until context saved to prevent interrupt storm. + s_sleep 0x10 + s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) + s_branch L_CHECK_SAVE + +L_NOT_HALTED: + // Let second-level handle non-SAVECTX exception or trap. + // Any concurrent SAVECTX will be handled upon re-entry once halted. + + // Check non-maskable exceptions. memory_violation, illegal_instruction + // and xnack_error exceptions always cause the wave to enter the trap + // handler. + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + + // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. + // Maskable exceptions only cause the wave to enter the trap handler if + // their respective bit in mode.excp_en is set. + s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) + s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK + s_cbranch_scc0 L_NOT_ADDR_WATCH + s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK + +L_NOT_ADDR_WATCH: + s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL) + s_and_b32 ttmp2, ttmp3, ttmp2 + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_TRAP_ID: + // Check trap_id != 0 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + +#if SINGLE_STEP_MISSED_WORKAROUND + // Prioritize single step exception over context save. + // Second-level trap will halt wave and RFE, re-entering for SAVECTX. + // WAVE_TRAP_CTRL is already in ttmp3. 
+ s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP +#endif + + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK + s_cbranch_scc1 L_SAVE + +L_FETCH_2ND_TRAP: + // Read second-level TBA/TMA from first-level TMA and jump if available. + // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) + // ttmp12 holds SQ_WAVE_STATUS + s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA) + s_wait_idle + s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + + s_bitcmp1_b32 ttmp15, 0xF + s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA + s_or_b32 ttmp15, ttmp15, 0xFFFF0000 +L_NO_SIGN_EXTEND_TMA: + + s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag + s_wait_idle + s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT + s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK + s_or_b32 ttmp11, ttmp11, ttmp2 + + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA + s_wait_idle + s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA + s_wait_idle + + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] + s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set + s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler + +L_NO_NEXT_TRAP: + // If not caused by trap then halt wave to prevent re-entry. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_TRAP_CASE + + // Host trap will not cause trap re-entry. + s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK + s_cbranch_scc1 L_EXIT_TRAP + s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set. + // Rewind the PC to prevent this from occurring. 
+ s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + + s_branch L_EXIT_TRAP + +L_TRAP_CASE: + // Advance past trap instruction to prevent re-entry. + s_add_u32 ttmp0, ttmp0, 0x4 + s_addc_u32 ttmp1, ttmp1, 0x0 + +L_EXIT_TRAP: + s_and_b32 ttmp1, ttmp1, 0xFFFF + + // Restore SQ_WAVE_STATUS. + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + + // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. + // Only restore fields which the trap handler changes. + s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT + s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ + SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv + + s_rfe_b64 [ttmp0, ttmp1] + +L_SAVE: + // If VGPRs have been deallocated then terminate the wavefront. + // It has no remaining program to run and cannot save without VGPRs. + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) + s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT + s_cbranch_scc0 L_HAVE_VGPRS + s_endpgm +L_HAVE_VGPRS: + + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_mov_b32 s_save_tmp, 0 + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit + + /* inform SPI the readiness and wait for SPI's go signal */ + s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI + s_mov_b32 s_save_exec_hi, exec_hi + s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive + + s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE) + s_wait_idle + + // Save first_wave flag so we can clear high bits of save address. 
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK + s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT) + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + + // Trap temporaries must be saved via VGPR but all VGPRs are in use. + // There is no ttmp space to hold the resource constant for VGPR save. + // Save v0 by itself since it requires only two SGPRs. + s_mov_b32 s_save_ttmps_lo, exec_lo + s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF + s_mov_b32 exec_lo, 0xFFFFFFFF + s_mov_b32 exec_hi, 0xFFFFFFFF + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS + v_mov_b32 v0, 0x0 + s_mov_b32 exec_lo, s_save_ttmps_lo + s_mov_b32 exec_hi, s_save_ttmps_hi + + // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 + get_wave_size2(s_save_ttmps_hi) + get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi) + get_svgpr_size_bytes(s_save_ttmps_hi) + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi + s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes() + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo + s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0 + + v_writelane_b32 v0, ttmp4, 0x4 + v_writelane_b32 v0, ttmp5, 0x5 + v_writelane_b32 v0, ttmp6, 0x6 + v_writelane_b32 v0, ttmp7, 0x7 + v_writelane_b32 v0, ttmp8, 0x8 + v_writelane_b32 v0, ttmp9, 0x9 + v_writelane_b32 v0, ttmp10, 0xA + v_writelane_b32 v0, ttmp11, 0xB + v_writelane_b32 v0, ttmp13, 0xD + v_writelane_b32 v0, exec_lo, 0xE + v_writelane_b32 v0, exec_hi, 0xF + + s_mov_b32 exec_lo, 0x3FFF + s_mov_b32 exec_hi, 0x0 + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS + v_readlane_b32 ttmp14, v0, 0xE + v_readlane_b32 ttmp15, v0, 0xF + s_mov_b32 exec_lo, ttmp14 + s_mov_b32 exec_hi, ttmp15 + + /* setup Resource 
Contants */ + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited + s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC + + s_mov_b32 s_save_m0, m0 + + /* global mem offset */ + s_mov_b32 s_save_mem_offset, 0x0 + get_wave_size2(s_wave_size) + + /* save first 4 VGPRs, needed for SGPR save */ + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI + s_mov_b32 exec_hi, 0x00000000 + s_branch L_SAVE_4VGPR_WAVE32 +L_ENABLE_SAVE_4VGPR_EXEC_HI: + s_mov_b32 exec_hi, 0xFFFFFFFF + s_branch L_SAVE_4VGPR_WAVE64 +L_SAVE_4VGPR_WAVE32: + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR Allocated in 4-GPR granularity + + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3 + s_branch L_SAVE_HWREG + +L_SAVE_4VGPR_WAVE64: + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR Allocated in 4-GPR granularity + + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3 + + /* save HW registers */ + +L_SAVE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) + get_svgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, 
s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes() + + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource + v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource + v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store + s_mov_b32 m0, 0x0 //Next lane of v2 to write to + + // Ensure no further changes to barrier or LDS state. + // STATE_PRIV.BARRIER_COMPLETE may change up to this point. + s_barrier_signal -2 + s_barrier_wait -2 + + // Re-read final state of BARRIER_COMPLETE field for save. + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV) + s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK + s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK + s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp + + write_hwreg_to_v2(s_save_m0) + write_hwreg_to_v2(s_save_pc_lo) + s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK + write_hwreg_to_v2(s_save_tmp) + write_hwreg_to_v2(s_save_exec_lo) + write_hwreg_to_v2(s_save_exec_hi) + write_hwreg_to_v2(s_save_state_priv) + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) + write_hwreg_to_v2(s_save_tmp) + + write_hwreg_to_v2(s_save_xnack_mask) + + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_MODE) + write_hwreg_to_v2(s_save_m0) + + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) + write_hwreg_to_v2(s_save_m0) + + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) + write_hwreg_to_v2(s_save_m0) + + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) + write_hwreg_to_v2(s_save_m0) + + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) + write_hwreg_to_v2(s_save_m0) + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) + write_hwreg_to_v2(s_save_tmp) + + s_get_barrier_state s_save_tmp, -1 + s_wait_kmcnt (0) + write_hwreg_to_v2(s_save_tmp) + + // Write HWREGs with 16 VGPR lanes. 
TTMPs occupy space after this. + s_mov_b32 exec_lo, 0xFFFF + s_mov_b32 exec_hi, 0x0 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + + // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode. + s_mov_b32 exec_lo, 0xFFFFFFFF + + /* save SGPRs */ + // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save... + + // SGPR SR memory offset : size(VGPR)+size(SVGPR) + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) + get_svgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + s_nop 0x0 //Manually inserted wait states +L_SAVE_SGPR_LOOP: + // SGPR is allocated in 16 SGPR granularity + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] + s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] + + write_16sgpr_to_v2(s0) + + s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled? + s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE + + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80 + s_mov_b32 ttmp13, 0x0 + v_mov_b32 v2, 0x0 +L_SAVE_SGPR_SKIP_TCP_STORE: + + s_add_u32 m0, m0, 16 //next sgpr index + s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete? 
+ + //save the rest 12 SGPR + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + write_12sgpr_to_v2(s0) + + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + + /* save LDS */ + +L_SAVE_LDS: + // Change EXEC to all threads... + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI + s_mov_b32 exec_hi, 0x00000000 + s_branch L_SAVE_LDS_NORMAL +L_ENABLE_SAVE_LDS_EXEC_HI: + s_mov_b32 exec_hi, 0xFFFFFFFF +L_SAVE_LDS_NORMAL: + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? 
jump to L_SAVE_DONE + + s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK + s_cbranch_scc0 L_SAVE_LDS_DONE + + // first wave do LDS save; + + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY + s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) + get_svgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes() + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() + + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + //load 0~63*4(byte address) to vgpr v0 + v_mbcnt_lo_u32_b32 v0, -1, 0 + v_mbcnt_hi_u32_b32 v0, -1, v0 + v_mul_u32_u24 v0, 4, v0 + + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_mov_b32 m0, 0x0 + s_cbranch_scc1 L_SAVE_LDS_W64 + +L_SAVE_LDS_W32: + s_mov_b32 s3, 128 + s_nop 0 + s_nop 0 + s_nop 0 +L_SAVE_LDS_LOOP_W32: + ds_read_b32 v1, v0 + s_wait_idle + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 + v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete? 
+ + s_branch L_SAVE_LDS_DONE + +L_SAVE_LDS_W64: + s_mov_b32 s3, 256 + s_nop 0 + s_nop 0 + s_nop 0 +L_SAVE_LDS_LOOP_W64: + ds_read_b32 v1, v0 + s_wait_idle + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 + v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete? + +L_SAVE_LDS_DONE: + /* save VGPRs - set the Rest VGPRs */ +L_SAVE_VGPR: + // VGPR SR memory offset: 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI + s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs + s_mov_b32 exec_hi, 0x00000000 + s_branch L_SAVE_VGPR_NORMAL +L_ENABLE_SAVE_VGPR_EXEC_HI: + s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs + s_mov_b32 exec_hi, 0xFFFFFFFF +L_SAVE_VGPR_NORMAL: + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + //determine it is wave32 or wave64 + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_SAVE_VGPR_WAVE64 + + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR Allocated in 4-GPR granularity + + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_END + +L_SAVE_VGPR_W32_LOOP: + v_movrels_b32 v0, v0 //v0 = v[0+m0] + v_movrels_b32 v1, v1 //v1 = v[1+m0] + v_movrels_b32 v2, v2 //v2 = v[2+m0] + v_movrels_b32 v3, v3 //v3 
= v[3+m0] + + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3 + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete? + + s_branch L_SAVE_VGPR_END + +L_SAVE_VGPR_WAVE64: + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_SHARED_VGPR + +L_SAVE_VGPR_W64_LOOP: + v_movrels_b32 v0, v0 //v0 = v[0+m0] + v_movrels_b32 v1, v1 //v1 = v[1+m0] + v_movrels_b32 v2, v2 //v2 = v[2+m0] + v_movrels_b32 v3, v3 //v3 = v[3+m0] + + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3 + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete? 
+ +L_SAVE_SHARED_VGPR: + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? + s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_LDS + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) + //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. + //save shared_vgpr will start from the index of m0 + s_add_u32 s_save_alloc_size, s_save_alloc_size, m0 + s_mov_b32 exec_lo, 0xFFFFFFFF + s_mov_b32 exec_hi, 0x00000000 + +L_SAVE_SHARED_VGPR_WAVE64_LOOP: + v_movrels_b32 v0, v0 //v0 = v[0+m0] + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS + s_add_u32 m0, m0, 1 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete? + +L_SAVE_VGPR_END: + s_branch L_END_PGM + +L_RESTORE: + /* Setup Resource Contants */ + s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo + s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) + s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC + + // Save s_restore_spi_init_hi for later use. 
+ s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi + + //determine it is wave32 or wave64 + get_wave_size2(s_restore_size) + + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK + s_cbranch_scc0 L_RESTORE_VGPR + + /* restore LDS */ +L_RESTORE_LDS: + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI + s_mov_b32 exec_hi, 0x00000000 + s_branch L_RESTORE_LDS_NORMAL +L_ENABLE_RESTORE_LDS_EXEC_HI: + s_mov_b32 exec_hi, 0xFFFFFFFF +L_RESTORE_LDS_NORMAL: + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY + s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) + get_svgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_mov_b32 m0, 0x0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 + +L_RESTORE_LDS_LOOP_W32: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset + s_wait_idle + ds_store_addtid_b32 v0 + s_add_u32 m0, m0, 128 // 128 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW + s_cmp_lt_u32 m0, 
s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete? + s_branch L_RESTORE_VGPR + +L_RESTORE_LDS_LOOP_W64: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset + s_wait_idle + ds_store_addtid_b32 v0 + s_add_u32 m0, m0, 256 // 256 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete? + + /* restore VGPRs */ +L_RESTORE_VGPR: + // VGPR SR memory offset : 0 + s_mov_b32 s_restore_mem_offset, 0x0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI + s_mov_b32 exec_hi, 0x00000000 + s_branch L_RESTORE_VGPR_NORMAL +L_ENABLE_RESTORE_VGPR_EXEC_HI: + s_mov_b32 exec_hi, 0xFFFFFFFF +L_RESTORE_VGPR_NORMAL: + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + //determine it is wave32 or wave64 + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE64 + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 + s_mov_b32 m0, 4 //VGPR initial index value = 4 + s_cmp_lt_u32 m0, s_restore_alloc_size + s_cbranch_scc0 L_RESTORE_SGPR + +L_RESTORE_VGPR_WAVE32_LOOP: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset 
scope:SCOPE_SYS + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3 + s_wait_idle + v_movreld_b32 v0, v0 //v[0+m0] = v0 + v_movreld_b32 v1, v1 + v_movreld_b32 v2, v2 + v_movreld_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete? + + /* VGPR restore on v0 */ + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3 + s_wait_idle + + s_branch L_RESTORE_SGPR + +L_RESTORE_VGPR_WAVE64: + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_mov_b32 m0, 4 //VGPR initial index value = 4 + s_cmp_lt_u32 m0, s_restore_alloc_size + s_cbranch_scc0 L_RESTORE_SHARED_VGPR + +L_RESTORE_VGPR_WAVE64_LOOP: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, 
s_restore_mem_offset scope:SCOPE_SYS offset:256*3 + s_wait_idle + v_movreld_b32 v0, v0 //v[0+m0] = v0 + v_movreld_b32 v1, v1 + v_movreld_b32 v2, v2 + v_movreld_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? + +L_RESTORE_SHARED_VGPR: + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? + s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used? + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) + //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. + //restore shared_vgpr will start from the index of m0 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0 + s_mov_b32 exec_lo, 0xFFFFFFFF + s_mov_b32 exec_hi, 0x00000000 +L_RESTORE_SHARED_VGPR_WAVE64_LOOP: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS + s_wait_idle + v_movreld_b32 v0, v0 //v[0+m0] = v0 + s_add_u32 m0, m0, 1 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? + + s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!! 
+ + /* VGPR restore on v0 */ +L_RESTORE_V0: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3 + s_wait_idle + + /* restore SGPRs */ + //will be 2+8+16*6 + // SGPR SR memory offset : size(VGPR)+size(SVGPR) +L_RESTORE_SGPR: + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) + get_svgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + s_mov_b32 m0, s_sgpr_save_num + + read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + + read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + + L_RESTORE_SGPR_LOOP: + read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + s_movreld_b64 s8, s8 + s_movreld_b64 s10, s10 + s_movreld_b64 s12, s12 + s_movreld_b64 s14, s14 + + s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) 
? 1 : 0 + s_cbranch_scc0 L_RESTORE_SGPR_LOOP + + // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception. + // Clear DEBUG_EN before and restore MODE after the barrier. + s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0 + + /* restore HW registers */ +L_RESTORE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) + get_svgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // Restore s_restore_spi_init_hi before the saved value gets clobbered. + s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch + + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch + + read_hwreg_from_mem(s_restore_tmp, 
s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp + + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp + + // Only the first wave needs to restore the workgroup barrier. + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE + + // Skip over WAVE_STATUS, since there is no state to restore from it + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 + + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE + + // extract the saved signal count from s_restore_tmp + s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET + + // We need to call s_barrier_signal repeatedly to restore the signal + // count of the work group barrier. The member count is already + // initialized with the number of waves in the work group. +L_BARRIER_RESTORE_LOOP: + s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE + s_barrier_signal -1 + s_add_i32 s_restore_tmp, s_restore_tmp, -1 + s_branch L_BARRIER_RESTORE_LOOP + +L_SKIP_BARRIER_RESTORE: + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed. + // Only restore the other fields to avoid clobbering them. 
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv + s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv + s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv + + s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode + + // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size) + get_svgpr_size_bytes(s_restore_ttmps_hi) + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes() + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 + s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 + s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF + s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS + s_wait_idle + + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + + s_setreg_b32 
hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu + + // Make barrier and LDS state visible to all waves in the group. + // STATE_PRIV.BARRIER_COMPLETE may change after this point. + s_barrier_signal -2 + s_barrier_wait -2 + + s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + +L_END_PGM: + s_endpgm_saved +end + +function write_hwreg_to_v2(s) + // Copy into VGPR for later TCP store. + v_writelane_b32 v2, s, m0 + s_add_u32 m0, m0, 0x1 +end + + +function write_16sgpr_to_v2(s) + // Copy into VGPR for later TCP store. + for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++ + v_writelane_b32 v2, s[sgpr_idx], ttmp13 + s_add_u32 ttmp13, ttmp13, 0x1 + end +end + +function write_12sgpr_to_v2(s) + // Copy into VGPR for later TCP store. + for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++ + v_writelane_b32 v2, s[sgpr_idx], ttmp13 + s_add_u32 ttmp13, ttmp13, 0x1 + end +end + +function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS + s_add_u32 s_mem_offset, s_mem_offset, 4 +end + +function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_sub_u32 s_mem_offset, s_mem_offset, 4*16 + s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS +end + +function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_sub_u32 s_mem_offset, s_mem_offset, 4*8 + s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS +end + +function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_sub_u32 s_mem_offset, s_mem_offset, 4*4 + s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS +end + +function get_vgpr_size_bytes(s_vgpr_size_byte, s_size) + s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) + s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 + s_bitcmp1_b32 s_size, S_WAVE_SIZE + s_cbranch_scc1 L_ENABLE_SHIFT_W64 + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) 
//Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value) + s_branch L_SHIFT_DONE +L_ENABLE_SHIFT_W64: + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) +L_SHIFT_DONE: +end + +function get_svgpr_size_bytes(s_svgpr_size_byte) + s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) + s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7) +end + +function get_sgpr_size_bytes + return 512 +end + +function get_hwreg_size_bytes + return 128 +end + +function get_wave_size2(s_reg) + s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) + s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE +end diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index bb26338204f4..6869e07a2fff 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -37,17 +37,28 @@ * gc_9_4_3: * cpp -DASIC_FAMILY=GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3 * sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex + * + * gc_9_5_0: + * cpp -DASIC_FAMILY=GC_9_5_0 cwsr_trap_handler_gfx9.asm -P -o gc_9_5_0.sp3 + * sp3 gc_9_5_0.sp3 -hex gc_9_5_0.hex */ #define CHIP_VEGAM 18 #define CHIP_ARCTURUS 23 #define CHIP_ALDEBARAN 25 #define CHIP_GC_9_4_3 26 +#define CHIP_GC_9_5_0 27 var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised +#if ASIC_FAMILY < CHIP_GC_9_4_3 +#define VMEM_MODIFIERS slc:1 glc:1 +#else +#define VMEM_MODIFIERS sc0:1 nt:1 +#endif + 
/**************************************************************************/ /* variables */ /**************************************************************************/ @@ -62,7 +73,13 @@ var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000 var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +#if ASIC_FAMILY >= CHIP_GC_9_5_0 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 11 +var LDS_RESTORE_GRANULARITY_BYTES = 1280 +#else var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var LDS_RESTORE_GRANULARITY_BYTES = 512 +#endif var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 @@ -430,7 +447,9 @@ L_SAVE: s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) - + // Clear VSKIP state now that MODE.VSKIP has been saved. + // If user shader set it then vector instructions would be skipped. + s_setvskip 0,0 /* the first wave in the threadgroup */ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit @@ -557,12 +576,21 @@ if SAVE_AFTER_XNACK_ERROR v_lshlrev_b32 v2, 2, v3 L_SAVE_LDS_LOOP_SQC: +#if ASIC_FAMILY < CHIP_GC_9_5_0 ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40 s_waitcnt lgkmcnt(0) - write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset) v_add_u32 v2, 0x200, v2 +#else + // gfx950 needs to save in multiple of 256 bytes. 
+ ds_read_b32 v0, v2 + s_waitcnt lgkmcnt(0) + write_vgprs_to_mem_with_sqc(v0, 1, s_save_buf_rsrc0, s_save_mem_offset) + + v_add_u32 v2, 0x100, v2 +#endif + v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC @@ -581,11 +609,14 @@ end L_SAVE_LDS_LOOP_VECTOR: ds_read_b64 v[0:1], v2 //x =LDS[a], byte address s_waitcnt lgkmcnt(0) - buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1 + buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1 // s_waitcnt vmcnt(0) // v_add_u32 v2, vcc[0:1], v2, v3 v_add_u32 v2, v2, v3 v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size +#if ASIC_FAMILY >= CHIP_GC_9_5_0 + s_mov_b64 exec, vcc +#endif s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR // restore rsrc3 @@ -748,8 +779,13 @@ L_RESTORE: L_RESTORE_LDS_LOOP: buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW - s_add_u32 m0, m0, 256*2 // 128 DW - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW +#if ASIC_FAMILY >= CHIP_GC_9_5_0 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:512 // third 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:768 // forth 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:1024 // fifth 64DW +#endif + s_add_u32 m0, m0, LDS_RESTORE_GRANULARITY_BYTES // 128/320 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, LDS_RESTORE_GRANULARITY_BYTES //mem offset increased by 128/320 DW s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? 
@@ -979,17 +1015,17 @@ L_TCP_STORE_CHECK_DONE: end function write_4vgprs_to_mem(s_rsrc, s_mem_offset) - buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 - buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256 - buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2 - buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3 + buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS + buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256 + buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2 + buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3 end function read_4vgprs_from_mem(s_rsrc, s_mem_offset) - buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 - buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256 - buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2 - buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3 + buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS + buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256 + buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2 + buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3 s_waitcnt vmcnt(0) end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 7b826a136ceb..693469c18c60 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1423,6 +1423,7 @@ err: static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, + bool cache_line_size_missing, struct kfd_gpu_cache_info *pcache_info) { struct amdgpu_device *adev = kdev->adev; @@ -1437,6 +1438,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; pcache_info[i].cache_line_size 
= adev->gfx.config.gc_tcp_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* Scalar L1 Instruction Cache per SQC */ @@ -1449,6 +1452,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* Scalar L1 Data Cache per SQC */ @@ -1460,6 +1465,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 64; i++; } /* GL1 Data Cache per SA */ @@ -1472,7 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; - pcache_info[i].cache_line_size = 0; + if (cache_line_size_missing) + pcache_info[i].cache_line_size = 128; i++; } /* L2 Data Cache per GPU (Total Tex Cache) */ @@ -1484,6 +1492,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* L3 Data Cache per GPU */ @@ -1494,7 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); 
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; - pcache_info[i].cache_line_size = 0; + pcache_info[i].cache_line_size = 64; i++; } return i; @@ -1569,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) { int num_of_cache_types = 0; + bool cache_line_size_missing = false; switch (kdev->adev->asic_type) { case CHIP_KAVERI: @@ -1628,6 +1639,7 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): num_of_cache_types = kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, *pcache_info); @@ -1692,10 +1704,17 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + /* Cacheline size not available in IP discovery for gc11. + * kfd_fill_gpu_cache_info_from_gfx_config to hard code it + */ + cache_line_size_missing = true; + fallthrough; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): num_of_cache_types = - kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); + kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, + cache_line_size_missing, + *pcache_info); break; default: *pcache_info = dummy_cache_info; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 312dfa84f29f..a8abc3091801 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -350,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en) { uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode; uint32_t flags = pdd->process->dbg_flags; + struct amdgpu_device *adev = pdd->dev->adev; + int r; if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) return 0; + if (!pdd->proc_ctx_cpu_ptr) { + r = 
amdgpu_amdkfd_alloc_gtt_mem(adev, + AMDGPU_MES_PROC_CTX_SIZE, + &pdd->proc_ctx_bo, + &pdd->proc_ctx_gpu_addr, + &pdd->proc_ctx_cpu_ptr, + false); + if (r) { + dev_err(adev->dev, + "failed to allocate process context bo\n"); + return r; + } + memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); + } + return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl, pdd->watch_points, flags, sq_trap_en); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index 924d0fd85dfb..27aa1a5b120f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -79,6 +79,7 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev) return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) || KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 9b51dd75fefc..a29374c86405 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -85,6 +85,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) case IP_VERSION(4, 4, 0):/* ALDEBARAN */ case IP_VERSION(4, 4, 2): case IP_VERSION(4, 4, 5): + case IP_VERSION(4, 4, 4): case IP_VERSION(5, 0, 0):/* NAVI10 */ case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */ case IP_VERSION(5, 0, 2):/* NAVI14 */ @@ -152,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) break; case IP_VERSION(9, 4, 3): /* GC 9.4.3 */ case IP_VERSION(9, 4, 4): /* GC 9.4.4 */ + case IP_VERSION(9, 5, 0): /* GC 9.5.0 */ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9_4_3; break; @@ -356,6 +358,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) gfx_target_version = 90402; f2g = &gc_9_4_3_kfd2kgd; break; 
+ case IP_VERSION(9, 5, 0): + gfx_target_version = 90500; + f2g = &gc_9_4_3_kfd2kgd; + break; /* Navi10 */ case IP_VERSION(10, 1, 10): gfx_target_version = 100100; @@ -515,6 +521,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex); + } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx9_5_0_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_5_0_hex); } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > KFD_CWSR_TMA_OFFSET); @@ -567,6 +577,7 @@ static int kfd_gws_init(struct kfd_node *node) && kfd->mec2_fw_version >= 0x28) || (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) || KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) || + (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) || (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) && kfd->mec2_fw_version >= 0x6b) || @@ -638,6 +649,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes) struct kfd_node *knode; unsigned int i; + /* + * flush_work ensures that there are no outstanding + * work-queue items that will access interrupt_ring. New work items + * can't be created because we stopped interrupt handling above. + */ + flush_workqueue(kfd->ih_wq); + destroy_workqueue(kfd->ih_wq); + for (i = 0; i < num_nodes; i++) { knode = kfd->nodes[i]; device_queue_manager_uninit(knode->dqm); @@ -733,14 +752,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1; - /* For GFX9.4.3, we need special handling for VMIDs depending on - * partition mode. + /* For multi-partition capable GPUs, we need special handling for VMIDs + * depending on partition mode. * In CPX mode, the VMID range needs to be shared between XCDs. 
* Additionally, there are 13 VMIDs (3-15) available for KFD. To * divide them equally, we change starting VMID to 4 and not use * VMID 3. - * If the VMID range changes for GFX9.4.3, then this code MUST be - * revisited. + * If the VMID range changes for multi-partition capable GPUs, then + * this code MUST be revisited. */ if (kfd->adev->xcp_mgr) { partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr, @@ -805,14 +824,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; /* - * For GFX9.4.3, the KFD abstracts all partitions within a socket as - * xGMI connected in the topology so assign a unique hive id per - * device based on the pci device location if device is in PCIe mode. + * For multi-partition capable GPUs, the KFD abstracts all partitions + * within a socket as xGMI connected in the topology so assign a unique + * hive id per device based on the pci device location if device is in + * PCIe mode. */ - if (!kfd->hive_id && - (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) && - kfd->num_nodes > 1) + if (!kfd->hive_id && kfd->num_nodes > 1) kfd->hive_id = pci_dev_id(kfd->adev->pdev); kfd->noretry = kfd->adev->gmc.noretry; @@ -850,12 +867,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20); } - if ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) && - partition_mode == AMDGPU_CPX_PARTITION_MODE && + if (partition_mode == AMDGPU_CPX_PARTITION_MODE && kfd->num_nodes != 1) { - /* For GFX9.4.3 and CPX mode, first XCD gets VMID range - * 4-9 and second XCD gets VMID range 10-15. + /* For multi-partition capable GPUs and CPX mode, first + * XCD gets VMID range 4-9 and second XCD gets VMID + * range 10-15. */ node->vm_info.first_vmid_kfd = (i%2 == 0) ? 
@@ -879,8 +895,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, amdgpu_amdkfd_get_local_mem_info(kfd->adev, &node->local_mem_info, node->xcp); - if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) + if (kfd->adev->xcp_mgr) kfd_setup_interrupt_bitmap(node, i); /* Initialize the KFD node */ @@ -1059,21 +1074,6 @@ static int kfd_resume(struct kfd_node *node) return err; } -static inline void kfd_queue_work(struct workqueue_struct *wq, - struct work_struct *work) -{ - int cpu, new_cpu; - - cpu = new_cpu = smp_processor_id(); - do { - new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; - if (cpu_to_node(new_cpu) == numa_node_id()) - break; - } while (cpu != new_cpu); - - queue_work_on(new_cpu, wq, work); -} - /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { @@ -1099,7 +1099,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) patched_ihre, &is_patched) && enqueue_ih_ring_entry(node, is_patched ? 
patched_ihre : ih_ring_entry)) { - kfd_queue_work(node->ih_wq, &node->interrupt_work); + queue_work(node->kfd->ih_wq, &node->interrupt_work); spin_unlock_irqrestore(&node->interrupt_lock, flags); return; } @@ -1514,6 +1514,73 @@ bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) return kfd_compute_active(node); } +/** + * kgd2kfd_vmfault_fast_path() - KFD vm page fault interrupt handling fast path for gmc v9 + * @adev: amdgpu device + * @entry: vm fault interrupt vector + * @retry_fault: true if this is a retry fault + * + * retry fault - + * with CAM enabled, adev primary ring + * | gmc_v9_0_process_interrupt() + * adev soft_ring + * | gmc_v9_0_process_interrupt() worker failed to recover page fault + * KFD node ih_fifo + * | KFD interrupt_wq worker + * kfd_signal_vm_fault_event + * + * without CAM, adev primary ring1 + * | gmc_v9_0_process_interrupt worker failed to recover page fault + * KFD node ih_fifo + * | KFD interrupt_wq worker + * kfd_signal_vm_fault_event + * + * no-retry fault - + * adev primary ring + * | gmc_v9_0_process_interrupt() + * KFD node ih_fifo + * | KFD interrupt_wq worker + * kfd_signal_vm_fault_event + * + * fast path - After kfd_signal_vm_fault_event, gmc_v9_0_process_interrupt drops the page fault + * of the same process and does not copy the interrupt to the KFD node ih_fifo. + * With the gdb debugger enabled, the retry fault must be converted to a no-retry fault for + * the debugger, so the fast path cannot be used. 
+ * + * Return: + * true - use the fast path to handle this fault + * false - use normal path to handle it + */ +bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, + bool retry_fault) +{ + struct kfd_process *p; + u32 cam_index; + + if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) { + p = kfd_lookup_process_by_pasid(entry->pasid); + if (!p) + return true; + + if (p->gpu_page_fault && !p->debug_trap_enabled) { + if (retry_fault && adev->irq.retry_cam_enabled) { + cam_index = entry->src_data[2] & 0x3ff; + WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); + } + + kfd_unref_process(p); + return true; + } + + /* + * This is the first page fault, set flag and then signal user space + */ + p->gpu_page_fault = true; + kfd_unref_process(p); + } + return false; +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c79fe9069e22..d4593374e7a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -207,6 +207,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, if (!down_read_trylock(&adev->reset_domain->sem)) return -EIO; + if (!pdd->proc_ctx_cpu_ptr) { + r = amdgpu_amdkfd_alloc_gtt_mem(adev, + AMDGPU_MES_PROC_CTX_SIZE, + &pdd->proc_ctx_bo, + &pdd->proc_ctx_gpu_addr, + &pdd->proc_ctx_cpu_ptr, + false); + if (r) { + dev_err(adev->dev, + "failed to allocate process context bo\n"); + return r; + } + memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); + } + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); queue_input.process_id = qpd->pqm->process->pasid; queue_input.page_table_base_addr = qpd->page_table_base; @@ -2310,9 +2325,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, */ mqd_mgr = 
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { + while (halt_if_hws_hang) + schedule(); if (reset_queues_on_hws_hang(dqm)) { - while (halt_if_hws_hang) - schedule(); dqm->is_hws_hang = true; kfd_hws_hang(dqm); retval = -ETIME; @@ -2373,6 +2388,9 @@ static int wait_on_destroy_queue(struct device_queue_manager *dqm, q->process); int ret = 0; + if (WARN_ON(!pdd)) + return ret; + if (pdd->qpd.is_debug) return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 210bcc048f4c..67137e674f1d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -64,7 +64,8 @@ static int update_qpd_v9(struct device_queue_manager *dqm, qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index ea3792249209..d075f24e5f9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -748,6 +748,16 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, uint64_t *slots = page_slots(p->signal_page); uint32_t id; + /* + * If id is valid but slot is not signaled, the GPU may have signaled the same event twice + * before the driver had a chance to process the first interrupt; the signal slot is + * auto-reset after set_event wakes up user space, so just drop the second event as + * the application only needs to be woken up once. 
+ */ + if ((valid_id_bits > 31 || (1U << valid_id_bits) >= KFD_SIGNAL_EVENT_LIMIT) && + partial_id < KFD_SIGNAL_EVENT_LIMIT && slots[partial_id] == UNSIGNALED_EVENT_SLOT) + goto out_unlock; + if (valid_id_bits) pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", partial_id, valid_id_bits); @@ -776,6 +786,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, } } +out_unlock: rcu_read_unlock(); kfd_unref_process(p); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index d46a13156ee9..0cb5c582ce7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -184,6 +184,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; } + amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__GFX); break; case SOC15_IH_CLIENTID_VMC: case SOC15_IH_CLIENTID_VMC1: @@ -213,6 +214,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; } + amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__SDMA); break; default: dev_warn(dev->adev->dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 9b6b6e882593..783c2f5a04e4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -46,7 +46,7 @@ #include <linux/kfifo.h> #include "kfd_priv.h" -#define KFD_IH_NUM_ENTRIES 8192 +#define KFD_IH_NUM_ENTRIES 16384 static void interrupt_wq(struct work_struct *); @@ -62,11 +62,14 @@ int kfd_interrupt_init(struct kfd_node *node) return r; } - node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); - if (unlikely(!node->ih_wq)) { - kfifo_free(&node->ih_fifo); - dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n"); - return -ENOMEM; + if (!node->kfd->ih_wq) { + node->kfd->ih_wq = 
alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND, + node->kfd->num_nodes); + if (unlikely(!node->kfd->ih_wq)) { + kfifo_free(&node->ih_fifo); + dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n"); + return -ENOMEM; + } } spin_lock_init(&node->interrupt_lock); @@ -96,16 +99,6 @@ void kfd_interrupt_exit(struct kfd_node *node) spin_lock_irqsave(&node->interrupt_lock, flags); node->interrupts_active = false; spin_unlock_irqrestore(&node->interrupt_lock, flags); - - /* - * flush_work ensures that there are no outstanding - * work-queue items that will access interrupt_ring. New work items - * can't be created because we stopped interrupt handling above. - */ - flush_workqueue(node->ih_wq); - - destroy_workqueue(node->ih_wq); - kfifo_free(&node->ih_fifo); } @@ -114,55 +107,48 @@ void kfd_interrupt_exit(struct kfd_node *node) */ bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry) { - int count; - - count = kfifo_in(&node->ih_fifo, ih_ring_entry, - node->kfd->device_info.ih_ring_entry_size); - if (count != node->kfd->device_info.ih_ring_entry_size) { - dev_dbg_ratelimited(node->adev->dev, - "Interrupt ring overflow, dropping interrupt %d\n", - count); + if (kfifo_is_full(&node->ih_fifo)) { + dev_warn_ratelimited(node->adev->dev, "KFD node %d ih_fifo overflow\n", + node->node_id); return false; } + kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size); return true; } /* * Assumption: single reader/writer. 
This function is not re-entrant */ -static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry) +static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry) { int count; - count = kfifo_out(&node->ih_fifo, ih_ring_entry, - node->kfd->device_info.ih_ring_entry_size); - - WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size); + if (kfifo_is_empty(&node->ih_fifo)) + return false; + count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry, + node->kfd->device_info.ih_ring_entry_size); + WARN_ON(count != node->kfd->device_info.ih_ring_entry_size); return count == node->kfd->device_info.ih_ring_entry_size; } static void interrupt_wq(struct work_struct *work) { - struct kfd_node *dev = container_of(work, struct kfd_node, - interrupt_work); - uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; + struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work); + uint32_t *ih_ring_entry; unsigned long start_jiffies = jiffies; - if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { - dev_err_once(dev->adev->dev, "Ring entry too small\n"); - return; - } - - while (dequeue_ih_ring_entry(dev, ih_ring_entry)) { + while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) { dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev, ih_ring_entry); + kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size); + if (time_is_before_jiffies(start_jiffies + HZ)) { /* If we spent more than a second processing signals, * reschedule the worker to avoid soft-lockup warnings */ - queue_work(dev->ih_wq, &dev->interrupt_work); + queue_work(dev->kfd->ih_wq, &dev->interrupt_work); break; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index eacfeb32f35d..d05d199b5e44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -278,10 +278,11 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range 
*prange, struct migrate_vma *migrate, struct dma_fence **mfence, dma_addr_t *scratch, uint64_t ttm_res_offset) { - uint64_t npages = migrate->cpages; + uint64_t npages = migrate->npages; struct amdgpu_device *adev = node->adev; struct device *dev = adev->dev; struct amdgpu_res_cursor cursor; + uint64_t mpages = 0; dma_addr_t *src; uint64_t *dst; uint64_t i, j; @@ -295,18 +296,20 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, &cursor); - for (i = j = 0; i < npages; i++) { + for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) { struct page *spage; - dst[i] = cursor.start + (j << PAGE_SHIFT); - migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); - svm_migrate_get_vram_page(prange, migrate->dst[i]); - migrate->dst[i] = migrate_pfn(migrate->dst[i]); - + if (migrate->src[i] & MIGRATE_PFN_MIGRATE) { + dst[i] = cursor.start + (j << PAGE_SHIFT); + migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); + svm_migrate_get_vram_page(prange, migrate->dst[i]); + migrate->dst[i] = migrate_pfn(migrate->dst[i]); + mpages++; + } spage = migrate_pfn_to_page(migrate->src[i]); if (spage && !is_zone_device_page(spage)) { src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, - DMA_TO_DEVICE); + DMA_BIDIRECTIONAL); r = dma_mapping_error(dev, src[i]); if (r) { dev_err(dev, "%s: fail %d dma_map_page\n", @@ -353,9 +356,12 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, out_free_vram_pages: if (r) { pr_debug("failed %d to copy memory to vram\n", r); - while (i--) { + for (i = 0; i < npages && mpages; i++) { + if (!dst[i]) + continue; svm_migrate_put_vram_page(adev, dst[i]); migrate->dst[i] = 0; + mpages--; } } @@ -629,7 +635,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, goto out_oom; } - dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, 
DMA_BIDIRECTIONAL); r = dma_mapping_error(dev, dst[i]); if (r) { dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 84e8ea3a8a0c..ff417d5361c4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -78,7 +78,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se2 = se_mask[2]; m->compute_static_thread_mgmt_se3 = se_mask[3]; if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) && - KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) { + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) { m->compute_static_thread_mgmt_se4 = se_mask[4]; m->compute_static_thread_mgmt_se5 = se_mask[5]; m->compute_static_thread_mgmt_se6 = se_mask[6]; @@ -301,7 +302,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_ctx_save_control = 0; if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) && - KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) update_cu_mask(mm, mqd, minfo, 0); set_priority(m, q); @@ -885,7 +887,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->debugfs_show_mqd = debugfs_show_mqd; #endif if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) { + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) { mqd->init_mqd = init_mqd_v9_4_3; mqd->load_mqd = load_mqd_v9_4_3; mqd->update_mqd = update_mqd_v9_4_3; @@ -909,8 +912,10 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif + mqd->check_preemption_failed = check_preemption_failed; if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || - 
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) { + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) { mqd->init_mqd = init_mqd_hiq_v9_4_3; mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3; mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 37930629edc5..4984b41cd372 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -28,6 +28,10 @@ #include "kfd_kernel_queue.h" #include "kfd_priv.h" +#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0) +#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1) +#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2) + static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) { @@ -40,7 +44,7 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, - bool *over_subscription) + int *over_subscription) { unsigned int process_count, queue_count, compute_queue_count, gws_queue_count; unsigned int map_queue_size; @@ -58,17 +62,20 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * hws_max_conc_proc has been done in * kgd2kfd_device_init(). 
*/ - *over_subscription = false; + *over_subscription = 0; if (node->max_proc_per_quantum > 1) max_proc_per_quantum = node->max_proc_per_quantum; - if ((process_count > max_proc_per_quantum) || - compute_queue_count > get_cp_queues_num(pm->dqm) || - gws_queue_count > 1) { - *over_subscription = true; + if (process_count > max_proc_per_quantum) + *over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT; + if (compute_queue_count > get_cp_queues_num(pm->dqm)) + *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT; + if (gws_queue_count > 1) + *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT; + + if (*over_subscription) dev_dbg(dev, "Over subscribed runlist\n"); - } map_queue_size = pm->pmf->map_queues_size; /* calculate run list ib allocation size */ @@ -89,7 +96,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_buffer, uint64_t *rl_gpu_buffer, unsigned int *rl_buffer_size, - bool *is_over_subscription) + int *is_over_subscription) { struct kfd_node *node = pm->dqm->dev; struct device *dev = node->adev->dev; @@ -134,7 +141,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, struct qcm_process_device *qpd; struct queue *q; struct kernel_queue *kq; - bool is_over_subscription; + int is_over_subscription; rl_wptr = retval = processes_mapped = 0; @@ -213,15 +220,20 @@ static int pm_create_runlist_ib(struct packet_manager *pm, if (is_over_subscription) { if (!pm->is_over_subscription) - dev_warn( - dev, - "Runlist is getting oversubscribed. Expect reduced ROCm performance.\n"); + dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s. Expect reduced ROCm performance.\n", + is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ? + " too many processes." : "", + is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ? + " too many queues." : "", + is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ? + " multiple processes using cooperative launch." 
: ""); + retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, alloc_size_bytes / sizeof(uint32_t), true); } - pm->is_over_subscription = is_over_subscription; + pm->is_over_subscription = !!is_over_subscription; for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) pr_debug("0x%2X ", rl_buffer[i]); @@ -248,7 +260,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) default: if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) || KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0)) pm->pmf = &kfd_aldebaran_pm_funcs; else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1)) pm->pmf = &kfd_v9_pm_funcs; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9e5ca0b93b2a..d8cd913aa772 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -32,7 +32,7 @@ #include <linux/atomic.h> #include <linux/workqueue.h> #include <linux/spinlock.h> -#include <linux/kfd_ioctl.h> +#include <uapi/linux/kfd_ioctl.h> #include <linux/idr.h> #include <linux/kfifo.h> #include <linux/seq_file.h> @@ -207,7 +207,8 @@ enum cache_policy { #define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\ ((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \ - (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))) + (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \ + (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))) struct kfd_node; @@ -273,7 +274,6 @@ struct kfd_node { /* Interrupts */ struct kfifo ih_fifo; - struct workqueue_struct *ih_wq; struct work_struct interrupt_work; spinlock_t interrupt_lock; @@ -366,6 +366,8 @@ struct kfd_dev { struct kfd_node *nodes[MAX_KFD_NODES]; unsigned int num_nodes; + struct workqueue_struct *ih_wq; + /* Kernel doorbells for KFD device */ struct amdgpu_bo *doorbells; @@ -1002,6 
+1004,9 @@ struct kfd_process { struct semaphore runtime_enable_sema; bool is_runtime_retry; struct kfd_runtime_info runtime_info; + + /* if gpu page fault sent to KFD */ + bool gpu_page_fault; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ @@ -1150,7 +1155,8 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev, uint32_t i; if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && - KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) return dev->nodes[0]; for (i = 0; i < dev->num_nodes; i++) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 87cd52cf4ee9..083f83c94531 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) kfd_free_process_doorbells(pdd->dev->kfd, pdd); - if (pdd->dev->kfd->shared_resources.enable_mes) + if (pdd->dev->kfd->shared_resources.enable_mes && + pdd->proc_ctx_cpu_ptr) amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, &pdd->proc_ctx_bo); /* @@ -1159,7 +1160,8 @@ static void kfd_process_wq_release(struct work_struct *work) */ synchronize_rcu(); ef = rcu_access_pointer(p->ef); - dma_fence_signal(ef); + if (ef) + dma_fence_signal(ef); kfd_process_remove_sysfs(p); @@ -1608,7 +1610,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, struct kfd_process *p) { struct kfd_process_device *pdd = NULL; - int retval = 0; if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE)) return NULL; @@ -1632,21 +1633,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0); - if (dev->kfd->shared_resources.enable_mes) { - retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, - AMDGPU_MES_PROC_CTX_SIZE, - &pdd->proc_ctx_bo, - &pdd->proc_ctx_gpu_addr, - 
&pdd->proc_ctx_cpu_ptr, - false); - if (retval) { - dev_err(dev->adev->dev, - "failed to allocate process context bo\n"); - goto err_free_pdd; - } - memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); - } - p->pdds[p->n_pdds++] = pdd; if (kfd_dbg_is_per_vmid_supported(pdd->dev)) pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap( @@ -1658,10 +1644,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, idr_init(&pdd->alloc_idr); return pdd; - -err_free_pdd: - kfree(pdd); - return NULL; } /** @@ -2146,10 +2128,11 @@ int kfd_process_drain_interrupts(struct kfd_process_device *pdd) irq_drain_fence[3] = pdd->process->pasid; /* - * For GFX 9.4.3, send the NodeId also in IH cookie DW[3] + * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3] */ if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4)) { + KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) { node_id = ffs(pdd->dev->interrupt_bitmap) - 1; irq_drain_fence[3] |= node_id << 16; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c76db22a1000..bcddd989c7f3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -86,9 +86,12 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) if (pdd->already_dequeued) return; - + /* The MES context flush needs to filter out the case which the + * KFD process is created without setting up the MES context and + * queue for creating a compute queue. 
+ */ dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); - if (dev->kfd->shared_resources.enable_mes && + if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr && down_read_trylock(&dev->adev->reset_domain->sem)) { amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr); @@ -131,8 +134,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && - KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && + if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) && !dev->kfd->shared_resources.enable_mes) { if (gws) ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, @@ -197,6 +201,7 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, if (pqn->q->gws) { if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) && !dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_remove_gws_from_process( pqm->process->kgd_process_info, pqn->q->gws); @@ -212,13 +217,17 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; - struct kfd_process_device *pdd; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { if (pqn->q) { - pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); - kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); - kfd_queue_release_buffers(pdd, &pqn->q->properties); + struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device, + pqm->process); + if (pdd) { + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); + kfd_queue_release_buffers(pdd, &pqn->q->properties); + } else { + WARN_ON(!pdd); + } pqm_clean_queue_resource(pqm, pqn); } @@ 
-316,11 +325,12 @@ int pqm_create_queue(struct process_queue_manager *pqm, unsigned int max_queues = 127; /* HWS limit */ /* - * On GFX 9.4.3, increase the number of queues that - * can be created to 255. No HWS limit on GFX 9.4.3. + * On GFX 9.4.3/9.5.0, increase the number of queues that + * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0. */ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) max_queues = 255; q = NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index ad29634f8b44..ecccd7adbab4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -394,7 +394,8 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ - gfxv == 90008) /* GFX_VERSION_ARCTURUS */ + gfxv == 90008 || /* GFX_VERSION_ARCTURUS */ + gfxv == 90500) vgpr_size = 0x80000; else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */ gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */ @@ -405,9 +406,10 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) return vgpr_size; } -#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \ +#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \ (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ - LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) + (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\ + HWREG_SIZE_PER_CU) #define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ ((gfxv) >= 100100 ? 
12 : 8) /* GFX_VERSION_NAVI10*/ @@ -431,7 +433,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) : cu_num * 32; - wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE); + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE); ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, PAGE_SIZE); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3e2911895c74..bd3e20d981e0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1195,6 +1195,7 @@ svm_range_get_pte_flags(struct kfd_node *node, struct kfd_node *bo_node; uint32_t flags = prange->flags; uint32_t mapping_flags = 0; + uint32_t gc_ip_version = KFD_GC_VERSION(node); uint64_t pte_flags; bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT); @@ -1204,7 +1205,7 @@ svm_range_get_pte_flags(struct kfd_node *node, if (domain == SVM_RANGE_VRAM_DOMAIN) bo_node = prange->svm_bo->node; - switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) { + switch (gc_ip_version) { case IP_VERSION(9, 4, 1): if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_node == node) { @@ -1241,8 +1242,10 @@ svm_range_get_pte_flags(struct kfd_node *node, break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): if (ext_coherent) - mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC; + mtype_local = (gc_ip_version < IP_VERSION(9, 5, 0) && !node->adev->rev_id) ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_CC; else mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : amdgpu_mtype_local == 2 ? 
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; @@ -1257,9 +1260,13 @@ svm_range_get_pte_flags(struct kfd_node *node, */ else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent) mapping_flags |= AMDGPU_VM_MTYPE_NC; - /* PCIe P2P or extended system scope coherence */ - else + /* PCIe P2P on GPUs pre-9.5.0 */ + else if (gc_ip_version < IP_VERSION(9, 5, 0) && + !svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_UC; + /* Other remote memory */ + else + mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the APU */ } else if (node->adev->flags & AMD_IS_APU) { /* On NUMA systems, locality is determined per-page @@ -1271,7 +1278,10 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { - mapping_flags |= AMDGPU_VM_MTYPE_UC; + if (gc_ip_version < IP_VERSION(9, 5, 0)) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; } break; case IP_VERSION(12, 0, 0): @@ -1299,7 +1309,7 @@ svm_range_get_pte_flags(struct kfd_node *node, pte_flags = AMDGPU_PTE_VALID; pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM; pte_flags |= snoop ? 
AMDGPU_PTE_SNOOPED : 0; - if (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0)) + if (gc_ip_version >= IP_VERSION(12, 0, 0)) pte_flags |= AMDGPU_PTE_IS_PTE; pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 9476e30d6baa..ceb9fb475ef1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1714,7 +1714,8 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, pcache->cacheline_size = pcache_info[cache_type].cache_line_size; if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0)) mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); else mode = UNKNOWN_MEMORY_PARTITION_MODE; @@ -1776,7 +1777,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; int gpu_processor_id; - struct kfd_cache_properties *props_ext; + struct kfd_cache_properties *props_ext = NULL; int num_of_entries = 0; int num_of_cache_types = 0; struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 11e3f2f3b174..abd3b6564373 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -8,6 +8,8 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y depends on BROKEN || !CC_IS_CLANG || ARM64 || LOONGARCH || RISCV || SPARC64 || X86_64 + select CEC_CORE + select CEC_NOTIFIER select SND_HDA_COMPONENT if SND_HDA_CORE # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || LOONGARCH || RISCV)) 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 35c486ca74f6..1ea40696c955 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -98,6 +98,7 @@ #include <drm/drm_audio_component.h> #include <drm/drm_gem_atomic_helper.h> +#include <media/cec-notifier.h> #include <acpi/video.h> #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" @@ -956,13 +957,13 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) } } -static int dm_set_clockgating_state(void *handle, +static int dm_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dm_set_powergating_state(void *handle, +static int dm_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -2033,6 +2034,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH) adev->dm.dc->debug.force_subvp_mclk_switch = true; + if (amdgpu_dc_debug_mask & DC_DISABLE_SUBVP) + adev->dm.dc->debug.force_disable_subvp = true; + if (amdgpu_dc_debug_mask & DC_ENABLE_DML2) { adev->dm.dc->debug.using_dml2 = true; adev->dm.dc->debug.using_dml21 = true; @@ -2155,9 +2159,13 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - adev->dm.secure_display_ctxs = amdgpu_dm_crtc_secure_display_create_contexts(adev); - if (!adev->dm.secure_display_ctxs) + amdgpu_dm_crtc_secure_display_create_contexts(adev); + if (!adev->dm.secure_display_ctx.crtc_ctx) DRM_ERROR("amdgpu: failed to initialize secure display contexts.\n"); + + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1)) + adev->dm.secure_display_ctx.support_mul_roi = true; + #endif DRM_DEBUG_DRIVER("KMS initialized.\n"); @@ -2200,15 +2208,15 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) amdgpu_dm_destroy_drm_device(&adev->dm); #if 
defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - if (adev->dm.secure_display_ctxs) { + if (adev->dm.secure_display_ctx.crtc_ctx) { for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->dm.secure_display_ctxs[i].crtc) { - flush_work(&adev->dm.secure_display_ctxs[i].notify_ta_work); - flush_work(&adev->dm.secure_display_ctxs[i].forward_roi_work); + if (adev->dm.secure_display_ctx.crtc_ctx[i].crtc) { + flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].notify_ta_work); + flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].forward_roi_work); } } - kfree(adev->dm.secure_display_ctxs); - adev->dm.secure_display_ctxs = NULL; + kfree(adev->dm.secure_display_ctx.crtc_ctx); + adev->dm.secure_display_ctx.crtc_ctx = NULL; } #endif if (adev->dm.hdcp_workqueue) { @@ -2341,7 +2349,8 @@ static int load_dmcu_fw(struct amdgpu_device *adev) return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, "%s", fw_name_dmcu); + r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, AMDGPU_UCODE_REQUIRED, + "%s", fw_name_dmcu); if (r == -ENODEV) { /* DMCU firmware is not necessary, so don't raise a fuss if it's missing */ DRM_DEBUG_KMS("dm: DMCU firmware not found\n"); @@ -2749,6 +2758,48 @@ out_fail: mutex_unlock(&mgr->lock); } +void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector) +{ + struct cec_notifier *n = aconnector->notifier; + + if (!n) + return; + + cec_notifier_phys_addr_invalidate(n); +} + +void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector) +{ + struct drm_connector *connector = &aconnector->base; + struct cec_notifier *n = aconnector->notifier; + + if (!n) + return; + + cec_notifier_set_phys_addr(n, + connector->display_info.source_physical_address); +} + +static void s3_handle_hdmi_cec(struct drm_device *ddev, bool suspend) +{ + struct amdgpu_dm_connector *aconnector; + struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; + + drm_connector_list_iter_begin(ddev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + 
if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + aconnector = to_amdgpu_dm_connector(connector); + if (suspend) + hdmi_cec_unset_edid(aconnector); + else + hdmi_cec_set_edid(aconnector); + } + drm_connector_list_iter_end(&conn_iter); +} + static void s3_handle_mst(struct drm_device *dev, bool suspend) { struct amdgpu_dm_connector *aconnector; @@ -3020,6 +3071,8 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block) if (IS_ERR(adev->dm.cached_state)) return PTR_ERR(adev->dm.cached_state); + s3_handle_hdmi_cec(adev_to_drm(adev), true); + s3_handle_mst(adev_to_drm(adev), true); amdgpu_dm_irq_suspend(adev); @@ -3292,6 +3345,8 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) */ amdgpu_dm_irq_resume_early(adev); + s3_handle_hdmi_cec(ddev, false); + /* On resume we need to rewrite the MSTM control bits to enable MST*/ s3_handle_mst(ddev, false); @@ -3606,6 +3661,7 @@ void amdgpu_dm_update_connector_after_detect( dc_sink_retain(aconnector->dc_sink); if (sink->dc_edid.length == 0) { aconnector->drm_edid = NULL; + hdmi_cec_unset_edid(aconnector); if (aconnector->dc_link->aux_mode) { drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); } @@ -3615,6 +3671,7 @@ void amdgpu_dm_update_connector_after_detect( aconnector->drm_edid = drm_edid_alloc(edid, sink->dc_edid.length); drm_edid_connector_update(connector, aconnector->drm_edid); + hdmi_cec_set_edid(aconnector); if (aconnector->dc_link->aux_mode) drm_dp_cec_attach(&aconnector->dm_dp_aux.aux, connector->display_info.source_physical_address); @@ -3631,6 +3688,7 @@ void amdgpu_dm_update_connector_after_detect( amdgpu_dm_update_freesync_caps(connector, aconnector->drm_edid); update_connector_ext_caps(aconnector); } else { + hdmi_cec_unset_edid(aconnector); drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); amdgpu_dm_update_freesync_caps(connector, NULL); aconnector->num_modes = 0; @@ -5314,7 +5372,8 @@ static int dm_init_microcode(struct amdgpu_device *adev) /* ASIC doesn't support DMUB. 
*/ return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, "%s", fw_name_dmub); + r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, AMDGPU_UCODE_REQUIRED, + "%s", fw_name_dmub); return r; } @@ -5530,8 +5589,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, const u64 tiling_flags, struct dc_plane_info *plane_info, struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) + bool tmz_surface) { const struct drm_framebuffer *fb = plane_state->fb; const struct amdgpu_framebuffer *afb = @@ -5630,7 +5688,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, &plane_info->tiling_info, &plane_info->plane_size, &plane_info->dcc, address, - tmz_surface, force_disable_dcc); + tmz_surface); if (ret) return ret; @@ -5651,7 +5709,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, struct dc_scaling_info scaling_info; struct dc_plane_info plane_info; int ret; - bool force_disable_dcc = false; ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, plane_state, &scaling_info); if (ret) @@ -5662,13 +5719,11 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, dc_plane_state->clip_rect = scaling_info.clip_rect; dc_plane_state->scaling_quality = scaling_info.scaling_quality; - force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend; ret = fill_dc_plane_info_and_addr(adev, plane_state, afb->tiling_flags, &plane_info, &dc_plane_state->address, - afb->tmz_surface, - force_disable_dcc); + afb->tmz_surface); if (ret) return ret; @@ -7050,6 +7105,7 @@ static void amdgpu_dm_connector_unregister(struct drm_connector *connector) if (amdgpu_dm_should_create_sysfs(amdgpu_dm_connector)) sysfs_remove_group(&connector->kdev->kobj, &amdgpu_group); + cec_notifier_conn_unregister(amdgpu_dm_connector->notifier); drm_dp_aux_unregister(&amdgpu_dm_connector->dm_dp_aux.aux); } @@ -8292,6 +8348,27 @@ create_i2c(struct ddc_service *ddc_service, return i2c; } +int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector 
*aconnector) +{ + struct cec_connector_info conn_info; + struct drm_device *ddev = aconnector->base.dev; + struct device *hdmi_dev = ddev->dev; + + if (amdgpu_dc_debug_mask & DC_DISABLE_HDMI_CEC) { + drm_info(ddev, "HDMI-CEC feature masked\n"); + return -EINVAL; + } + + cec_fill_conn_info_from_drm(&conn_info, &aconnector->base); + aconnector->notifier = + cec_notifier_conn_register(hdmi_dev, NULL, &conn_info); + if (!aconnector->notifier) { + drm_err(ddev, "Failed to create cec notifier\n"); + return -ENOMEM; + } + + return 0; +} /* * Note: this function assumes that dc_link_detect() was called for the @@ -8355,6 +8432,10 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, drm_connector_attach_encoder( &aconnector->base, &aencoder->base); + if (connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector_type == DRM_MODE_CONNECTOR_HDMIB) + amdgpu_dm_initialize_hdmi_connector(aconnector); + if (connector_type == DRM_MODE_CONNECTOR_DisplayPort || connector_type == DRM_MODE_CONNECTOR_eDP) amdgpu_dm_initialize_dp_connector(dm, aconnector, link->link_index); @@ -8414,16 +8495,6 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, struct dm_crtc_state *acrtc_state) { - /* - * We have no guarantee that the frontend index maps to the same - * backend index - some even map to more than one. - * - * TODO: Use a different interrupt or check DC itself for the mapping. 
- */ - int irq_type = - amdgpu_display_crtc_idx_to_irq_type( - adev, - acrtc->crtc_id); struct drm_vblank_crtc_config config = {0}; struct dc_crtc_timing *timing; int offdelay; @@ -8449,28 +8520,7 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, drm_crtc_vblank_on_config(&acrtc->base, &config); - - amdgpu_irq_get( - adev, - &adev->pageflip_irq, - irq_type); -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_get( - adev, - &adev->vline0_irq, - irq_type); -#endif } else { -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_put( - adev, - &adev->vline0_irq, - irq_type); -#endif - amdgpu_irq_put( - adev, - &adev->pageflip_irq, - irq_type); drm_crtc_vblank_off(&acrtc->base); } } @@ -8941,6 +8991,7 @@ static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach, struct replay_settings *pr = &acrtc_state->stream->link->replay_settings; struct amdgpu_dm_connector *aconn = (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context; + bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state); if (acrtc_state->update_type > UPDATE_TYPE_FAST) { if (pr->config.replay_supported && !pr->replay_feature_enabled) @@ -8967,14 +9018,15 @@ static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach, * adequate number of fast atomic commits to notify KMD * of update events. See `vblank_control_worker()`. 
*/ - if (acrtc_attach->dm_irq_params.allow_sr_entry && + if (!vrr_active && + acrtc_attach->dm_irq_params.allow_sr_entry && #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) && #endif (current_ts - psr->psr_dirty_rects_change_timestamp_ns) > 500000000) { if (pr->replay_feature_enabled && !pr->replay_allow_active) amdgpu_dm_replay_enable(acrtc_state->stream, true); - if (psr->psr_version >= DC_PSR_VERSION_SU_1 && + if (psr->psr_version == DC_PSR_VERSION_SU_1 && !psr->psr_allow_active && !aconn->disallow_edp_enter_psr) amdgpu_dm_psr_enable(acrtc_state->stream); } @@ -9111,7 +9163,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, afb->tiling_flags, &bundle->plane_infos[planes_count], &bundle->flip_addrs[planes_count].address, - afb->tmz_surface, false); + afb->tmz_surface); drm_dbg_state(state->dev, "plane: id=%d dcc_en=%d\n", new_plane_state->plane->index, @@ -9145,7 +9197,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, acrtc_state->stream->link->psr_settings.psr_dirty_rects_change_timestamp_ns = timestamp_ns; if (acrtc_state->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(acrtc_state->stream); + amdgpu_dm_psr_disable(acrtc_state->stream, true); mutex_unlock(&dm->dc_lock); } } @@ -9311,11 +9363,11 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->stream_update.abm_level = &acrtc_state->abm_level; mutex_lock(&dm->dc_lock); - if (acrtc_state->update_type > UPDATE_TYPE_FAST) { + if ((acrtc_state->update_type > UPDATE_TYPE_FAST) || vrr_active) { if (acrtc_state->stream->link->replay_settings.replay_allow_active) amdgpu_dm_replay_disable(acrtc_state->stream); if (acrtc_state->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(acrtc_state->stream); + amdgpu_dm_psr_disable(acrtc_state->stream, true); } mutex_unlock(&dm->dc_lock); @@ -10074,14 +10126,19 @@ static void amdgpu_dm_atomic_commit_tail(struct 
drm_atomic_state *state) if (amdgpu_dm_is_valid_crc_source(cur_crc_src)) { #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) if (amdgpu_dm_crc_window_is_activated(crtc)) { + uint8_t cnt; spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); - acrtc->dm_irq_params.window_param.update_win = true; - - /** - * It takes 2 frames for HW to stably generate CRC when - * resuming from suspend, so we set skip_frame_cnt 2. - */ - acrtc->dm_irq_params.window_param.skip_frame_cnt = 2; + for (cnt = 0; cnt < MAX_CRC_WINDOW_NUM; cnt++) { + if (acrtc->dm_irq_params.window_param[cnt].enable) { + acrtc->dm_irq_params.window_param[cnt].update_win = true; + + /** + * It takes 2 frames for HW to stably generate CRC when + * resuming from suspend, so we set skip_frame_cnt 2. + */ + acrtc->dm_irq_params.window_param[cnt].skip_frame_cnt = 2; + } + } spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); } #endif @@ -11169,8 +11226,8 @@ dm_get_plane_scale(struct drm_plane_state *plane_state, int plane_src_w, plane_src_h; dm_get_oriented_plane_size(plane_state, &plane_src_w, &plane_src_h); - *out_plane_scale_w = plane_state->crtc_w * 1000 / plane_src_w; - *out_plane_scale_h = plane_state->crtc_h * 1000 / plane_src_h; + *out_plane_scale_w = plane_src_w ? plane_state->crtc_w * 1000 / plane_src_w : 0; + *out_plane_scale_h = plane_src_h ? 
plane_state->crtc_h * 1000 / plane_src_h : 0; } /* @@ -11424,6 +11481,30 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev, return 0; } +static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev, + struct drm_atomic_state *state, + struct drm_crtc_state *crtc_state) +{ + struct drm_plane *plane; + struct drm_plane_state *new_plane_state, *old_plane_state; + + drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) { + new_plane_state = drm_atomic_get_plane_state(state, plane); + old_plane_state = drm_atomic_get_plane_state(state, plane); + + if (IS_ERR(new_plane_state) || IS_ERR(old_plane_state)) { + DRM_ERROR("Failed to get plane state for plane %s\n", plane->name); + return false; + } + + if (old_plane_state->fb && new_plane_state->fb && + get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb)) + return true; + } + + return false; +} + /** * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM. * @@ -11621,10 +11702,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, /* Remove exiting planes if they are modified */ for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) { - if (old_plane_state->fb && new_plane_state->fb && - get_mem_type(old_plane_state->fb) != - get_mem_type(new_plane_state->fb)) - lock_and_validation_needed = true; ret = dm_update_plane_state(dc, state, plane, old_plane_state, @@ -11919,9 +11996,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, /* * Only allow async flips for fast updates that don't change - * the FB pitch, the DCC state, rotation, etc. + * the FB pitch, the DCC state, rotation, mem_type, etc. 
*/ - if (new_crtc_state->async_flip && lock_and_validation_needed) { + if (new_crtc_state->async_flip && + (lock_and_validation_needed || + amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) { drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] async flips are only supported for fast updates\n", crtc->base.id, crtc->name); @@ -12253,10 +12332,14 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || sink->sink_signal == SIGNAL_TYPE_EDP)) { - amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; - amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; - if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) - freesync_capable = true; + if (amdgpu_dm_connector->dc_link && + amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { + amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; + amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; + if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) + freesync_capable = true; + } + parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); if (vsdb_info.replay_mode) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 2cf9f6ca20bc..6bc6e8d10521 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -541,12 +541,12 @@ struct amdgpu_display_manager { #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) /** - * @secure_display_ctxs: + * @secure_display_ctx: * - * Store the ROI information and the work_struct to command dmub and psp for - * all crtcs. + * Store secure display relevant info. e.g. the ROI information + * , the work_struct to command dmub, etc. 
*/ - struct secure_display_context *secure_display_ctxs; + struct secure_display_context secure_display_ctx; #endif /** * @hpd_rx_offload_wq: @@ -671,6 +671,8 @@ struct amdgpu_dm_connector { uint32_t connector_id; int bl_idx; + struct cec_notifier *notifier; + /* we need to mind the EDID between detect and get modes due to analog/digital/tvencoder */ const struct drm_edid *drm_edid; @@ -697,6 +699,8 @@ struct amdgpu_dm_connector { struct drm_dp_mst_port *mst_output_port; struct amdgpu_dm_connector *mst_root; struct drm_dp_aux *dsc_aux; + uint32_t mst_local_bw; + uint16_t vc_full_pbn; struct mutex handle_mst_msg_ready; /* TODO see if we can merge with ddc_bus or make a dm_connector */ @@ -1010,4 +1014,8 @@ void dm_free_gpu_mem(struct amdgpu_device *adev, bool amdgpu_dm_is_headless(struct amdgpu_device *adev); +void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector); +void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector); +int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector); + #endif /* __AMDGPU_DM_H__ */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index f936a35fa9eb..033bd817d871 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -30,6 +30,7 @@ #include "amdgpu_dm.h" #include "dc.h" #include "amdgpu_securedisplay.h" +#include "amdgpu_dm_psr.h" static const char *const pipe_crc_sources[] = { "none", @@ -83,45 +84,274 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc, } #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY +static void update_phy_id_mapping(struct amdgpu_device *adev) +{ + struct drm_device *ddev = adev_to_drm(adev); + struct amdgpu_display_manager *dm = &adev->dm; + struct drm_connector *connector; + struct amdgpu_dm_connector *aconnector; + struct amdgpu_dm_connector *sort_connector[AMDGPU_DM_MAX_CRTC] = {NULL}; + struct 
drm_connector_list_iter iter; + uint8_t idx = 0, idx_2 = 0, connector_cnt = 0; + + dm->secure_display_ctx.phy_mapping_updated = false; + + mutex_lock(&ddev->mode_config.mutex); + drm_connector_list_iter_begin(ddev, &iter); + drm_for_each_connector_iter(connector, &iter) { + + if (connector->status != connector_status_connected) + continue; + + if (idx >= AMDGPU_DM_MAX_CRTC) { + DRM_WARN("%s connected connectors exceed max crtc\n", __func__); + mutex_unlock(&ddev->mode_config.mutex); + return; + } + + aconnector = to_amdgpu_dm_connector(connector); + + sort_connector[idx] = aconnector; + idx++; + connector_cnt++; + } + drm_connector_list_iter_end(&iter); + + /* sort connectors by link_enc_hw_instance first */ + for (idx = connector_cnt; idx > 1 ; idx--) { + for (idx_2 = 0; idx_2 < (idx - 1); idx_2++) { + if (sort_connector[idx_2]->dc_link->link_enc_hw_inst > + sort_connector[idx_2 + 1]->dc_link->link_enc_hw_inst) + swap(sort_connector[idx_2], sort_connector[idx_2 + 1]); + } + } + + /* + * Sort mst connectors by RAD. mst connectors with the same enc_hw_instance are already + * sorted together above. + */ + for (idx = 0; idx < connector_cnt; /*Do nothing*/) { + if (sort_connector[idx]->mst_root) { + uint8_t i, j, k; + uint8_t mst_con_cnt = 1; + + for (idx_2 = (idx + 1); idx_2 < connector_cnt; idx_2++) { + if (sort_connector[idx_2]->mst_root == sort_connector[idx]->mst_root) + mst_con_cnt++; + else + break; + } + + for (i = mst_con_cnt; i > 1; i--) { + for (j = idx; j < (idx + i - 2); j++) { + int mstb_lct = sort_connector[j]->mst_output_port->parent->lct; + int next_mstb_lct = sort_connector[j + 1]->mst_output_port->parent->lct; + u8 *rad; + u8 *next_rad; + bool swap = false; + + /* Sort by mst tree depth first. 
Then compare RAD if depth is the same*/ + if (mstb_lct > next_mstb_lct) { + swap = true; + } else if (mstb_lct == next_mstb_lct) { + if (mstb_lct == 1) { + if (sort_connector[j]->mst_output_port->port_num > sort_connector[j + 1]->mst_output_port->port_num) + swap = true; + } else if (mstb_lct > 1) { + rad = sort_connector[j]->mst_output_port->parent->rad; + next_rad = sort_connector[j + 1]->mst_output_port->parent->rad; + + for (k = 0; k < mstb_lct - 1; k++) { + int shift = (k % 2) ? 0 : 4; + int port_num = (rad[k / 2] >> shift) & 0xf; + int next_port_num = (next_rad[k / 2] >> shift) & 0xf; + + if (port_num > next_port_num) { + swap = true; + break; + } + } + } else { + DRM_ERROR("MST LCT shouldn't be set as < 1"); + mutex_unlock(&ddev->mode_config.mutex); + return; + } + } + + if (swap) + swap(sort_connector[j], sort_connector[j + 1]); + } + } + + idx += mst_con_cnt; + } else { + idx++; + } + } + + /* Complete sorting. Assign relavant result to dm->secure_display_ctx.phy_id_mapping[]*/ + memset(dm->secure_display_ctx.phy_id_mapping, 0, sizeof(dm->secure_display_ctx.phy_id_mapping)); + for (idx = 0; idx < connector_cnt; idx++) { + aconnector = sort_connector[idx]; + + dm->secure_display_ctx.phy_id_mapping[idx].assigned = true; + dm->secure_display_ctx.phy_id_mapping[idx].is_mst = false; + dm->secure_display_ctx.phy_id_mapping[idx].enc_hw_inst = aconnector->dc_link->link_enc_hw_inst; + + if (sort_connector[idx]->mst_root) { + dm->secure_display_ctx.phy_id_mapping[idx].is_mst = true; + dm->secure_display_ctx.phy_id_mapping[idx].lct = aconnector->mst_output_port->parent->lct; + dm->secure_display_ctx.phy_id_mapping[idx].port_num = aconnector->mst_output_port->port_num; + memcpy(dm->secure_display_ctx.phy_id_mapping[idx].rad, + aconnector->mst_output_port->parent->rad, sizeof(aconnector->mst_output_port->parent->rad)); + } + } + mutex_unlock(&ddev->mode_config.mutex); + + dm->secure_display_ctx.phy_id_mapping_cnt = connector_cnt; + 
dm->secure_display_ctx.phy_mapping_updated = true; +} + +static bool get_phy_id(struct amdgpu_display_manager *dm, + struct amdgpu_dm_connector *aconnector, uint8_t *phy_id) +{ + int idx, idx_2; + bool found = false; + + /* + * Assume secure display start after all connectors are probed. The connection + * config is static as well + */ + if (!dm->secure_display_ctx.phy_mapping_updated) { + DRM_WARN("%s Should update the phy id table before get it's value", __func__); + return false; + } + + for (idx = 0; idx < dm->secure_display_ctx.phy_id_mapping_cnt; idx++) { + if (!dm->secure_display_ctx.phy_id_mapping[idx].assigned) { + DRM_ERROR("phy_id_mapping[%d] should be assigned", idx); + return false; + } + + if (aconnector->dc_link->link_enc_hw_inst == + dm->secure_display_ctx.phy_id_mapping[idx].enc_hw_inst) { + if (!dm->secure_display_ctx.phy_id_mapping[idx].is_mst) { + found = true; + goto out; + } else { + /* Could caused by wrongly pass mst root connector */ + if (!aconnector->mst_output_port) { + DRM_ERROR("%s Check mst case but connector without a port assigned", __func__); + return false; + } + + if (aconnector->mst_root && + aconnector->mst_root->mst_mgr.mst_primary == NULL) { + DRM_WARN("%s pass in a stale mst connector", __func__); + } + + if (aconnector->mst_output_port->parent->lct == dm->secure_display_ctx.phy_id_mapping[idx].lct && + aconnector->mst_output_port->port_num == dm->secure_display_ctx.phy_id_mapping[idx].port_num) { + if (aconnector->mst_output_port->parent->lct == 1) { + found = true; + goto out; + } else if (aconnector->mst_output_port->parent->lct > 1) { + /* Check RAD */ + for (idx_2 = 0; idx_2 < aconnector->mst_output_port->parent->lct - 1; idx_2++) { + int shift = (idx_2 % 2) ? 
0 : 4; + int port_num = (aconnector->mst_output_port->parent->rad[idx_2 / 2] >> shift) & 0xf; + int port_num2 = (dm->secure_display_ctx.phy_id_mapping[idx].rad[idx_2 / 2] >> shift) & 0xf; + + if (port_num != port_num2) + break; + } + + if (idx_2 == aconnector->mst_output_port->parent->lct - 1) { + found = true; + goto out; + } + } else { + DRM_ERROR("lCT should be >= 1"); + return false; + } + } + } + } + } + +out: + if (found) { + DRM_DEBUG_DRIVER("Associated secure display PHY ID as %d", idx); + *phy_id = idx; + } else { + DRM_WARN("Can't find associated phy ID"); + return false; + } + + return true; +} + static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc, struct dc_stream_state *stream) { struct drm_device *drm_dev = crtc->dev; struct amdgpu_display_manager *dm = &drm_to_adev(drm_dev)->dm; struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + struct amdgpu_dm_connector *aconnector; bool was_activated; + uint8_t phy_id; + unsigned long flags; + int i; - spin_lock_irq(&drm_dev->event_lock); - was_activated = acrtc->dm_irq_params.window_param.activated; - acrtc->dm_irq_params.window_param.x_start = 0; - acrtc->dm_irq_params.window_param.y_start = 0; - acrtc->dm_irq_params.window_param.x_end = 0; - acrtc->dm_irq_params.window_param.y_end = 0; - acrtc->dm_irq_params.window_param.activated = false; - acrtc->dm_irq_params.window_param.update_win = false; - acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; - spin_unlock_irq(&drm_dev->event_lock); + spin_lock_irqsave(&drm_dev->event_lock, flags); + was_activated = acrtc->dm_irq_params.crc_window_activated; + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { + acrtc->dm_irq_params.window_param[i].x_start = 0; + acrtc->dm_irq_params.window_param[i].y_start = 0; + acrtc->dm_irq_params.window_param[i].x_end = 0; + acrtc->dm_irq_params.window_param[i].y_end = 0; + acrtc->dm_irq_params.window_param[i].enable = false; + acrtc->dm_irq_params.window_param[i].update_win = false; + 
acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 0; + } + acrtc->dm_irq_params.crc_window_activated = false; + spin_unlock_irqrestore(&drm_dev->event_lock, flags); /* Disable secure_display if it was enabled */ - if (was_activated) { + if (was_activated && dm->secure_display_ctx.op_mode == LEGACY_MODE) { /* stop ROI update on this crtc */ - flush_work(&dm->secure_display_ctxs[crtc->index].notify_ta_work); - flush_work(&dm->secure_display_ctxs[crtc->index].forward_roi_work); - dc_stream_forward_crc_window(stream, NULL, true); + flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].notify_ta_work); + flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].forward_roi_work); + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + + if (aconnector && get_phy_id(dm, aconnector, &phy_id)) { + if (dm->secure_display_ctx.support_mul_roi) + dc_stream_forward_multiple_crc_window(stream, NULL, phy_id, true); + else + dc_stream_forward_crc_window(stream, NULL, phy_id, true); + } else { + DRM_DEBUG_DRIVER("%s Can't find matching phy id", __func__); + } } } static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work) { - struct secure_display_context *secure_display_ctx; + struct secure_display_crtc_context *crtc_ctx; struct psp_context *psp; struct ta_securedisplay_cmd *securedisplay_cmd; struct drm_crtc *crtc; struct dc_stream_state *stream; + struct amdgpu_dm_connector *aconnector; uint8_t phy_inst; + struct amdgpu_display_manager *dm; + struct crc_data crc_cpy[MAX_CRC_WINDOW_NUM]; + unsigned long flags; + uint8_t roi_idx = 0; int ret; + int i; - secure_display_ctx = container_of(work, struct secure_display_context, notify_ta_work); - crtc = secure_display_ctx->crtc; + crtc_ctx = container_of(work, struct secure_display_crtc_context, notify_ta_work); + crtc = crtc_ctx->crtc; if (!crtc) return; @@ -133,21 +363,50 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work) return; } + dm = &drm_to_adev(crtc->dev)->dm; stream = 
to_amdgpu_crtc(crtc)->dm_irq_params.stream; - phy_inst = stream->link->link_enc_hw_inst; - - /* need lock for multiple crtcs to use the command buffer */ - mutex_lock(&psp->securedisplay_context.mutex); + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + if (!aconnector) + return; - psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, - TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); + mutex_lock(&crtc->dev->mode_config.mutex); + if (!get_phy_id(dm, aconnector, &phy_inst)) { + DRM_WARN("%s Can't find mapping phy id!", __func__); + mutex_unlock(&crtc->dev->mode_config.mutex); + return; + } + mutex_unlock(&crtc->dev->mode_config.mutex); - securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst; + spin_lock_irqsave(&crtc->dev->event_lock, flags); + memcpy(crc_cpy, crtc_ctx->crc_info.crc, sizeof(struct crc_data) * MAX_CRC_WINDOW_NUM); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + /* need lock for multiple crtcs to use the command buffer */ + mutex_lock(&psp->securedisplay_context.mutex); /* PSP TA is expected to finish data transmission over I2C within current frame, * even there are up to 4 crtcs request to send in this frame. 
*/ - ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); + if (dm->secure_display_ctx.support_mul_roi) { + psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, + TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2); + + securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.phy_id = phy_inst; + + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { + if (crc_cpy[i].crc_ready) + roi_idx |= 1 << i; + } + securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.roi_idx = roi_idx; + + ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2); + } else { + psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, + TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); + + securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst; + + ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); + } if (!ret) { if (securedisplay_cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS) @@ -160,22 +419,47 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work) static void amdgpu_dm_forward_crc_window(struct work_struct *work) { - struct secure_display_context *secure_display_ctx; + struct secure_display_crtc_context *crtc_ctx; struct amdgpu_display_manager *dm; struct drm_crtc *crtc; struct dc_stream_state *stream; + struct amdgpu_dm_connector *aconnector; + struct crc_window roi_cpy[MAX_CRC_WINDOW_NUM]; + unsigned long flags; + uint8_t phy_id; - secure_display_ctx = container_of(work, struct secure_display_context, forward_roi_work); - crtc = secure_display_ctx->crtc; + crtc_ctx = container_of(work, struct secure_display_crtc_context, forward_roi_work); + crtc = crtc_ctx->crtc; if (!crtc) return; dm = &drm_to_adev(crtc->dev)->dm; stream = to_amdgpu_crtc(crtc)->dm_irq_params.stream; + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + + if (!aconnector) + return; + + mutex_lock(&crtc->dev->mode_config.mutex); + if (!get_phy_id(dm, aconnector, &phy_id)) { + DRM_WARN("%s Can't find mapping 
phy id!", __func__); + mutex_unlock(&crtc->dev->mode_config.mutex); + return; + } + mutex_unlock(&crtc->dev->mode_config.mutex); + + spin_lock_irqsave(&crtc->dev->event_lock, flags); + memcpy(roi_cpy, crtc_ctx->roi, sizeof(struct crc_window) * MAX_CRC_WINDOW_NUM); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); mutex_lock(&dm->dc_lock); - dc_stream_forward_crc_window(stream, &secure_display_ctx->rect, false); + if (dm->secure_display_ctx.support_mul_roi) + dc_stream_forward_multiple_crc_window(stream, roi_cpy, + phy_id, false); + else + dc_stream_forward_crc_window(stream, &roi_cpy[0].rect, + phy_id, false); mutex_unlock(&dm->dc_lock); } @@ -186,7 +470,7 @@ bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc) bool ret = false; spin_lock_irq(&drm_dev->event_lock); - ret = acrtc->dm_irq_params.window_param.activated; + ret = acrtc->dm_irq_params.crc_window_activated; spin_unlock_irq(&drm_dev->event_lock); return ret; @@ -224,10 +508,14 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, mutex_lock(&adev->dm.dc_lock); + /* For PSR1, check that the panel has exited PSR */ + if (stream_state->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1) + amdgpu_dm_psr_wait_disable(stream_state); + /* Enable or disable CRTC CRC generation */ if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { if (!dc_stream_configure_crc(stream_state->ctx->dc, - stream_state, NULL, enable, enable)) { + stream_state, NULL, enable, enable, 0, true)) { ret = -EINVAL; goto unlock; } @@ -258,6 +546,10 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) struct drm_crtc_commit *commit; struct dm_crtc_state *crtc_state; struct drm_device *drm_dev = crtc->dev; +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + struct amdgpu_device *adev = drm_to_adev(drm_dev); + struct amdgpu_display_manager *dm = &adev->dm; +#endif struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); struct drm_dp_aux *aux = NULL; bool enable = false; @@ 
-357,6 +649,17 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) } + /* + * Reading the CRC requires the vblank interrupt handler to be + * enabled. Keep a reference until CRC capture stops. + */ + enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); + if (!enabled && enable) { + ret = drm_crtc_vblank_get(crtc); + if (ret) + goto cleanup; + } + #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) /* Reset secure_display when we change crc source from debugfs */ amdgpu_dm_set_crc_window_default(crtc, crtc_state->stream); @@ -367,16 +670,7 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) goto cleanup; } - /* - * Reading the CRC requires the vblank interrupt handler to be - * enabled. Keep a reference until CRC capture stops. - */ - enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); if (!enabled && enable) { - ret = drm_crtc_vblank_get(crtc); - if (ret) - goto cleanup; - if (dm_is_crc_source_dprx(source)) { if (drm_dp_start_crc(aux, crtc)) { DRM_DEBUG_DRIVER("dp start crc failed\n"); @@ -402,6 +696,13 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) /* Reset crc_skipped on dm state */ crtc_state->crc_skip_count = 0; +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + /* Initialize phy id mapping table for secure display*/ + if (dm->secure_display_ctx.op_mode == LEGACY_MODE && + !dm->secure_display_ctx.phy_mapping_updated) + update_phy_id_mapping(adev); +#endif + cleanup: if (commit) drm_crtc_commit_put(commit); @@ -456,7 +757,7 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc) } if (dm_is_crc_source_crtc(cur_crc_src)) { - if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, + if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, 0, &crcs[0], &crcs[1], &crcs[2])) return; @@ -472,8 +773,17 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) enum amdgpu_dm_pipe_crc_source cur_crc_src; struct amdgpu_crtc *acrtc = NULL; struct amdgpu_device *adev = 
NULL; - struct secure_display_context *secure_display_ctx = NULL; + struct secure_display_crtc_context *crtc_ctx = NULL; + bool reset_crc_frame_count[MAX_CRC_WINDOW_NUM] = {false}; + uint32_t crc_r[MAX_CRC_WINDOW_NUM] = {0}; + uint32_t crc_g[MAX_CRC_WINDOW_NUM] = {0}; + uint32_t crc_b[MAX_CRC_WINDOW_NUM] = {0}; unsigned long flags1; + bool forward_roi_change = false; + bool notify_ta = false; + bool all_crc_ready = true; + struct dc_stream_state *stream_state; + int i; if (crtc == NULL) return; @@ -481,78 +791,160 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) acrtc = to_amdgpu_crtc(crtc); adev = drm_to_adev(crtc->dev); drm_dev = crtc->dev; + stream_state = to_dm_crtc_state(crtc->state)->stream; spin_lock_irqsave(&drm_dev->event_lock, flags1); cur_crc_src = acrtc->dm_irq_params.crc_src; /* Early return if CRC capture is not enabled. */ if (!amdgpu_dm_is_valid_crc_source(cur_crc_src) || - !dm_is_crc_source_crtc(cur_crc_src)) - goto cleanup; - - if (!acrtc->dm_irq_params.window_param.activated) - goto cleanup; + !dm_is_crc_source_crtc(cur_crc_src)) { + spin_unlock_irqrestore(&drm_dev->event_lock, flags1); + return; + } - if (acrtc->dm_irq_params.window_param.skip_frame_cnt) { - acrtc->dm_irq_params.window_param.skip_frame_cnt -= 1; - goto cleanup; + if (!acrtc->dm_irq_params.crc_window_activated) { + spin_unlock_irqrestore(&drm_dev->event_lock, flags1); + return; } - secure_display_ctx = &adev->dm.secure_display_ctxs[acrtc->crtc_id]; - if (WARN_ON(secure_display_ctx->crtc != crtc)) { - /* We have set the crtc when creating secure_display_context, + crtc_ctx = &adev->dm.secure_display_ctx.crtc_ctx[acrtc->crtc_id]; + if (WARN_ON(crtc_ctx->crtc != crtc)) { + /* We have set the crtc when creating secure_display_crtc_context, * don't expect it to be changed here. 
*/ - secure_display_ctx->crtc = crtc; + crtc_ctx->crtc = crtc; } - if (acrtc->dm_irq_params.window_param.update_win) { - /* prepare work for dmub to update ROI */ - secure_display_ctx->rect.x = acrtc->dm_irq_params.window_param.x_start; - secure_display_ctx->rect.y = acrtc->dm_irq_params.window_param.y_start; - secure_display_ctx->rect.width = acrtc->dm_irq_params.window_param.x_end - - acrtc->dm_irq_params.window_param.x_start; - secure_display_ctx->rect.height = acrtc->dm_irq_params.window_param.y_end - - acrtc->dm_irq_params.window_param.y_start; - schedule_work(&secure_display_ctx->forward_roi_work); - - acrtc->dm_irq_params.window_param.update_win = false; + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { + struct crc_params crc_window = { + .windowa_x_start = acrtc->dm_irq_params.window_param[i].x_start, + .windowa_y_start = acrtc->dm_irq_params.window_param[i].y_start, + .windowa_x_end = acrtc->dm_irq_params.window_param[i].x_end, + .windowa_y_end = acrtc->dm_irq_params.window_param[i].y_end, + .windowb_x_start = acrtc->dm_irq_params.window_param[i].x_start, + .windowb_y_start = acrtc->dm_irq_params.window_param[i].y_start, + .windowb_x_end = acrtc->dm_irq_params.window_param[i].x_end, + .windowb_y_end = acrtc->dm_irq_params.window_param[i].y_end, + }; + + crtc_ctx->roi[i].enable = acrtc->dm_irq_params.window_param[i].enable; + + if (!acrtc->dm_irq_params.window_param[i].enable) { + crtc_ctx->crc_info.crc[i].crc_ready = false; + continue; + } - /* Statically skip 1 frame, because we may need to wait below things - * before sending ROI to dmub: - * 1. We defer the work by using system workqueue. - * 2. We may need to wait for dc_lock before accessing dmub. 
- */ - acrtc->dm_irq_params.window_param.skip_frame_cnt = 1; + if (acrtc->dm_irq_params.window_param[i].skip_frame_cnt) { + acrtc->dm_irq_params.window_param[i].skip_frame_cnt -= 1; + crtc_ctx->crc_info.crc[i].crc_ready = false; + continue; + } - } else { - /* prepare work for psp to read ROI/CRC and send to I2C */ - schedule_work(&secure_display_ctx->notify_ta_work); + if (acrtc->dm_irq_params.window_param[i].update_win) { + crtc_ctx->roi[i].rect.x = crc_window.windowa_x_start; + crtc_ctx->roi[i].rect.y = crc_window.windowa_y_start; + crtc_ctx->roi[i].rect.width = crc_window.windowa_x_end - + crc_window.windowa_x_start; + crtc_ctx->roi[i].rect.height = crc_window.windowa_y_end - + crc_window.windowa_y_start; + + if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE) + /* forward task to dmub to update ROI */ + forward_roi_change = true; + else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE) + /* update ROI via dm*/ + dc_stream_configure_crc(stream_state->ctx->dc, stream_state, + &crc_window, true, true, i, false); + + reset_crc_frame_count[i] = true; + + acrtc->dm_irq_params.window_param[i].update_win = false; + + /* Statically skip 1 frame, because we may need to wait below things + * before sending ROI to dmub: + * 1. We defer the work by using system workqueue. + * 2. We may need to wait for dc_lock before accessing dmub. + */ + acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 1; + crtc_ctx->crc_info.crc[i].crc_ready = false; + } else { + if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, i, + &crc_r[i], &crc_g[i], &crc_b[i])) + DRM_ERROR("Secure Display: fail to get crc from engine %d\n", i); + + if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE) + /* forward task to psp to read ROI/CRC and output via I2C */ + notify_ta = true; + else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE) + /* Avoid ROI window get changed, keep overwriting. 
*/ + dc_stream_configure_crc(stream_state->ctx->dc, stream_state, + &crc_window, true, true, i, false); + + /* crc ready for psp to read out */ + crtc_ctx->crc_info.crc[i].crc_ready = true; + } } -cleanup: spin_unlock_irqrestore(&drm_dev->event_lock, flags1); + + if (forward_roi_change) + schedule_work(&crtc_ctx->forward_roi_work); + + if (notify_ta) + schedule_work(&crtc_ctx->notify_ta_work); + + spin_lock_irqsave(&crtc_ctx->crc_info.lock, flags1); + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { + crtc_ctx->crc_info.crc[i].crc_R = crc_r[i]; + crtc_ctx->crc_info.crc[i].crc_G = crc_g[i]; + crtc_ctx->crc_info.crc[i].crc_B = crc_b[i]; + + if (!crtc_ctx->roi[i].enable) { + crtc_ctx->crc_info.crc[i].frame_count = 0; + continue; + } + + if (!crtc_ctx->crc_info.crc[i].crc_ready) + all_crc_ready = false; + + if (reset_crc_frame_count[i] || crtc_ctx->crc_info.crc[i].frame_count == UINT_MAX) + /* Reset the reference frame count after user update the ROI + * or it reaches the maximum value. + */ + crtc_ctx->crc_info.crc[i].frame_count = 0; + else + crtc_ctx->crc_info.crc[i].frame_count += 1; + } + spin_unlock_irqrestore(&crtc_ctx->crc_info.lock, flags1); + + if (all_crc_ready) + complete_all(&crtc_ctx->crc_info.completion); } -struct secure_display_context * -amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev) +void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev) { - struct secure_display_context *secure_display_ctxs = NULL; + struct secure_display_crtc_context *crtc_ctx = NULL; int i; - secure_display_ctxs = kcalloc(adev->mode_info.num_crtc, - sizeof(struct secure_display_context), + crtc_ctx = kcalloc(adev->mode_info.num_crtc, + sizeof(struct secure_display_crtc_context), GFP_KERNEL); - if (!secure_display_ctxs) - return NULL; + if (!crtc_ctx) { + adev->dm.secure_display_ctx.crtc_ctx = NULL; + return; + } for (i = 0; i < adev->mode_info.num_crtc; i++) { - INIT_WORK(&secure_display_ctxs[i].forward_roi_work, 
amdgpu_dm_forward_crc_window); - INIT_WORK(&secure_display_ctxs[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read); - secure_display_ctxs[i].crtc = &adev->mode_info.crtcs[i]->base; + INIT_WORK(&crtc_ctx[i].forward_roi_work, amdgpu_dm_forward_crc_window); + INIT_WORK(&crtc_ctx[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read); + crtc_ctx[i].crtc = &adev->mode_info.crtcs[i]->base; + spin_lock_init(&crtc_ctx[i].crc_info.lock); } - return secure_display_ctxs; + adev->dm.secure_display_ctx.crtc_ctx = crtc_ctx; + + adev->dm.secure_display_ctx.op_mode = DISPLAY_CRC_MODE; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 748e80ef40d0..3da056c8d20b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -40,20 +40,53 @@ enum amdgpu_dm_pipe_crc_source { }; #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY +#define MAX_CRTC 6 + +enum secure_display_mode { + /* via dmub + psp */ + LEGACY_MODE = 0, + /* driver directly */ + DISPLAY_CRC_MODE, + SECURE_DISPLAY_MODE_MAX, +}; + +struct phy_id_mapping { + bool assigned; + bool is_mst; + uint8_t enc_hw_inst; + u8 lct; + u8 port_num; + u8 rad[8]; +}; + +struct crc_data { + uint32_t crc_R; + uint32_t crc_G; + uint32_t crc_B; + uint32_t frame_count; + bool crc_ready; +}; + +struct crc_info { + struct crc_data crc[MAX_CRC_WINDOW_NUM]; + struct completion completion; + spinlock_t lock; +}; + struct crc_window_param { uint16_t x_start; uint16_t y_start; uint16_t x_end; uint16_t y_end; /* CRC window is activated or not*/ - bool activated; + bool enable; /* Update crc window during vertical blank or not */ bool update_win; /* skip reading/writing for few frames */ int skip_frame_cnt; }; -struct secure_display_context { +struct secure_display_crtc_context { /* work to notify PSP TA*/ struct work_struct notify_ta_work; @@ -63,7 +96,20 @@ struct secure_display_context { struct drm_crtc *crtc; 
/* Region of Interest (ROI) */ - struct rect rect; + struct crc_window roi[MAX_CRC_WINDOW_NUM]; + + struct crc_info crc_info; +}; + +struct secure_display_context { + + struct secure_display_crtc_context *crtc_ctx; + /* Whether dmub support multiple ROI setting */ + bool support_mul_roi; + enum secure_display_mode op_mode; + bool phy_mapping_updated; + int phy_id_mapping_cnt; + struct phy_id_mapping phy_id_mapping[MAX_CRTC]; }; #endif @@ -95,8 +141,7 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc); #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc); void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc); -struct secure_display_context *amdgpu_dm_crtc_secure_display_create_contexts( - struct amdgpu_device *adev); +void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev); #else #define amdgpu_dm_crc_window_is_activated(x) #define amdgpu_dm_crtc_handle_crc_window_irq(x) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 64a041c2af05..36a830a7440f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -93,7 +93,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable) return rc; } -bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state) +bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state) { return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; @@ -142,7 +142,7 @@ static void amdgpu_dm_crtc_set_panel_sr_feature( amdgpu_dm_replay_enable(vblank_work->stream, true); } else if (vblank_enabled) { if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active) - amdgpu_dm_psr_disable(vblank_work->stream); + amdgpu_dm_psr_disable(vblank_work->stream, false); } else if 
(link->psr_settings.psr_feature_enabled && allow_sr_entry && !is_sr_active && !is_crc_window_active) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h index 17e948753f59..c1212947a77b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h @@ -37,7 +37,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable); bool amdgpu_dm_crtc_vrr_active_irq(struct amdgpu_crtc *acrtc); -bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state); +bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state); int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 6a97bb2d9160..049046c60462 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -25,6 +25,7 @@ #include <linux/string_helpers.h> #include <linux/uaccess.h> +#include <media/cec-notifier.h> #include "dc.h" #include "amdgpu.h" @@ -258,7 +259,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, struct dc_link *link = connector->dc_link; struct amdgpu_device *adev = drm_to_adev(connector->base.dev); struct dc *dc = (struct dc *)link->dc; - struct dc_link_settings prefer_link_settings; + struct dc_link_settings prefer_link_settings = {0}; char *wr_buf = NULL; const uint32_t wr_buf_size = 40; /* 0: lane_count; 1: link_rate */ @@ -389,7 +390,7 @@ static ssize_t dp_mst_link_setting(struct file *f, const char __user *buf, struct dc_link *link = aconnector->dc_link; struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev); struct dc *dc = (struct dc *)link->dc; - struct dc_link_settings prefer_link_settings; + struct dc_link_settings prefer_link_settings = {0}; char *wr_buf = NULL; const uint32_t 
wr_buf_size = 40; /* 0: lane_count; 1: link_rate */ @@ -613,7 +614,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf, uint32_t wr_buf_size = 40; long param[3]; bool use_prefer_link_setting; - struct link_training_settings link_lane_settings; + struct link_training_settings link_lane_settings = {0}; int max_param_num = 3; uint8_t param_nums = 0; int r = 0; @@ -768,7 +769,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED}; struct dc_link_settings cur_link_settings = {LANE_COUNT_UNKNOWN, LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED}; - struct link_training_settings link_training_settings; + struct link_training_settings link_training_settings = {0}; int i; if (size == 0) @@ -902,9 +903,10 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data) { struct amdgpu_device *adev = m->private; struct dmub_srv_fb_info *fb_info = adev->dm.dmub_fb_info; + struct dmub_fw_meta_info *fw_meta_info = NULL; struct dmub_debugfs_trace_entry *entries; uint8_t *tbuf_base; - uint32_t tbuf_size, max_entries, num_entries, i; + uint32_t tbuf_size, max_entries, num_entries, first_entry, i; if (!fb_info) return 0; @@ -913,20 +915,42 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data) if (!tbuf_base) return 0; - tbuf_size = fb_info->fb[DMUB_WINDOW_5_TRACEBUFF].size; + if (adev->dm.dmub_srv) + fw_meta_info = &adev->dm.dmub_srv->meta_info; + + tbuf_size = fw_meta_info ? fw_meta_info->trace_buffer_size : + DMUB_TRACE_BUFFER_SIZE; max_entries = (tbuf_size - sizeof(struct dmub_debugfs_trace_header)) / sizeof(struct dmub_debugfs_trace_entry); num_entries = ((struct dmub_debugfs_trace_header *)tbuf_base)->entry_count; + /* DMCUB tracebuffer is a ring. If it rolled over, print a hint that + * entries are being overwritten. 
+ */ + if (num_entries > max_entries) + seq_printf(m, "...\n"); + + first_entry = num_entries % max_entries; num_entries = min(num_entries, max_entries); entries = (struct dmub_debugfs_trace_entry *)(tbuf_base + sizeof(struct dmub_debugfs_trace_header)); - for (i = 0; i < num_entries; ++i) { + /* To print entries chronologically, start from the first entry till the + * top of buffer, then from base of buffer to first entry. + */ + for (i = first_entry; i < num_entries; ++i) { + struct dmub_debugfs_trace_entry *entry = &entries[i]; + + seq_printf(m, + "trace_code=%u tick_count=%u param0=%u param1=%u\n", + entry->trace_code, entry->tick_count, entry->param0, + entry->param1); + } + for (i = 0; i < first_entry; ++i) { struct dmub_debugfs_trace_entry *entry = &entries[i]; seq_printf(m, @@ -2825,6 +2849,67 @@ static int is_dpia_link_show(struct seq_file *m, void *data) return 0; } +/** + * hdmi_cec_state_show - Read out the HDMI-CEC feature status + * @m: sequence file. + * @data: unused. + * + * Return 0 on success + */ +static int hdmi_cec_state_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + + seq_printf(m, "%s:%d\n", connector->name, connector->base.id); + seq_printf(m, "HDMI-CEC status: %d\n", aconnector->notifier ? 1 : 0); + + return 0; +} + +/** + * hdmi_cec_state_write - Enable/Disable HDMI-CEC feature from driver side + * @f: file structure. + * @buf: userspace buffer. set to '1' to enable; '0' to disable cec feature. + * @size: size of buffer from userpsace. + * @pos: unused. 
+ * + * Return size on success, error code on failure + */ +static ssize_t hdmi_cec_state_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + int ret; + bool enable; + struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; + struct drm_device *ddev = aconnector->base.dev; + + if (size == 0) + return -EINVAL; + + ret = kstrtobool_from_user(buf, size, &enable); + if (ret) { + drm_dbg_driver(ddev, "invalid user data !\n"); + return ret; + } + + if (enable) { + if (aconnector->notifier) + return -EINVAL; + ret = amdgpu_dm_initialize_hdmi_connector(aconnector); + if (ret) + return ret; + hdmi_cec_set_edid(aconnector); + } else { + if (!aconnector->notifier) + return -EINVAL; + cec_notifier_conn_unregister(aconnector->notifier); + aconnector->notifier = NULL; + } + + return size; +} + DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); @@ -2837,6 +2922,7 @@ DEFINE_SHOW_ATTRIBUTE(psr_capability); DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); DEFINE_SHOW_ATTRIBUTE(is_dpia_link); +DEFINE_SHOW_STORE_ATTRIBUTE(hdmi_cec_state); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2972,7 +3058,8 @@ static const struct { char *name; const struct file_operations *fops; } hdmi_debugfs_entries[] = { - {"hdcp_sink_capability", &hdcp_sink_capability_fops} + {"hdcp_sink_capability", &hdcp_sink_capability_fops}, + {"hdmi_cec_state", &hdmi_cec_state_fops} }; /* @@ -3457,8 +3544,8 @@ static int crc_win_x_start_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.window_param.x_start = (uint16_t) val; - acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param[0].x_start = (uint16_t) val; + acrtc->dm_irq_params.window_param[0].update_win = false; spin_unlock_irq(&drm_dev->event_lock); 
return 0; @@ -3474,7 +3561,7 @@ static int crc_win_x_start_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.window_param.x_start; + *val = acrtc->dm_irq_params.window_param[0].x_start; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3494,8 +3581,8 @@ static int crc_win_y_start_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.window_param.y_start = (uint16_t) val; - acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param[0].y_start = (uint16_t) val; + acrtc->dm_irq_params.window_param[0].update_win = false; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3511,7 +3598,7 @@ static int crc_win_y_start_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.window_param.y_start; + *val = acrtc->dm_irq_params.window_param[0].y_start; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3530,8 +3617,8 @@ static int crc_win_x_end_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.window_param.x_end = (uint16_t) val; - acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param[0].x_end = (uint16_t) val; + acrtc->dm_irq_params.window_param[0].update_win = false; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3547,7 +3634,7 @@ static int crc_win_x_end_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.window_param.x_end; + *val = acrtc->dm_irq_params.window_param[0].x_end; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3566,8 +3653,8 @@ static int crc_win_y_end_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 
spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.window_param.y_end = (uint16_t) val; - acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param[0].y_end = (uint16_t) val; + acrtc->dm_irq_params.window_param[0].update_win = false; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3583,7 +3670,7 @@ static int crc_win_y_end_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.window_param.y_end; + *val = acrtc->dm_irq_params.window_param[0].y_end; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3606,13 +3693,14 @@ static int crc_win_update_set(void *data, u64 val) /* PSR may write to OTG CRC window control register, * so close it before starting secure_display. */ - amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream); + amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream, true); spin_lock_irq(&adev_to_drm(adev)->event_lock); - acrtc->dm_irq_params.window_param.activated = true; - acrtc->dm_irq_params.window_param.update_win = true; - acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; + acrtc->dm_irq_params.window_param[0].enable = true; + acrtc->dm_irq_params.window_param[0].update_win = true; + acrtc->dm_irq_params.window_param[0].skip_frame_cnt = 0; + acrtc->dm_irq_params.crc_window_activated = true; spin_unlock_irq(&adev_to_drm(adev)->event_lock); mutex_unlock(&adev->dm.dc_lock); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 6cbbb71d752b..fbd80d8545a8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -885,6 +885,12 @@ bool dm_helpers_dp_write_dsc_enable( return ret; } +bool dm_helpers_dp_write_hblank_reduction(struct dc_context *ctx, const struct dc_stream_state *stream) +{ + // TODO + return false; +} + bool dm_helpers_is_dp_sink_present(struct 
dc_link *link) { bool dp_sink_present; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h index 6a7ecc1e4602..6c9de834455b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h @@ -39,7 +39,9 @@ struct dm_irq_params { #ifdef CONFIG_DEBUG_FS enum amdgpu_dm_pipe_crc_source crc_src; #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY - struct crc_window_param window_param; + struct crc_window_param window_param[MAX_CRC_WINDOW_NUM]; + /* At least one CRC window is activated or not*/ + bool crc_window_activated; #endif #endif }; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index d398bc74e667..07e744da7bf4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -155,6 +155,17 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector) return 0; } + +static inline void +amdgpu_dm_mst_reset_mst_connector_setting(struct amdgpu_dm_connector *aconnector) +{ + aconnector->drm_edid = NULL; + aconnector->dsc_aux = NULL; + aconnector->mst_output_port->passthrough_aux = NULL; + aconnector->mst_local_bw = 0; + aconnector->vc_full_pbn = 0; +} + static void amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) { @@ -182,9 +193,7 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) dc_sink_release(dc_sink); aconnector->dc_sink = NULL; - aconnector->drm_edid = NULL; - aconnector->dsc_aux = NULL; - port->passthrough_aux = NULL; + amdgpu_dm_mst_reset_mst_connector_setting(aconnector); } aconnector->mst_status = MST_STATUS_DEFAULT; @@ -504,9 +513,7 @@ dm_dp_mst_detect(struct drm_connector *connector, dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; - aconnector->drm_edid = NULL; - 
aconnector->dsc_aux = NULL; - port->passthrough_aux = NULL; + amdgpu_dm_mst_reset_mst_connector_setting(aconnector); amdgpu_dm_set_mst_status(&aconnector->mst_status, MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD, @@ -1689,16 +1696,16 @@ clean_exit: return ret; } -static unsigned int kbps_from_pbn(unsigned int pbn) +static uint32_t kbps_from_pbn(unsigned int pbn) { - unsigned int kbps = pbn; + uint64_t kbps = (uint64_t)pbn; kbps *= (1000000 / PEAK_FACTOR_X1000); kbps *= 8; kbps *= 54; kbps /= 64; - return kbps; + return (uint32_t)kbps; } static bool is_dsc_common_config_possible(struct dc_stream_state *stream, @@ -1820,9 +1827,18 @@ enum dc_status dm_dp_mst_is_port_support_mode( struct drm_dp_mst_port *immediate_upstream_port = NULL; uint32_t end_link_bw = 0; - /*Get last DP link BW capability*/ - if (dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw)) { - if (stream_kbps > end_link_bw) { + /*Get last DP link BW capability. Mode shall be supported by Legacy peer*/ + if (aconnector->mst_output_port->pdt != DP_PEER_DEVICE_DP_LEGACY_CONV && + aconnector->mst_output_port->pdt != DP_PEER_DEVICE_NONE) { + if (aconnector->vc_full_pbn != aconnector->mst_output_port->full_pbn) { + dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw); + aconnector->vc_full_pbn = aconnector->mst_output_port->full_pbn; + aconnector->mst_local_bw = end_link_bw; + } else { + end_link_bw = aconnector->mst_local_bw; + } + + if (end_link_bw > 0 && stream_kbps > end_link_bw) { DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." 
"Mode required bw can't fit into last link\n"); return DC_FAIL_BANDWIDTH_VALIDATE; @@ -1836,11 +1852,15 @@ enum dc_status dm_dp_mst_is_port_support_mode( if (immediate_upstream_port) { virtual_channel_bw_in_kbps = kbps_from_pbn(immediate_upstream_port->full_pbn); virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps); - if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { - DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." - "Max dsc compression can't fit into MST available bw\n"); - return DC_FAIL_BANDWIDTH_VALIDATE; - } + } else { + /* For topology LCT 1 case - only one mstb*/ + virtual_channel_bw_in_kbps = root_link_bw_in_kbps; + } + + if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." + "Max dsc compression can't fit into MST available bw\n"); + return DC_FAIL_BANDWIDTH_VALIDATE; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 495e3cd70426..774cc3f4f3fd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -26,6 +26,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> +#include "drm/drm_framebuffer.h" #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_plane_helper.h> #include <drm/drm_gem_framebuffer_helper.h> @@ -176,7 +177,7 @@ static unsigned int amdgpu_dm_plane_modifier_gfx9_swizzle_mode(uint64_t modifier return AMD_FMT_MOD_GET(TILE, modifier); } -static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info, +static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(struct dc_tiling_info *tiling_info, uint64_t tiling_flags) { /* Fill GFX8 params */ @@ -189,6 +190,7 @@ static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_inf tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); num_banks = AMDGPU_TILING_GET(tiling_flags, 
NUM_BANKS); + tiling_info->gfxversion = DcGfxVersion8; /* XXX fix me for VI */ tiling_info->gfx8.num_banks = num_banks; tiling_info->gfx8.array_mode = @@ -209,7 +211,7 @@ static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_inf } static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info) + struct dc_tiling_info *tiling_info) { /* Fill GFX9 params */ tiling_info->gfx9.num_pipes = @@ -230,7 +232,7 @@ static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(const struct amdgp } static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, uint64_t modifier) { unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier); @@ -260,7 +262,7 @@ static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(const struct amd static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev, const enum surface_pixel_format format, const enum dc_rotation_angle rotation, - const union dc_tiling_info *tiling_info, + const struct dc_tiling_info *tiling_info, const struct dc_plane_dcc_param *dcc, const struct dc_plane_address *address, const struct plane_size *plane_size) @@ -307,18 +309,18 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg const enum surface_pixel_format format, const enum dc_rotation_angle rotation, const struct plane_size *plane_size, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - const bool force_disable_dcc) + struct dc_plane_address *address) { const uint64_t modifier = afb->base.modifier; int ret = 0; amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); + tiling_info->gfxversion = DcGfxVersion9; 
- if (amdgpu_dm_plane_modifier_has_dcc(modifier) && !force_disable_dcc) { + if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { uint64_t dcc_address = afb->address + afb->base.offsets[1]; bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); @@ -358,10 +360,9 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd const enum surface_pixel_format format, const enum dc_rotation_angle rotation, const struct plane_size *plane_size, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - const bool force_disable_dcc) + struct dc_plane_address *address) { const uint64_t modifier = afb->base.modifier; int ret = 0; @@ -370,8 +371,9 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info); tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); + tiling_info->gfxversion = DcGfxAddr3; - if (amdgpu_dm_plane_modifier_has_dcc(modifier) && !force_disable_dcc) { + if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { int max_compressed_block = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier); dcc->enable = 1; @@ -835,12 +837,11 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, const enum surface_pixel_format format, const enum dc_rotation_angle rotation, const uint64_t tiling_flags, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) + bool tmz_surface) { const struct drm_framebuffer *fb = &afb->base; int ret; @@ -900,16 +901,14 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, ret = 
amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(adev, afb, format, rotation, plane_size, tiling_info, dcc, - address, - force_disable_dcc); + address); if (ret) return ret; } else if (adev->family >= AMDGPU_FAMILY_AI) { ret = amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, rotation, plane_size, tiling_info, dcc, - address, - force_disable_dcc); + address); if (ret) return ret; } else { @@ -1000,14 +999,13 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { struct dc_plane_state *plane_state = dm_plane_state_new->dc_state; - bool force_disable_dcc = !plane_state->dcc.enable; amdgpu_dm_plane_fill_plane_buffer_attributes( adev, afb, plane_state->format, plane_state->rotation, afb->tiling_flags, &plane_state->tiling_info, &plane_state->plane_size, &plane_state->dcc, &plane_state->address, - afb->tmz_surface, force_disable_dcc); + afb->tmz_surface); } return 0; @@ -1421,6 +1419,20 @@ static void amdgpu_dm_plane_atomic_async_update(struct drm_plane *plane, amdgpu_dm_plane_handle_cursor_update(plane, old_state); } +static void amdgpu_dm_plane_panic_flush(struct drm_plane *plane) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane->state); + struct drm_framebuffer *fb = plane->state->fb; + struct dc_plane_state *dc_plane_state; + + if (!dm_plane_state || !dm_plane_state->dc_state) + return; + + dc_plane_state = dm_plane_state->dc_state; + + dc_plane_force_update_for_panic(dc_plane_state, fb->modifier ? 
true : false); +} + static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { .prepare_fb = amdgpu_dm_plane_helper_prepare_fb, .cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb, @@ -1429,6 +1441,16 @@ static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { .atomic_async_update = amdgpu_dm_plane_atomic_async_update }; +static const struct drm_plane_helper_funcs dm_primary_plane_helper_funcs = { + .prepare_fb = amdgpu_dm_plane_helper_prepare_fb, + .cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb, + .atomic_check = amdgpu_dm_plane_atomic_check, + .atomic_async_check = amdgpu_dm_plane_atomic_async_check, + .atomic_async_update = amdgpu_dm_plane_atomic_async_update, + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = amdgpu_dm_plane_panic_flush, +}; + static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane) { struct dm_plane_state *amdgpu_state = NULL; @@ -1855,7 +1877,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, plane->type != DRM_PLANE_TYPE_CURSOR) drm_plane_enable_fb_damage_clips(plane); - drm_plane_helper_add(plane, &dm_plane_helper_funcs); + if (plane->type == DRM_PLANE_TYPE_PRIMARY) + drm_plane_helper_add(plane, &dm_primary_plane_helper_funcs); + else + drm_plane_helper_add(plane, &dm_plane_helper_funcs); #ifdef AMD_PRIVATE_COLOR dm_atomic_plane_attach_color_mgmt_properties(dm, plane); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h index 6498359bff6f..615d2ab2b803 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -47,12 +47,11 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, const enum surface_pixel_format format, const enum dc_rotation_angle rotation, const uint64_t tiling_flags, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, struct 
dc_plane_dcc_param *dcc, struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc); + bool tmz_surface); int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index f40240aafe98..45858bf1523d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -201,14 +201,13 @@ void amdgpu_dm_psr_enable(struct dc_stream_state *stream) * * Return: true if success */ -bool amdgpu_dm_psr_disable(struct dc_stream_state *stream) +bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait) { - unsigned int power_opt = 0; bool psr_enable = false; DRM_DEBUG_DRIVER("Disabling psr...\n"); - return dc_link_set_psr_allow_active(stream->link, &psr_enable, true, false, &power_opt); + return dc_link_set_psr_allow_active(stream->link, &psr_enable, wait, false, NULL); } /* @@ -251,3 +250,33 @@ bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm) return allow_active; } + +/** + * amdgpu_dm_psr_wait_disable() - Wait for eDP panel to exit PSR + * @stream: stream state attached to the eDP link + * + * Waits for a max of 500ms for the eDP panel to exit PSR. + * + * Return: true if panel exited PSR, false otherwise. 
+ */ +bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream) +{ + enum dc_psr_state psr_state = PSR_STATE0; + struct dc_link *link = stream->link; + int retry_count; + + if (link == NULL) + return false; + + for (retry_count = 0; retry_count <= 1000; retry_count++) { + dc_link_get_psr_state(link, &psr_state); + if (psr_state == PSR_STATE0) + break; + udelay(500); + } + + if (retry_count == 1000) + return false; + + return true; +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h index cd2d45c2b5ef..e2366321a3c1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h @@ -34,8 +34,9 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link); void amdgpu_dm_psr_enable(struct dc_stream_state *stream); bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream); -bool amdgpu_dm_psr_disable(struct dc_stream_state *stream); +bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait); bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm); bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm); +bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream); #endif /* AMDGPU_DM_AMDGPU_DM_PSR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index c9a6de110b74..a62f6c51301c 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -3088,11 +3088,12 @@ static enum bp_result construct_integrated_info( info->ext_disp_conn_info.path[i].ext_encoder_obj_id.id, info->ext_disp_conn_info.path[i].caps ); - if (info->ext_disp_conn_info.path[i].caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) - DC_LOG_BIOS("BIOS EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); + if ((info->ext_disp_conn_info.path[i].caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == 
AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) + DC_LOG_BIOS("BIOS AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); else if (bp->base.ctx->dc->config.force_bios_fixed_vs) { - info->ext_disp_conn_info.path[i].caps |= EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN; - DC_LOG_BIOS("driver forced EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); + info->ext_disp_conn_info.path[i].caps &= ~AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; + info->ext_disp_conn_info.path[i].caps |= AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN; + DC_LOG_BIOS("driver forced AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); } } // Log the Checksum and Voltage Swing diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index ab1132bc896a..d9955c5d2e5e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -174,7 +174,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32) ############################################################################### # DCN35 ############################################################################### -CLK_MGR_DCN35 = dcn35_smu.o dcn35_clk_mgr.o +CLK_MGR_DCN35 = dcn35_smu.o dcn351_clk_mgr.o dcn35_clk_mgr.o AMD_DAL_CLK_MGR_DCN35 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn35/,$(CLK_MGR_DCN35)) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 0e243f4344d0..4c3e58c730b1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -355,8 +355,11 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p BREAK_TO_DEBUGGER(); return NULL; } + if (ctx->dce_version == DCN_VERSION_3_51) + dcn351_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + else + dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); - dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base.base; } break; diff 
--git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c index 7920f6f1aa62..76c612ecfe3c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c @@ -34,8 +34,8 @@ #include "dm_services.h" #include "cyan_skillfish_ip_offset.h" -#include "dcn/dcn_2_0_3_offset.h" -#include "dcn/dcn_2_0_3_sh_mask.h" +#include "dcn/dcn_2_0_1_offset.h" +#include "dcn/dcn_2_0_1_sh_mask.h" #include "clk/clk_11_0_1_offset.h" #include "clk/clk_11_0_1_sh_mask.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c new file mode 100644 index 000000000000..6a6ae618650b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c @@ -0,0 +1,140 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "core_types.h" +#include "dcn35_clk_mgr.h" + +#define DCN_BASE__INST0_SEG1 0x000000C0 +#define mmCLK1_CLK_PLL_REQ 0x16E37 + +#define mmCLK1_CLK0_DFS_CNTL 0x16E69 +#define mmCLK1_CLK1_DFS_CNTL 0x16E6C +#define mmCLK1_CLK2_DFS_CNTL 0x16E6F +#define mmCLK1_CLK3_DFS_CNTL 0x16E72 +#define mmCLK1_CLK4_DFS_CNTL 0x16E75 +#define mmCLK1_CLK5_DFS_CNTL 0x16E78 + +#define mmCLK1_CLK0_CURRENT_CNT 0x16EFC +#define mmCLK1_CLK1_CURRENT_CNT 0x16EFD +#define mmCLK1_CLK2_CURRENT_CNT 0x16EFE +#define mmCLK1_CLK3_CURRENT_CNT 0x16EFF +#define mmCLK1_CLK4_CURRENT_CNT 0x16F00 +#define mmCLK1_CLK5_CURRENT_CNT 0x16F01 + +#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A +#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93 +#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C +#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5 +#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE +#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7 + +#define mmCLK1_CLK0_DS_CNTL 0x16E83 +#define mmCLK1_CLK1_DS_CNTL 0x16E8C +#define mmCLK1_CLK2_DS_CNTL 0x16E95 +#define mmCLK1_CLK3_DS_CNTL 0x16E9E +#define mmCLK1_CLK4_DS_CNTL 0x16EA7 +#define mmCLK1_CLK5_DS_CNTL 0x16EB0 + +#define mmCLK1_CLK0_ALLOW_DS 0x16E84 +#define mmCLK1_CLK1_ALLOW_DS 0x16E8D +#define mmCLK1_CLK2_ALLOW_DS 0x16E96 +#define mmCLK1_CLK3_ALLOW_DS 0x16E9F +#define mmCLK1_CLK4_ALLOW_DS 0x16EA8 +#define mmCLK1_CLK5_ALLOW_DS 0x16EB1 + +#define mmCLK5_spll_field_8 0x1B04B +#define mmDENTIST_DISPCLK_CNTL 0x0124 +#define regDENTIST_DISPCLK_CNTL 0x0064 +#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL 
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L + +// DENTIST_DISPCLK_CNTL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L + +#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L + +#define REG(reg) \ + (clk_mgr->regs->reg) + +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define CLK_SR_DCN35(reg_name)\ + .reg_name = mm ## reg_name + +static const struct clk_mgr_registers clk_mgr_regs_dcn351 = { + CLK_REG_LIST_DCN35() +}; + +static const struct clk_mgr_shift clk_mgr_shift_dcn351 = { + CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct clk_mgr_mask clk_mgr_mask_dcn351 = { + CLK_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + +#define TO_CLK_MGR_DCN35(clk_mgr)\ + container_of(clk_mgr, struct clk_mgr_dcn35, base) + + +void dcn351_clk_mgr_construct( + struct dc_context *ctx, + struct clk_mgr_dcn35 *clk_mgr, + struct pp_smu_funcs *pp_smu, + struct dccg *dccg) +{ + /*register offset changed*/ + clk_mgr->base.regs = &clk_mgr_regs_dcn351; + clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn351; + clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn351; + + 
dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + +} + + diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index b77333817f18..1648226586e2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -36,15 +36,11 @@ #include "dcn20/dcn20_clk_mgr.h" - - #include "reg_helper.h" #include "core_types.h" #include "dcn35_smu.h" #include "dm_helpers.h" -/* TODO: remove this include once we ported over remaining clk mgr functions*/ -#include "dcn30/dcn30_clk_mgr.h" #include "dcn31/dcn31_clk_mgr.h" #include "dc_dmub_srv.h" @@ -55,35 +51,102 @@ #define DC_LOGGER \ clk_mgr->base.base.ctx->logger +#define DCN_BASE__INST0_SEG1 0x000000C0 +#define mmCLK1_CLK_PLL_REQ 0x16E37 + +#define mmCLK1_CLK0_DFS_CNTL 0x16E69 +#define mmCLK1_CLK1_DFS_CNTL 0x16E6C +#define mmCLK1_CLK2_DFS_CNTL 0x16E6F +#define mmCLK1_CLK3_DFS_CNTL 0x16E72 +#define mmCLK1_CLK4_DFS_CNTL 0x16E75 +#define mmCLK1_CLK5_DFS_CNTL 0x16E78 + +#define mmCLK1_CLK0_CURRENT_CNT 0x16EFB +#define mmCLK1_CLK1_CURRENT_CNT 0x16EFC +#define mmCLK1_CLK2_CURRENT_CNT 0x16EFD +#define mmCLK1_CLK3_CURRENT_CNT 0x16EFE +#define mmCLK1_CLK4_CURRENT_CNT 0x16EFF +#define mmCLK1_CLK5_CURRENT_CNT 0x16F00 + +#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A +#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93 +#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C +#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5 +#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE +#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7 + +#define mmCLK1_CLK0_DS_CNTL 0x16E83 +#define mmCLK1_CLK1_DS_CNTL 0x16E8C +#define mmCLK1_CLK2_DS_CNTL 0x16E95 +#define mmCLK1_CLK3_DS_CNTL 0x16E9E +#define mmCLK1_CLK4_DS_CNTL 0x16EA7 +#define mmCLK1_CLK5_DS_CNTL 0x16EB0 + +#define mmCLK1_CLK0_ALLOW_DS 0x16E84 +#define mmCLK1_CLK1_ALLOW_DS 0x16E8D +#define mmCLK1_CLK2_ALLOW_DS 0x16E96 +#define mmCLK1_CLK3_ALLOW_DS 0x16E9F +#define mmCLK1_CLK4_ALLOW_DS 0x16EA8 +#define 
mmCLK1_CLK5_ALLOW_DS 0x16EB1 + +#define mmCLK5_spll_field_8 0x1B24B +#define mmDENTIST_DISPCLK_CNTL 0x0124 +#define regDENTIST_DISPCLK_CNTL 0x0064 +#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L +// DENTIST_DISPCLK_CNTL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L + +#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L -#define regCLK1_CLK_PLL_REQ 0x0237 -#define regCLK1_CLK_PLL_REQ_BASE_IDX 0 +#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 +#undef FN +#define FN(reg_name, field_name) \ + clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name -#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L +#define 
REG(reg) \ + (clk_mgr->regs->reg) -#define regCLK1_CLK2_BYPASS_CNTL 0x029c -#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L +#define BASE(seg) BASE_INNER(seg) -#define regCLK5_0_CLK5_spll_field_8 0x464b -#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0 +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name -#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd -#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L +#define CLK_SR_DCN35(reg_name)\ + .reg_name = mm ## reg_name -#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 +static const struct clk_mgr_registers clk_mgr_regs_dcn35 = { + CLK_REG_LIST_DCN35() +}; + +static const struct clk_mgr_shift clk_mgr_shift_dcn35 = { + CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; -#define REG(reg_name) \ - (ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) +static const struct clk_mgr_mask clk_mgr_mask_dcn35 = { + CLK_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; #define TO_CLK_MGR_DCN35(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn35, base) @@ -338,6 +401,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) { if (clk_mgr->base.ctx->dc->config.allow_0_dtb_clk) dcn35_smu_set_dtbclk(clk_mgr, false); + clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; } /* check that we're not already in lower */ @@ -355,11 +419,17 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, } if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { - dcn35_smu_set_dtbclk(clk_mgr, true); - clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; + int actual_dtbclk = 0; dcn35_update_clocks_update_dtb_dto(clk_mgr, context, 
new_clocks->ref_dtbclk_khz); - clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; + dcn35_smu_set_dtbclk(clk_mgr, true); + + actual_dtbclk = REG_READ(CLK1_CLK4_CURRENT_CNT); + + if (actual_dtbclk) { + clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; + clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; + } } /* check that we're not already in D0 */ @@ -452,7 +522,6 @@ static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) struct fixed31_32 pll_req; unsigned int fbmult_frac_val = 0; unsigned int fbmult_int_val = 0; - struct dc_context *ctx = clk_mgr->base.ctx; /* * Register value of fbmult is in 8.16 format, we are converting to 314.32 @@ -512,22 +581,20 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - struct dc_context *ctx = clk_mgr->base.ctx; + uint32_t ssc_enable; - REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable); + ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; - return ssc_enable == 1; + return ssc_enable != 0; } static void init_clk_states(struct clk_mgr *clk_mgr) { - struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; + memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); - if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD) - clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; @@ -538,6 +605,7 @@ static void init_clk_states(struct clk_mgr *clk_mgr) void dcn35_init_clocks(struct clk_mgr *clk_mgr) { struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); + init_clk_states(clk_mgr); // to adjust dp_dto reference clock if ssc is enable 
otherwise to apply dprefclk @@ -632,6 +700,7 @@ static struct wm_table lpddr5_wm_table = { }; static DpmClocks_t_dcn35 dummy_clocks; +static DpmClocks_t_dcn351 dummy_clocks_dcn351; static struct dcn35_watermarks dummy_wms = { 0 }; @@ -642,10 +711,10 @@ static struct dcn35_ss_info_table ss_info_table = { static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) { - struct dc_context *ctx = clk_mgr->base.ctx; - uint32_t clock_source; + uint32_t clock_source = 0; + + clock_source = REG_READ(CLK1_CLK2_BYPASS_CNTL) & CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK; - REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); // If it's DFS mode, clock_source is 0. if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) { clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; @@ -755,6 +824,22 @@ static void dcn35_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr); } +static void dcn351_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, + struct dcn351_smu_dpm_clks *smu_dpm_clks) +{ + DpmClocks_t_dcn351 *table = smu_dpm_clks->dpm_clks; + + if (!clk_mgr->smu_ver) + return; + if (!table || smu_dpm_clks->mc_address.quad_part == 0) + return; + memset(table, 0, sizeof(*table)); + dcn35_smu_set_dram_addr_high(clk_mgr, + smu_dpm_clks->mc_address.high_part); + dcn35_smu_set_dram_addr_low(clk_mgr, + smu_dpm_clks->mc_address.low_part); + dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr); +} static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) { uint32_t max = 0; @@ -1093,6 +1178,57 @@ struct clk_mgr_funcs dcn35_fpga_funcs = { .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, }; +static void translate_to_DpmClocks_t_dcn35(struct dcn351_smu_dpm_clks *smu_dpm_clks_a, + struct dcn35_smu_dpm_clks *smu_dpm_clks_b) +{ + /*translate two structures and only take need clock tables*/ + uint8_t i; + + if 
(smu_dpm_clks_a == NULL || smu_dpm_clks_b == NULL || + smu_dpm_clks_a->dpm_clks == NULL || smu_dpm_clks_b->dpm_clks == NULL) + return; + + for (i = 0; i < NUM_DCFCLK_DPM_LEVELS; i++) + smu_dpm_clks_b->dpm_clks->DcfClocks[i] = smu_dpm_clks_a->dpm_clks->DcfClocks[i]; + + for (i = 0; i < NUM_DISPCLK_DPM_LEVELS; i++) + smu_dpm_clks_b->dpm_clks->DispClocks[i] = smu_dpm_clks_a->dpm_clks->DispClocks[i]; + + for (i = 0; i < NUM_DPPCLK_DPM_LEVELS; i++) + smu_dpm_clks_b->dpm_clks->DppClocks[i] = smu_dpm_clks_a->dpm_clks->DppClocks[i]; + + for (i = 0; i < NUM_FCLK_DPM_LEVELS; i++) { + smu_dpm_clks_b->dpm_clks->FclkClocks_Freq[i] = smu_dpm_clks_a->dpm_clks->FclkClocks_Freq[i]; + smu_dpm_clks_b->dpm_clks->FclkClocks_Voltage[i] = smu_dpm_clks_a->dpm_clks->FclkClocks_Voltage[i]; + } + for (i = 0; i < NUM_MEM_PSTATE_LEVELS; i++) { + smu_dpm_clks_b->dpm_clks->MemPstateTable[i].MemClk = + smu_dpm_clks_a->dpm_clks->MemPstateTable[i].MemClk; + smu_dpm_clks_b->dpm_clks->MemPstateTable[i].UClk = + smu_dpm_clks_a->dpm_clks->MemPstateTable[i].UClk; + smu_dpm_clks_b->dpm_clks->MemPstateTable[i].Voltage = + smu_dpm_clks_a->dpm_clks->MemPstateTable[i].Voltage; + smu_dpm_clks_b->dpm_clks->MemPstateTable[i].WckRatio = + smu_dpm_clks_a->dpm_clks->MemPstateTable[i].WckRatio; + } + smu_dpm_clks_b->dpm_clks->MaxGfxClk = smu_dpm_clks_a->dpm_clks->MaxGfxClk; + smu_dpm_clks_b->dpm_clks->MinGfxClk = smu_dpm_clks_a->dpm_clks->MinGfxClk; + smu_dpm_clks_b->dpm_clks->NumDcfClkLevelsEnabled = + smu_dpm_clks_a->dpm_clks->NumDcfClkLevelsEnabled; + smu_dpm_clks_b->dpm_clks->NumDispClkLevelsEnabled = + smu_dpm_clks_a->dpm_clks->NumDispClkLevelsEnabled; + smu_dpm_clks_b->dpm_clks->NumFclkLevelsEnabled = + smu_dpm_clks_a->dpm_clks->NumFclkLevelsEnabled; + smu_dpm_clks_b->dpm_clks->NumMemPstatesEnabled = + smu_dpm_clks_a->dpm_clks->NumMemPstatesEnabled; + smu_dpm_clks_b->dpm_clks->NumSocClkLevelsEnabled = + smu_dpm_clks_a->dpm_clks->NumSocClkLevelsEnabled; + + for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) { + 
smu_dpm_clks_b->dpm_clks->SocClocks[i] = smu_dpm_clks_a->dpm_clks->SocClocks[i]; + smu_dpm_clks_b->dpm_clks->SocVoltage[i] = smu_dpm_clks_a->dpm_clks->SocVoltage[i]; + } +} void dcn35_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_dcn35 *clk_mgr, @@ -1100,6 +1236,7 @@ void dcn35_clk_mgr_construct( struct dccg *dccg) { struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 }; + struct dcn351_smu_dpm_clks smu_dpm_clks_dcn351 = { 0 }; clk_mgr->base.base.ctx = ctx; clk_mgr->base.base.funcs = &dcn35_funcs; @@ -1112,6 +1249,12 @@ void dcn35_clk_mgr_construct( clk_mgr->base.dprefclk_ss_divider = 1000; clk_mgr->base.ss_on_dprefclk = false; clk_mgr->base.dfs_ref_freq_khz = 48000; + if (ctx->dce_version == DCN_VERSION_3_5) { + clk_mgr->base.regs = &clk_mgr_regs_dcn35; + clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn35; + clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn35; + } + clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, @@ -1130,14 +1273,24 @@ void dcn35_clk_mgr_construct( DC_MEM_ALLOC_TYPE_GART, sizeof(DpmClocks_t_dcn35), &smu_dpm_clks.mc_address.quad_part); - if (smu_dpm_clks.dpm_clks == NULL) { smu_dpm_clks.dpm_clks = &dummy_clocks; smu_dpm_clks.mc_address.quad_part = 0; } - ASSERT(smu_dpm_clks.dpm_clks); + if (ctx->dce_version == DCN_VERSION_3_51) { + smu_dpm_clks_dcn351.dpm_clks = (DpmClocks_t_dcn351 *)dm_helpers_allocate_gpu_mem( + clk_mgr->base.base.ctx, + DC_MEM_ALLOC_TYPE_GART, + sizeof(DpmClocks_t_dcn351), + &smu_dpm_clks_dcn351.mc_address.quad_part); + if (smu_dpm_clks_dcn351.dpm_clks == NULL) { + smu_dpm_clks_dcn351.dpm_clks = &dummy_clocks_dcn351; + smu_dpm_clks_dcn351.mc_address.quad_part = 0; + } + } + clk_mgr->base.smu_ver = dcn35_smu_get_smu_version(&clk_mgr->base); if (clk_mgr->base.smu_ver) @@ -1166,7 +1319,11 @@ void dcn35_clk_mgr_construct( if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { int i; - dcn35_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks); + if (ctx->dce_version 
== DCN_VERSION_3_51) { + dcn351_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks_dcn351); + translate_to_DpmClocks_t_dcn35(&smu_dpm_clks_dcn351, &smu_dpm_clks); + } else + dcn35_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks); DC_LOG_SMU("NumDcfClkLevelsEnabled: %d\n" "NumDispClkLevelsEnabled: %d\n" "NumSocClkLevelsEnabled: %d\n" @@ -1227,6 +1384,10 @@ void dcn35_clk_mgr_construct( dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART, smu_dpm_clks.dpm_clks); + if (smu_dpm_clks_dcn351.dpm_clks && smu_dpm_clks_dcn351.mc_address.quad_part != 0) + dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART, + smu_dpm_clks_dcn351.dpm_clks); + if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) { bool ips_support = false; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h index 1203dc605b12..a12a9bf90806 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h @@ -60,4 +60,8 @@ void dcn35_clk_mgr_construct(struct dc_context *ctx, void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int); +void dcn351_clk_mgr_construct(struct dc_context *ctx, + struct clk_mgr_dcn35 *clk_mgr, + struct pp_smu_funcs *pp_smu, + struct dccg *dccg); #endif //__DCN35_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h index 3fae13c73934..ab9d21ba0c43 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h @@ -126,18 +126,31 @@ typedef struct { uint32_t MaxGfxClk; } DpmClocks_t_dcn35; - -// Throttler Status Bitmask - - - - - - - - - - +typedef struct { + uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS]; + uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS]; + uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS]; + uint32_t 
SocClocks[NUM_SOCCLK_DPM_LEVELS]; + uint32_t VClocks0[NUM_VCN_DPM_LEVELS]; + uint32_t VClocks1[NUM_VCN_DPM_LEVELS]; + uint32_t DClocks0[NUM_VCN_DPM_LEVELS]; + uint32_t DClocks1[NUM_VCN_DPM_LEVELS]; + uint32_t VPEClocks[NUM_VPE_DPM_LEVELS]; + uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS]; + uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS]; + uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS]; + MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS]; + uint8_t NumDcfClkLevelsEnabled; + uint8_t NumDispClkLevelsEnabled; // Applies to both Dispclk and Dppclk + uint8_t NumSocClkLevelsEnabled; + uint8_t Vcn0ClkLevelsEnabled; // Applies to both Vclk0 and Dclk0 + uint8_t Vcn1ClkLevelsEnabled; // Applies to both Vclk1 and Dclk1 + uint8_t VpeClkLevelsEnabled; + uint8_t NumMemPstatesEnabled; + uint8_t NumFclkLevelsEnabled; + uint32_t MinGfxClk; + uint32_t MaxGfxClk; +} DpmClocks_t_dcn351; #define TABLE_BIOS_IF 0 // Called by BIOS #define TABLE_WATERMARKS 1 // Called by DAL through VBIOS @@ -163,6 +176,10 @@ struct dcn35_smu_dpm_clks { union large_integer mc_address; }; +struct dcn351_smu_dpm_clks { + DpmClocks_t_dcn351 *dpm_clks; + union large_integer mc_address; +}; /* TODO: taken from vgh, may not be correct */ struct display_idle_optimization { unsigned int df_request_disabled : 1; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h index dbfdd3487da5..2e0d34fd7512 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h @@ -43,7 +43,9 @@ #define DALSMC_MSG_ActiveUclkFclk 0x18 #define DALSMC_MSG_IdleUclkFclk 0x19 #define DALSMC_MSG_SetUclkPstateAllow 0x1A -#define DALSMC_Message_Count 0x1B +#define DALSMC_MSG_SubvpUclkFclk 0x1B +#define DALSMC_MSG_GetNumUmcChannels 0x1C +#define DALSMC_Message_Count 0x1D typedef enum { FCLK_SWITCH_DISALLOW, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 8cfc5f435937..8082bb877611 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -141,6 +141,20 @@ static bool dcn401_is_ppclk_idle_dpm_enabled(struct clk_mgr_internal *clk_mgr, P return ppclk_idle_dpm_enabled; } +static bool dcn401_is_df_throttle_opt_enabled(struct clk_mgr_internal *clk_mgr) +{ + bool is_df_throttle_opt_enabled = false; + + if (ASICREV_IS_GC_12_0_1_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) && + clk_mgr->smu_ver >= 0x663500) { + is_df_throttle_opt_enabled = !clk_mgr->base.ctx->dc->debug.force_subvp_df_throttle; + } + + is_df_throttle_opt_enabled &= clk_mgr->smu_present; + + return is_df_throttle_opt_enabled; +} + /* Query SMU for all clock states for a particular clock */ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0, unsigned int *num_levels) @@ -614,207 +628,6 @@ static void dcn401_update_clocks_update_dentist( } -static void dcn401_update_clocks_legacy(struct clk_mgr *clk_mgr_base, - struct dc_state *context, - bool safe_to_lower) -{ - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; - struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; - bool update_dppclk = false; - bool update_dispclk = false; - bool enter_display_off = false; - bool dpp_clock_lowered = false; - struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; - bool force_reset = false; - bool update_uclk = false, update_fclk = false; - bool p_state_change_support; - bool fclk_p_state_change_support; - int total_plane_count; - - if (dc->work_arounds.skip_clock_update) - return; - - if (clk_mgr_base->clks.dispclk_khz == 0 || - (dc->debug.force_clock_mode & 0x1)) { - /* This is from resume or boot up, if forced_clock cfg option used, - * we bypass program dispclk and 
DPPCLK, but need set them for S3. - */ - force_reset = true; - - dcn2_read_clocks_from_hw_dentist(clk_mgr_base); - - /* Force_clock_mode 0x1: force reset the clock even it is the same clock - * as long as it is in Passive level. - */ - } - display_count = clk_mgr_helper_get_active_display_cnt(dc, context); - - if (display_count == 0) - enter_display_off = true; - - if (clk_mgr->smu_present) { - if (enter_display_off == safe_to_lower) - dcn401_smu_set_num_of_displays(clk_mgr, display_count); - - clk_mgr_base->clks.fclk_prev_p_state_change_support = clk_mgr_base->clks.fclk_p_state_change_support; - - total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context); - fclk_p_state_change_support = new_clocks->fclk_p_state_change_support || (total_plane_count == 0); - - if (should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_p_state_change_support)) { - clk_mgr_base->clks.fclk_p_state_change_support = fclk_p_state_change_support; - - /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW */ - if (clk_mgr_base->clks.fclk_p_state_change_support) { - /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ - dcn401_smu_send_fclk_pstate_message(clk_mgr, true); - } - } - - if (dc->debug.force_min_dcfclk_mhz > 0) - new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ? 
- new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000); - - if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) { - clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz; - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK)) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DCFCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz)); - } - - if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) { - clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK)) - dcn401_smu_set_min_deep_sleep_dcef_clk(clk_mgr, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz)); - } - - if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz)) - /* We don't actually care about socclk, don't notify SMU of hard min */ - clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz; - - clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; - clk_mgr_base->clks.prev_num_ways = clk_mgr_base->clks.num_ways; - - if (clk_mgr_base->clks.num_ways != new_clocks->num_ways && - clk_mgr_base->clks.num_ways < new_clocks->num_ways) { - clk_mgr_base->clks.num_ways = new_clocks->num_ways; - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways); - } - - - p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0); - if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.prev_p_state_change_support)) { - clk_mgr_base->clks.p_state_change_support = p_state_change_support; - clk_mgr_base->clks.fw_based_mclk_switching = p_state_change_support && new_clocks->fw_based_mclk_switching; - - /* to disable P-State switching, set UCLK min = max */ - if (!clk_mgr_base->clks.p_state_change_support && 
dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1].memclk_mhz); - } - - /* Always update saved value, even if new value not set due to P-State switching unsupported. Also check safe_to_lower for FCLK */ - if (safe_to_lower && (clk_mgr_base->clks.fclk_p_state_change_support != clk_mgr_base->clks.fclk_prev_p_state_change_support)) { - update_fclk = true; - } - - if (!clk_mgr_base->clks.fclk_p_state_change_support && - update_fclk && - dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_FCLK)) { - /* Handle code for sending a message to PMFW that FCLK P-state change is not supported */ - dcn401_smu_send_fclk_pstate_message(clk_mgr, false); - } - - /* Always update saved value, even if new value not set due to P-State switching unsupported */ - if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) { - clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz; - update_uclk = true; - } - - /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */ - if (clk_mgr_base->clks.p_state_change_support && - (update_uclk || !clk_mgr_base->clks.prev_p_state_change_support) && - dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); - - if (clk_mgr_base->clks.num_ways != new_clocks->num_ways && - clk_mgr_base->clks.num_ways > new_clocks->num_ways) { - clk_mgr_base->clks.num_ways = new_clocks->num_ways; - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways); - } - } - - if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) { - if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz) - dpp_clock_lowered = true; - - 
clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; - clk_mgr_base->clks.actual_dppclk_khz = new_clocks->dppclk_khz; - - if (clk_mgr->smu_present && !dpp_clock_lowered && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK)) - clk_mgr_base->clks.actual_dppclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK, clk_mgr_base->clks.dppclk_khz); - update_dppclk = true; - } - - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - - if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK)) - clk_mgr_base->clks.actual_dispclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DISPCLK, clk_mgr_base->clks.dispclk_khz); - - update_dispclk = true; - } - - if (!new_clocks->dtbclk_en && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) { - new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; - } - - /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */ - if (!dc->debug.disable_dtb_ref_clk_switch && - should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000) && - dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) { - /* DCCG requires KHz precision for DTBCLK */ - clk_mgr_base->clks.ref_dtbclk_khz = - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz)); - - dcn401_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); - } - - if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) { - if (dpp_clock_lowered) { - /* if clock is being lowered, increase DTO before lowering refclk */ - dcn401_update_clocks_update_dpp_dto(clk_mgr, context, - safe_to_lower, clk_mgr_base->clks.dppclk_khz); - dcn401_update_clocks_update_dentist(clk_mgr, context); - if (clk_mgr->smu_present && 
dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK)) { - clk_mgr_base->clks.actual_dppclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK, - clk_mgr_base->clks.dppclk_khz); - dcn401_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower, - clk_mgr_base->clks.actual_dppclk_khz); - } - - } else { - /* if clock is being raised, increase refclk before lowering DTO */ - if (update_dppclk || update_dispclk) - dcn401_update_clocks_update_dentist(clk_mgr, context); - /* There is a check inside dcn20_update_clocks_update_dpp_dto which ensures - * that we do not lower dto when it is not safe to lower. We do not need to - * compare the current and new dppclk before calling this function. - */ - dcn401_update_clocks_update_dpp_dto(clk_mgr, context, - safe_to_lower, clk_mgr_base->clks.actual_dppclk_khz); - } - } - - if (update_dispclk && dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) - /*update dmcu for wait_loop count*/ - dmcu->funcs->set_psr_wait_loop(dmcu, - clk_mgr_base->clks.dispclk_khz / 1000 / 7); -} - static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned int num_steps) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); @@ -869,6 +682,12 @@ static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned params->update_idle_hardmin_params.uclk_mhz, params->update_idle_hardmin_params.fclk_mhz); break; + case CLK_MGR401_UPDATE_SUBVP_HARDMINS: + dcn401_smu_set_subvp_uclk_fclk_hardmin( + clk_mgr_internal, + params->update_idle_hardmin_params.uclk_mhz, + params->update_idle_hardmin_params.fclk_mhz); + break; case CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK: dcn401_smu_set_min_deep_sleep_dcef_clk( clk_mgr_internal, @@ -945,15 +764,21 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( bool update_active_uclk = false; bool update_idle_fclk = false; bool update_idle_uclk = false; + bool update_subvp_prefetch_dramclk = false; + bool update_subvp_prefetch_fclk = false; bool 
is_idle_dpm_enabled = dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK) && dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) && dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_FCLK); + bool is_df_throttle_opt_enabled = is_idle_dpm_enabled && + dcn401_is_df_throttle_opt_enabled(clk_mgr_internal); int total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context); int active_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz); int active_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.fclk_khz); int idle_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_dramclk_khz); int idle_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_fclk_khz); + int subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz); + int subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz); unsigned int num_steps = 0; @@ -982,15 +807,15 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( update_active_fclk = true; update_idle_fclk = true; - /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW */ - if (clk_mgr_base->clks.fclk_p_state_change_support) { - /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ - if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { - block_sequence[num_steps].params.update_pstate_support_params.support = true; - block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; - num_steps++; - } - } + /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW (message not supported on DCN401)*/ + // if (clk_mgr_base->clks.fclk_p_state_change_support) { + // /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ + // if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { + // block_sequence[num_steps].params.update_pstate_support_params.support = true; 
+ // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; + // num_steps++; + // } + // } } if (!clk_mgr_base->clks.fclk_p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { @@ -1109,6 +934,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( } } + if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_dramclk_khz, clk_mgr_base->clks.subvp_prefetch_dramclk_khz)) { + clk_mgr_base->clks.subvp_prefetch_dramclk_khz = new_clocks->subvp_prefetch_dramclk_khz; + update_subvp_prefetch_dramclk = true; + subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz); + } + /* FCLK */ /* Always update saved value, even if new value not set due to P-State switching unsupported */ if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr_base->clks.fclk_khz)) { @@ -1129,6 +960,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( } } + if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_fclk_khz, clk_mgr_base->clks.subvp_prefetch_fclk_khz)) { + clk_mgr_base->clks.subvp_prefetch_fclk_khz = new_clocks->subvp_prefetch_fclk_khz; + update_subvp_prefetch_fclk = true; + subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz); + } + /* When idle DPM is enabled, need to send active and idle hardmins separately */ /* CLK_MGR401_UPDATE_ACTIVE_HARDMINS */ if ((update_active_uclk || update_active_fclk) && is_idle_dpm_enabled) { @@ -1146,6 +983,14 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( num_steps++; } + /* CLK_MGR401_UPDATE_SUBVP_HARDMINS */ + if ((update_subvp_prefetch_dramclk || update_subvp_prefetch_fclk) && is_df_throttle_opt_enabled) { + block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = subvp_prefetch_dramclk_mhz; + block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = subvp_prefetch_fclk_mhz; + block_sequence[num_steps].func = 
CLK_MGR401_UPDATE_SUBVP_HARDMINS; + num_steps++; + } + /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */ if (update_active_uclk || update_idle_uclk) { if (!is_idle_dpm_enabled) { @@ -1178,14 +1023,14 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( // (*num_steps)++; // } - /* disable FCLK P-State support if needed */ - if (!fclk_p_state_change_support && - should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support) && - dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { - block_sequence[num_steps].params.update_pstate_support_params.support = false; - block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; - num_steps++; - } + /* disable FCLK P-State support if needed (message not supported on DCN401)*/ + // if (!fclk_p_state_change_support && + // should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support) && + // dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { + // block_sequence[num_steps].params.update_pstate_support_params.support = false; + // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; + // num_steps++; + // } } if (new_clocks->fw_based_mclk_switching != clk_mgr_base->clks.fw_based_mclk_switching && @@ -1366,11 +1211,6 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, unsigned int num_steps = 0; - if (dc->debug.enable_legacy_clock_update) { - dcn401_update_clocks_legacy(clk_mgr_base, context, safe_to_lower); - return; - } - /* build bandwidth related clocks update sequence */ num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, context, @@ -1505,6 +1345,20 @@ static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool curren dcn401_execute_block_sequence(clk_mgr_base, num_steps); } +static int dcn401_get_hard_min_memclk(struct clk_mgr 
*clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + return clk_mgr->base.ctx->dc->current_state->bw_ctx.bw.dcn.clk.dramclk_khz; +} + +static int dcn401_get_hard_min_fclk(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + return clk_mgr->base.ctx->dc->current_state->bw_ctx.bw.dcn.clk.fclk_khz; +} + /* Get current memclk states, update bounding box */ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) { @@ -1549,6 +1403,15 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) if (clk_mgr->dpm_present && !num_levels) clk_mgr->dpm_present = false; + clk_mgr_base->bw_params->num_channels = dcn401_smu_get_num_of_umc_channels(clk_mgr); + if (clk_mgr_base->ctx->dc_bios) { + /* use BIOS values if none provided by PMFW */ + if (clk_mgr_base->bw_params->num_channels == 0) { + clk_mgr_base->bw_params->num_channels = clk_mgr_base->ctx->dc_bios->vram_info.num_chans; + } + clk_mgr_base->bw_params->dram_channel_width_bytes = clk_mgr_base->ctx->dc_bios->vram_info.dram_channel_width_bytes; + } + /* Refresh bounding box */ clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box( clk_mgr->base.ctx->dc, clk_mgr_base->bw_params); @@ -1638,6 +1501,8 @@ static struct clk_mgr_funcs dcn401_funcs = { .enable_pme_wa = dcn401_enable_pme_wa, .is_smu_present = dcn401_is_smu_present, .get_dispclk_from_dentist = dcn401_get_dispclk_from_dentist, + .get_hard_min_memclk = dcn401_get_hard_min_memclk, + .get_hard_min_fclk = dcn401_get_hard_min_fclk, }; struct clk_mgr_internal *dcn401_clk_mgr_construct( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h index 8b0461992b22..6c9ae5ca2c7e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h @@ -90,6 +90,7 @@ enum 
dcn401_clk_mgr_block_sequence_func { CLK_MGR401_UPDATE_DTBCLK_DTO, CLK_MGR401_UPDATE_DENTIST, CLK_MGR401_UPDATE_PSR_WAIT_LOOP, + CLK_MGR401_UPDATE_SUBVP_HARDMINS, }; struct dcn401_clk_mgr_block_sequence { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c index 7700477d019b..21c35528f61f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c @@ -21,6 +21,14 @@ #define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } +/* temporary define */ +#ifndef DALSMC_MSG_SubvpUclkFclk +#define DALSMC_MSG_SubvpUclkFclk 0x1B +#endif +#ifndef DALSMC_MSG_GetNumUmcChannels +#define DALSMC_MSG_GetNumUmcChannels 0x1C +#endif + /* * Function to be used instead of REG_WAIT macro because the wait ends when * the register is NOT EQUAL to zero, and because the translation in msg_if.h @@ -296,6 +304,24 @@ bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, return success; } +bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, + uint16_t uclk_freq_mhz, + uint16_t fclk_freq_mhz) +{ + uint32_t response = 0; + bool success; + + /* 15:0 for uclk, 32:16 for fclk */ + uint32_t param = (fclk_freq_mhz << 16) | uclk_freq_mhz; + + smu_print("SMU Set active hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz); + + success = dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SubvpUclkFclk, param, &response); + + return success; +} + void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz) { smu_print("SMU Set min deep sleep dcef clk: freq_mhz = %d MHz\n", freq_mhz); @@ -311,3 +337,14 @@ void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t n dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_NumOfDisplays, num_displays, NULL); 
} + +unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr) +{ + unsigned int response = 0; + + dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_GetNumUmcChannels, 0, &response); + + smu_print("SMU Get Num UMC Channels: num_umc_channels = %d\n", response); + + return response; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h index 651fb8d62864..e02eb1294b37 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h @@ -23,7 +23,11 @@ bool dcn401_smu_set_idle_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, uint16_t uclk_freq_mhz, uint16_t fclk_freq_mhz); +bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, + uint16_t uclk_freq_mhz, + uint16_t fclk_freq_mhz); void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz); void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays); +unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr); #endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 49fe7dcf9372..cecaadf741ad 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -579,7 +579,7 @@ dc_stream_forward_dmcu_crc_window(struct dmcu *dmcu, bool dc_stream_forward_crc_window(struct dc_stream_state *stream, - struct rect *rect, bool is_stop) + struct rect *rect, uint8_t phy_id, bool is_stop) { struct dmcu *dmcu; struct dc_dmub_srv *dmub_srv; @@ -598,7 +598,7 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream, if (i == MAX_PIPES) return false; - mux_mapping.phy_output_num = 
stream->link->link_enc_hw_inst; + mux_mapping.phy_output_num = phy_id; mux_mapping.otg_output_num = pipe->stream_res.tg->inst; dmcu = dc->res_pool->dmcu; @@ -615,6 +615,68 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream, return true; } + +static void +dc_stream_forward_dmub_multiple_crc_window(struct dc_dmub_srv *dmub_srv, + struct crc_window *window, struct otg_phy_mux *mux_mapping, bool stop) +{ + int i; + union dmub_rb_cmd cmd = {0}; + + cmd.secure_display.mul_roi_ctl.phy_id = mux_mapping->phy_output_num; + cmd.secure_display.mul_roi_ctl.otg_id = mux_mapping->otg_output_num; + + cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY; + + if (stop) { + cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_STOP_UPDATE; + } else { + cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_WIN_NOTIFY; + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { + cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_start = window[i].rect.x; + cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_start = window[i].rect.y; + cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_end = window[i].rect.x + window[i].rect.width; + cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_end = window[i].rect.y + window[i].rect.height; + cmd.secure_display.mul_roi_ctl.roi_ctl[i].enable = window[i].enable; + } + } + + dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); +} + +bool +dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream, + struct crc_window *window, uint8_t phy_id, bool stop) +{ + struct dc_dmub_srv *dmub_srv; + struct otg_phy_mux mux_mapping; + struct pipe_ctx *pipe; + int i; + struct dc *dc = stream->ctx->dc; + + for (i = 0; i < MAX_PIPES; i++) { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe) + break; + } + + /* Stream not found */ + if (i == MAX_PIPES) + return false; + + mux_mapping.phy_output_num = phy_id; + mux_mapping.otg_output_num = 
pipe->stream_res.tg->inst; + + dmub_srv = dc->ctx->dmub_srv; + + /* forward to dmub only. no dmcu support*/ + if (dmub_srv) + dc_stream_forward_dmub_multiple_crc_window(dmub_srv, window, &mux_mapping, stop); + else + return false; + + return true; +} #endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */ /** @@ -625,15 +687,17 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream, * @enable: Enable CRC if true, disable otherwise. * @continuous: Capture CRC on every frame if true. Otherwise, only capture * once. + * @idx: Capture CRC on which CRC engine instance + * @reset: Reset CRC engine before the configuration * - * By default, only CRC0 is configured, and the entire frame is used to - * calculate the CRC. + * By default, the entire frame is used to calculate the CRC. * * Return: %false if the stream is not found or CRC capture is not supported; * %true if the stream has been configured. */ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, - struct crc_params *crc_window, bool enable, bool continuous) + struct crc_params *crc_window, bool enable, bool continuous, + uint8_t idx, bool reset) { struct pipe_ctx *pipe; struct crc_params param; @@ -677,6 +741,9 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, param.continuous_mode = continuous; param.enable = enable; + param.crc_eng_inst = idx; + param.reset = reset; + tg = pipe->stream_res.tg; /* Only call if supported */ @@ -691,6 +758,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, * * @dc: DC object. * @stream: The DC stream state of the stream to get CRCs from. + * @idx: index of crc engine to get CRC from * @r_cr: CRC value for the red component. * @g_y: CRC value for the green component. * @b_cb: CRC value for the blue component. @@ -700,7 +768,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, * Return: * %false if stream is not found, or if CRCs are not enabled. 
*/ -bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, +bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { int i; @@ -721,7 +789,7 @@ bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, tg = pipe->stream_res.tg; if (tg->funcs->get_crc) - return tg->funcs->get_crc(tg, r_cr, g_y, b_cb); + return tg->funcs->get_crc(tg, idx, r_cr, g_y, b_cb); DC_LOG_WARNING("CRC capture not supported."); return false; } @@ -1173,6 +1241,8 @@ static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *conte get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); else if (dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2) get_fams2_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color)); + else if (dc->debug.visual_confirm == VISUAL_CONFIRM_VABC) + get_vabc_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); } } } @@ -2153,6 +2223,11 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params struct dc_stream_state *stream = params->streams[i]; struct dc_stream_status *status = dc_stream_get_status(stream); + /* revalidate streams */ + res = dc_validate_stream(dc, stream); + if (res != DC_OK) + return res; + dc_stream_log(dc, stream); set[i].stream = stream; @@ -2487,7 +2562,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc *dc, if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, - sizeof(union dc_tiling_info)) != 0) { + sizeof(struct dc_tiling_info)) != 0) { update_flags->bits.swizzle_change = 1; elevate_update_type(&update_type, UPDATE_TYPE_MED); @@ -2982,6 +3057,10 @@ static void copy_surface_update_to_plane( if (srf_update->cursor_csc_color_matrix) surface->cursor_csc_color_matrix = *srf_update->cursor_csc_color_matrix; + + if (srf_update->bias_and_scale.bias_and_scale_valid) + surface->bias_and_scale = + 
srf_update->bias_and_scale; } static void copy_stream_update_to_stream(struct dc *dc, @@ -4510,7 +4589,7 @@ static bool commit_minimal_transition_based_on_current_context(struct dc *dc, struct pipe_split_policy_backup policy; struct dc_state *intermediate_context; struct dc_state *old_current_state = dc->current_state; - struct dc_surface_update srf_updates[MAX_SURFACE_NUM] = {0}; + struct dc_surface_update srf_updates[MAX_SURFACES] = {0}; int surface_count; /* @@ -5307,11 +5386,9 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state) dc->vm_pa_config.valid) { dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config); } - break; default: ASSERT(dc->current_state->stream_count == 0); - dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); dc_state_destruct(dc->current_state); @@ -5435,6 +5512,11 @@ bool dc_set_ips_disable(struct dc *dc, unsigned int disable_ips) void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const *caller_name) { + int idle_fclk_khz = 0, idle_dramclk_khz = 0, i = 0; + enum mall_stream_type subvp_pipe_type[MAX_PIPES] = {0}; + struct pipe_ctx *pipe = NULL; + struct dc_state *context = dc->current_state; + if (dc->debug.disable_idle_power_optimizations) { DC_LOG_DEBUG("%s: disabled\n", __func__); return; @@ -5459,6 +5541,23 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const dc->idle_optimizations_allowed = allow; DC_LOG_DEBUG("%s: %s\n", __func__, allow ? 
"enabled" : "disabled"); } + + // log idle clocks and sub vp pipe types at idle optimization time + if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_fclk) + idle_fclk_khz = dc->clk_mgr->funcs->get_hard_min_fclk(dc->clk_mgr); + + if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_memclk) + idle_dramclk_khz = dc->clk_mgr->funcs->get_hard_min_memclk(dc->clk_mgr); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe); + } + + DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n", + __func__, allow, idle_fclk_khz, idle_dramclk_khz, subvp_pipe_type[0], subvp_pipe_type[1], subvp_pipe_type[2], + subvp_pipe_type[3], subvp_pipe_type[4], subvp_pipe_type[5], caller_name); + } void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name) @@ -6056,7 +6155,7 @@ void dc_query_current_properties(struct dc *dc, struct dc_current_properties *pr bool subvp_sw_cursor_req = false; for (i = 0; i < dc->current_state->stream_count; i++) { - if (check_subvp_sw_cursor_fallback_req(dc, dc->current_state->streams[i])) { + if (check_subvp_sw_cursor_fallback_req(dc, dc->current_state->streams[i]) && !dc->current_state->streams[i]->hw_cursor_req) { subvp_sw_cursor_req = true; break; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 252af83e34a5..6eb9bae3af91 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -425,6 +425,44 @@ void get_hdr_visual_confirm_color( } } +/* Visual Confirm color definition for VABC */ +void get_vabc_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color) +{ + uint32_t color_value = MAX_TG_COLOR_VALUE; + struct dc_link *edp_link = NULL; + + if (pipe_ctx && 
pipe_ctx->stream && pipe_ctx->stream->link) { + if (pipe_ctx->stream->link->connector_signal == SIGNAL_TYPE_EDP) + edp_link = pipe_ctx->stream->link; + } + + if (edp_link) { + switch (edp_link->backlight_control_type) { + case BACKLIGHT_CONTROL_PWM: + color->color_r_cr = color_value; + color->color_g_y = 0; + color->color_b_cb = 0; + break; + case BACKLIGHT_CONTROL_AMD_AUX: + color->color_r_cr = 0; + color->color_g_y = color_value; + color->color_b_cb = 0; + break; + case BACKLIGHT_CONTROL_VESA_AUX: + color->color_r_cr = 0; + color->color_g_y = 0; + color->color_b_cb = color_value; + break; + } + } else { + color->color_r_cr = 0; + color->color_g_y = 0; + color->color_b_cb = 0; + } +} + void get_subvp_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index 457d60eeb486..c1b79b379447 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -125,6 +125,14 @@ uint32_t dc_link_bandwidth_kbps( return link->dc->link_srv->dp_link_bandwidth_kbps(link, link_settings); } +uint32_t dc_link_required_hblank_size_bytes( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params) +{ + return link->dc->link_srv->dp_required_hblank_size_bytes(link, + audio_params); +} + void dc_get_cur_link_res_map(const struct dc *dc, uint32_t *map) { dc->link_srv->get_cur_res_map(dc, map); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 626f75b6ad00..520a34a42827 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -4478,7 +4478,7 @@ static void set_hfvs_info_packet( static void adaptive_sync_override_dp_info_packets_sdp_line_num( const struct dc_crtc_timing *timing, struct enc_sdp_line_num *sdp_line_num, - struct 
_vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param) + unsigned int vstartup_start) { uint32_t asic_blank_start = 0; uint32_t asic_blank_end = 0; @@ -4493,8 +4493,8 @@ static void adaptive_sync_override_dp_info_packets_sdp_line_num( asic_blank_end = (asic_blank_start - tg->v_border_bottom - tg->v_addressable - tg->v_border_top); - if (pipe_dlg_param->vstartup_start > asic_blank_end) { - v_update = (tg->v_total - (pipe_dlg_param->vstartup_start - asic_blank_end)); + if (vstartup_start > asic_blank_end) { + v_update = (tg->v_total - (vstartup_start - asic_blank_end)); sdp_line_num->adaptive_sync_line_num_valid = true; sdp_line_num->adaptive_sync_line_num = (tg->v_total - v_update - 1); } else { @@ -4507,7 +4507,7 @@ static void set_adaptive_sync_info_packet( struct dc_info_packet *info_packet, const struct dc_stream_state *stream, struct encoder_info_frame *info_frame, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param) + unsigned int vstartup_start) { if (!stream->adaptive_sync_infopacket.valid) return; @@ -4515,7 +4515,7 @@ static void set_adaptive_sync_info_packet( adaptive_sync_override_dp_info_packets_sdp_line_num( &stream->timing, &info_frame->sdp_line_num, - pipe_dlg_param); + vstartup_start); *info_packet = stream->adaptive_sync_infopacket; } @@ -4548,6 +4548,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) { enum signal_type signal = SIGNAL_TYPE_NONE; struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; + unsigned int vstartup_start = 0; /* default all packets to invalid */ info->avi.valid = false; @@ -4561,6 +4562,9 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) info->adaptive_sync.valid = false; signal = pipe_ctx->stream->signal; + if (pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe) + vstartup_start = pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe(pipe_ctx); + /* HDMi and DP have different info packets*/ if (dc_is_hdmi_signal(signal)) { 
set_avi_info_frame(&info->avi, pipe_ctx); @@ -4582,7 +4586,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) set_adaptive_sync_info_packet(&info->adaptive_sync, pipe_ctx->stream, info, - &pipe_ctx->pipe_dlg_param); + vstartup_start); } patch_gamut_packet_checksum(&info->gamut); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index e006f816ff2f..1b2cce127981 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -483,9 +483,9 @@ bool dc_state_add_plane( if (stream_status == NULL) { dm_error("Existing stream not found; failed to attach surface!\n"); goto out; - } else if (stream_status->plane_count == MAX_SURFACE_NUM) { + } else if (stream_status->plane_count == MAX_SURFACES) { dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n", - plane_state, MAX_SURFACE_NUM); + plane_state, MAX_SURFACES); goto out; } else if (!otg_master_pipe) { goto out; @@ -600,7 +600,7 @@ bool dc_state_rem_all_planes_for_stream( { int i, old_plane_count; struct dc_stream_status *stream_status = NULL; - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; + struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; for (i = 0; i < state->stream_count; i++) if (state->streams[i] == stream) { @@ -875,7 +875,7 @@ bool dc_state_rem_all_phantom_planes_for_stream( { int i, old_plane_count; struct dc_stream_status *stream_status = NULL; - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; + struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; for (i = 0; i < state->stream_count; i++) if (state->streams[i] == phantom_stream) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 55dc482d9b36..e8134c47fe0d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -37,6 +37,8 @@ #define DC_LOGGER dc->ctx->logger 
#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif +#ifndef MAX #define MAX(x, y) ((x > y) ? x : y) #endif @@ -605,17 +607,6 @@ bool dc_stream_remove_writeback(struct dc *dc, return true; } -bool dc_stream_warmup_writeback(struct dc *dc, - int num_dwb, - struct dc_writeback_info *wb_info) -{ - dc_exit_ips_for_hw_access(dc); - - if (dc->hwss.mmhubbub_warmup) - return dc->hwss.mmhubbub_warmup(dc, num_dwb, wb_info); - else - return false; -} uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream) { uint8_t i; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index ccbb15f1638c..f3471d45b312 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -83,13 +83,6 @@ uint8_t dc_plane_get_pipe_mask(struct dc_state *dc_state, const struct dc_plane /******************************************************************************* * Public functions ******************************************************************************/ -void enable_surface_flip_reporting(struct dc_plane_state *plane_state, - uint32_t controller_id) -{ - plane_state->irq_source = controller_id + DC_IRQ_SOURCE_PFLIP1 - 1; - /*register_flip_interrupt(surface);*/ -} - struct dc_plane_state *dc_create_plane_state(const struct dc *dc) { struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state), @@ -277,4 +270,50 @@ void dc_3dlut_func_retain(struct dc_3dlut *lut) kref_get(&lut->refcount); } +void dc_plane_force_update_for_panic(struct dc_plane_state *plane_state, + bool clear_tiling) +{ + struct dc *dc; + int i; + + if (!plane_state) + return; + + dc = plane_state->ctx->dc; + if (!dc || !dc->current_state) + return; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx) + continue; + + if (dc->ctx->dce_version >= DCE_VERSION_MAX) { + struct hubp *hubp = 
pipe_ctx->plane_res.hubp; + if (!hubp) + continue; + /* if framebuffer is tiled, disable tiling */ + if (clear_tiling && hubp->funcs->hubp_clear_tiling) + hubp->funcs->hubp_clear_tiling(hubp); + + /* force page flip to see the new content of the framebuffer */ + hubp->funcs->hubp_program_surface_flip_and_addr(hubp, + &plane_state->address, + true); + } else { + struct mem_input *mi = pipe_ctx->plane_res.mi; + if (!mi) + continue; + /* if framebuffer is tiled, disable tiling */ + if (clear_tiling && mi->funcs->mem_input_clear_tiling) + mi->funcs->mem_input_clear_tiling(mi); + + /* force page flip to see the new content of the framebuffer */ + mi->funcs->mem_input_program_surface_flip_and_addr(mi, + &plane_state->address, + true); + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e9b9126c0401..053481ab69ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,9 +55,9 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.310" +#define DC_VER "3.2.316" -#define MAX_SURFACES 3 +#define MAX_SURFACES 4 #define MAX_PLANES 6 #define MAX_STREAMS 6 #define MIN_VIEWPORT_SIZE 12 @@ -463,6 +463,7 @@ struct dc_config { bool enable_auto_dpm_test_logs; unsigned int disable_ips; unsigned int disable_ips_in_vpb; + bool disable_ips_in_dpms_off; bool usb4_bw_alloc_support; bool allow_0_dtb_clk; bool use_assr_psp_message; @@ -471,6 +472,7 @@ struct dc_config { bool disable_hbr_audio_dp2; bool consolidated_dpia_dp_lt; bool set_pipe_unlock_order; + bool enable_dpia_pre_training; }; enum visual_confirm { @@ -487,6 +489,7 @@ enum visual_confirm { VISUAL_CONFIRM_MCLK_SWITCH = 16, VISUAL_CONFIRM_FAMS2 = 19, VISUAL_CONFIRM_HW_CURSOR = 20, + VISUAL_CONFIRM_VABC = 21, }; enum dc_psr_power_opts { @@ -628,6 +631,8 @@ struct dc_clocks { int bw_dispclk_khz; int idle_dramclk_khz; int idle_fclk_khz; + int subvp_prefetch_dramclk_khz; + int 
subvp_prefetch_fclk_khz; }; struct dc_bw_validation_profile { @@ -772,7 +777,8 @@ union dpia_debug_options { uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ uint32_t disable_usb4_pm_support:1; /* bit 5 */ uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */ - uint32_t reserved:25; + uint32_t enable_dpia_pre_training:1; /* bit 7 */ + uint32_t reserved:24; } bits; uint32_t raw; }; @@ -1055,8 +1061,8 @@ struct dc_debug_options { bool dml21_force_pstate_method; uint32_t dml21_force_pstate_method_values[MAX_PIPES]; uint32_t dml21_disable_pstate_method_mask; + union fw_assisted_mclk_switch_version fams_version; union dmub_fams2_global_feature_config fams2_config; - bool enable_legacy_clock_update; unsigned int force_cositing; unsigned int disable_spl; unsigned int force_easf; @@ -1070,6 +1076,7 @@ struct dc_debug_options { bool skip_full_updated_if_possible; unsigned int enable_oled_edp_power_up_opt; bool enable_hblank_borrow; + bool force_subvp_df_throttle; }; @@ -1300,7 +1307,7 @@ struct dc_plane_state { struct rect clip_rect; struct plane_size plane_size; - union dc_tiling_info tiling_info; + struct dc_tiling_info tiling_info; struct dc_plane_dcc_param dcc; @@ -1371,7 +1378,7 @@ struct dc_plane_state { struct dc_plane_info { struct plane_size plane_size; - union dc_tiling_info tiling_info; + struct dc_tiling_info tiling_info; struct dc_plane_dcc_param dcc; enum surface_pixel_format format; enum dc_rotation_angle rotation; @@ -1398,7 +1405,7 @@ struct dc_scratch_space { * store current value in plane states so we can still recover * a valid current state during dc update. 
*/ - struct dc_plane_state plane_states[MAX_SURFACE_NUM]; + struct dc_plane_state plane_states[MAX_SURFACES]; struct dc_stream_state stream_state; }; @@ -1526,6 +1533,7 @@ struct dc_surface_update { const struct dc_cm2_parameters *cm2_params; const struct dc_csc_transform *cursor_csc_color_matrix; unsigned int sdr_white_level_nits; + struct dc_bias_and_scale bias_and_scale; }; /* @@ -2019,6 +2027,24 @@ uint32_t dc_link_bandwidth_kbps( const struct dc_link *link, const struct dc_link_settings *link_setting); +struct dp_audio_bandwidth_params { + const struct dc_crtc_timing *crtc_timing; + enum dp_link_encoding link_encoding; + uint32_t channel_count; + uint32_t sample_rate_hz; +}; + +/* The function calculates the minimum size of hblank (in bytes) needed to + * support the specified channel count and sample rate combination, given the + * link encoding and timing to be used. This calculation is not supported + * for 8b/10b SST. + * + * return - min hblank size in bytes, 0 if 8b/10b SST. + */ +uint32_t dc_link_required_hblank_size_bytes( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params); + /* The function takes a snapshot of current link resource allocation state * @dc: pointer to dc of the dm calling this * @map: a dc link resource snapshot defined internally to dc. 
@@ -2378,6 +2404,13 @@ struct dc_sink_dsc_caps { struct dsc_dec_dpcd_caps dsc_dec_caps; }; +struct dc_sink_hblank_expansion_caps { + // 'true' if these are virtual DPCD's HBlank expansion caps (immediately upstream of sink in MST topology), + // 'false' if they are sink's HBlank expansion caps + bool is_virtual_dpcd_hblank_expansion; + struct hblank_expansion_dpcd_caps dpcd_caps; +}; + struct dc_sink_fec_caps { bool is_rx_fec_supported; bool is_topology_fec_supported; @@ -2404,6 +2437,7 @@ struct dc_sink { struct scdc_caps scdc_caps; struct dc_sink_dsc_caps dsc_caps; struct dc_sink_fec_caps fec_caps; + struct dc_sink_hblank_expansion_caps hblank_expansion_caps; bool is_vsc_sdp_colorimetry_supported; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index f90fc154549a..44ff9abe2880 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1245,7 +1245,7 @@ static int count_active_streams(const struct dc *dc) for (i = 0; i < dc->current_state->stream_count; ++i) { struct dc_stream_state *stream = dc->current_state->streams[i]; - if (stream && !stream->dpms_off) + if (stream && (!stream->dpms_off || dc->config.disable_ips_in_dpms_off)) count += 1; } @@ -1694,10 +1694,10 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, { uint8_t num_cmds = 1; uint32_t i; - union dmub_rb_cmd cmd[MAX_STREAMS + 1]; + union dmub_rb_cmd cmd[2 * MAX_STREAMS + 1]; struct dmub_rb_cmd_fams2 *global_cmd = &cmd[0].fams2_config; - memset(cmd, 0, sizeof(union dmub_rb_cmd) * (MAX_STREAMS + 1)); + memset(cmd, 0, sizeof(union dmub_rb_cmd) * (2 * MAX_STREAMS + 1)); /* fill in generic command header */ global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; @@ -1714,17 +1714,26 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* construct per-stream configs */ for (i = 0; i < 
context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) { - struct dmub_rb_cmd_fams2 *stream_cmd = &cmd[i+1].fams2_config; + struct dmub_rb_cmd_fams2 *stream_base_cmd = &cmd[i+1].fams2_config; + struct dmub_rb_cmd_fams2 *stream_sub_state_cmd = &cmd[i+1+context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config; /* configure command header */ - stream_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; - stream_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); - stream_cmd->header.multi_cmd_pending = 1; - /* copy stream static state */ - memcpy(&stream_cmd->config.stream, - &context->bw_ctx.bw.dcn.fams2_stream_params[i], - sizeof(struct dmub_fams2_stream_static_state)); + stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; + stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; + stream_base_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_base_cmd->header.multi_cmd_pending = 1; + stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; + stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; + stream_sub_state_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_sub_state_cmd->header.multi_cmd_pending = 1; + /* copy stream static base state */ + memcpy(&stream_base_cmd->config, + &context->bw_ctx.bw.dcn.fams2_stream_base_params[i], + sizeof(union dmub_cmd_fams2_config)); + /* copy stream static sub state */ + memcpy(&stream_sub_state_cmd->config, + &context->bw_ctx.bw.dcn.fams2_stream_sub_params[i], + sizeof(union dmub_cmd_fams2_config)); } } @@ -1735,8 +1744,8 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) { /* set multi pending for global, and unset for last stream cmd */ global_cmd->header.multi_cmd_pending = 1; - 
cmd[context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0; - num_cmds += context->bw_ctx.bw.dcn.fams2_global_config.num_streams; + cmd[2 * context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0; + num_cmds += 2 * context->bw_ctx.bw.dcn.fams2_global_config.num_streams; } dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 8dd6eb044829..94ce8fe74481 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -969,6 +969,21 @@ union dp_sink_video_fallback_formats { uint8_t raw; }; +union dp_receive_port0_cap { + struct { + uint8_t RESERVED :1; + uint8_t LOCAL_EDID_PRESENT :1; + uint8_t ASSOCIATED_TO_PRECEDING_PORT:1; + uint8_t HBLANK_EXPANSION_CAPABLE :1; + uint8_t BUFFER_SIZE_UNIT :1; + uint8_t BUFFER_SIZE_PER_PORT :1; + uint8_t HBLANK_REDUCTION_CAPABLE :1; + uint8_t RESERVED2:1; + uint8_t BUFFER_SIZE:8; + } bits; + uint8_t raw[2]; +}; + union dpcd_max_uncompressed_pixel_rate_cap { struct { uint16_t max_uncompressed_pixel_rate_cap :15; @@ -1193,6 +1208,7 @@ struct dpcd_caps { struct replay_info pr_info; uint16_t edp_oled_emission_rate; + union dp_receive_port0_cap receive_port0_cap; }; union dpcd_sink_ext_caps { diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index 9014c2409817..9d18f1c08079 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -94,6 +94,11 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( const int num_slices_h, const bool is_dp); +void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc, + const struct dsc_dec_dpcd_caps *dsc_sink_caps); +void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc, + const struct dc_crtc_timing *timing); + /* TODO - 
Hardware/specs limitation should be owned by dc dsc and returned to DM, * and DM can choose to OVERRIDE the limitation on CASE BY CASE basis. * Hardware/specs limitation should not be writable by DM. diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index c10567ec1c81..5ac55601a6da 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -341,89 +341,101 @@ enum swizzle_mode_addr3_values { DC_ADDR3_SW_UNKNOWN = DC_ADDR3_SW_MAX }; -union dc_tiling_info { - - struct { - /* Specifies the number of memory banks for tiling - * purposes. - * Only applies to 2D and 3D tiling modes. - * POSSIBLE VALUES: 2,4,8,16 - */ - unsigned int num_banks; - /* Specifies the number of tiles in the x direction - * to be incorporated into the same bank. - * Only applies to 2D and 3D tiling modes. - * POSSIBLE VALUES: 1,2,4,8 - */ - unsigned int bank_width; - unsigned int bank_width_c; - /* Specifies the number of tiles in the y direction to - * be incorporated into the same bank. - * Only applies to 2D and 3D tiling modes. - * POSSIBLE VALUES: 1,2,4,8 - */ - unsigned int bank_height; - unsigned int bank_height_c; - /* Specifies the macro tile aspect ratio. Only applies - * to 2D and 3D tiling modes. - */ - unsigned int tile_aspect; - unsigned int tile_aspect_c; - /* Specifies the number of bytes that will be stored - * contiguously for each tile. - * If the tile data requires more storage than this - * amount, it is split into multiple slices. - * This field must not be larger than - * GB_ADDR_CONFIG.DRAM_ROW_SIZE. - * Only applies to 2D and 3D tiling modes. - * For color render targets, TILE_SPLIT >= 256B. - */ - enum tile_split_values tile_split; - enum tile_split_values tile_split_c; - /* Specifies the addressing within a tile. 
- * 0x0 - DISPLAY_MICRO_TILING - * 0x1 - THIN_MICRO_TILING - * 0x2 - DEPTH_MICRO_TILING - * 0x3 - ROTATED_MICRO_TILING - */ - enum tile_mode_values tile_mode; - enum tile_mode_values tile_mode_c; - /* Specifies the number of pipes and how they are - * interleaved in the surface. - * Refer to memory addressing document for complete - * details and constraints. - */ - unsigned int pipe_config; - /* Specifies the tiling mode of the surface. - * THIN tiles use an 8x8x1 tile size. - * THICK tiles use an 8x8x4 tile size. - * 2D tiling modes rotate banks for successive Z slices - * 3D tiling modes rotate pipes and banks for Z slices - * Refer to memory addressing document for complete - * details and constraints. - */ - enum array_mode_values array_mode; - } gfx8; +enum dc_gfxversion { + DcGfxVersion7 = 0, + DcGfxVersion8, + DcGfxVersion9, + DcGfxVersion10, + DcGfxVersion11, + DcGfxAddr3, + DcGfxVersionUnknown +}; + + struct dc_tiling_info { + unsigned int gfxversion; // Specifies which part of the union to use. Must use DalGfxVersion enum + union { + struct { + /* Specifies the number of memory banks for tiling + * purposes. + * Only applies to 2D and 3D tiling modes. + * POSSIBLE VALUES: 2,4,8,16 + */ + unsigned int num_banks; + /* Specifies the number of tiles in the x direction + * to be incorporated into the same bank. + * Only applies to 2D and 3D tiling modes. + * POSSIBLE VALUES: 1,2,4,8 + */ + unsigned int bank_width; + unsigned int bank_width_c; + /* Specifies the number of tiles in the y direction to + * be incorporated into the same bank. + * Only applies to 2D and 3D tiling modes. + * POSSIBLE VALUES: 1,2,4,8 + */ + unsigned int bank_height; + unsigned int bank_height_c; + /* Specifies the macro tile aspect ratio. Only applies + * to 2D and 3D tiling modes. + */ + unsigned int tile_aspect; + unsigned int tile_aspect_c; + /* Specifies the number of bytes that will be stored + * contiguously for each tile. 
+ * If the tile data requires more storage than this + * amount, it is split into multiple slices. + * This field must not be larger than + * GB_ADDR_CONFIG.DRAM_ROW_SIZE. + * Only applies to 2D and 3D tiling modes. + * For color render targets, TILE_SPLIT >= 256B. + */ + enum tile_split_values tile_split; + enum tile_split_values tile_split_c; + /* Specifies the addressing within a tile. + * 0x0 - DISPLAY_MICRO_TILING + * 0x1 - THIN_MICRO_TILING + * 0x2 - DEPTH_MICRO_TILING + * 0x3 - ROTATED_MICRO_TILING + */ + enum tile_mode_values tile_mode; + enum tile_mode_values tile_mode_c; + /* Specifies the number of pipes and how they are + * interleaved in the surface. + * Refer to memory addressing document for complete + * details and constraints. + */ + unsigned int pipe_config; + /* Specifies the tiling mode of the surface. + * THIN tiles use an 8x8x1 tile size. + * THICK tiles use an 8x8x4 tile size. + * 2D tiling modes rotate banks for successive Z slices + * 3D tiling modes rotate pipes and banks for Z slices + * Refer to memory addressing document for complete + * details and constraints. 
+ */ + enum array_mode_values array_mode; + } gfx8; - struct { - enum swizzle_mode_values swizzle; - unsigned int num_pipes; - unsigned int max_compressed_frags; - unsigned int pipe_interleave; - - unsigned int num_banks; - unsigned int num_shader_engines; - unsigned int num_rb_per_se; - bool shaderEnable; - - bool meta_linear; - bool rb_aligned; - bool pipe_aligned; - unsigned int num_pkrs; - } gfx9;/*gfx9, gfx10 and above*/ - struct { - enum swizzle_mode_addr3_values swizzle; - } gfx_addr3;/*gfx with addr3 and above*/ + struct { + enum swizzle_mode_values swizzle; + unsigned int num_pipes; + unsigned int max_compressed_frags; + unsigned int pipe_interleave; + + unsigned int num_banks; + unsigned int num_shader_engines; + unsigned int num_rb_per_se; + bool shaderEnable; + + bool meta_linear; + bool rb_aligned; + bool pipe_aligned; + unsigned int num_pkrs; + } gfx9;/*gfx9, gfx10 and above*/ + struct { + enum swizzle_mode_addr3_values swizzle; + } gfx_addr3;/*gfx with addr3 and above*/ + }; }; /* Rotation angle */ @@ -975,6 +987,9 @@ struct dc_crtc_timing { struct dc_crtc_timing_flags flags; uint32_t dsc_fixed_bits_per_pixel_x16; /* DSC target bitrate in 1/16 of bpp (e.g. 128 -> 8bpp) */ struct dc_dsc_config dsc_cfg; + + /* The number of pixels that HBlank has been expanded by from the original EDID timing. 
*/ + uint32_t expanded_hblank; }; enum trigger_delay { diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h index bd37ec82b42d..fabcefeda288 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_plane.h +++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h @@ -34,4 +34,7 @@ const struct dc_plane_status *dc_plane_get_status( void dc_plane_state_retain(struct dc_plane_state *plane_state); void dc_plane_state_release(struct dc_plane_state *plane_state); +void dc_plane_force_update_for_panic(struct dc_plane_state *plane_state, + bool clear_tiling); + #endif /* _DC_PLANE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 0e310fd48b5c..3518eb1b8cd1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -64,6 +64,13 @@ static void populate_inits_from_splinits(struct scl_inits *inits, inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19); inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19); } +static void populate_splformat_from_format(enum spl_pixel_format *spl_pixel_format, const enum pixel_format pixel_format) +{ + if (pixel_format < PIXEL_FORMAT_INVALID) + *spl_pixel_format = (enum spl_pixel_format)pixel_format; + else + *spl_pixel_format = SPL_PIXEL_FORMAT_INVALID; +} /// @brief Translate SPL input parameters from pipe context /// @param pipe_ctx /// @param spl_in @@ -89,7 +96,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->callbacks = dcn2_spl_callbacks; } // Make format field from spl_in point to plane_res scl_data format - spl_in->basic_in.format = (enum spl_pixel_format)pipe_ctx->plane_res.scl_data.format; + populate_splformat_from_format(&spl_in->basic_in.format, pipe_ctx->plane_res.scl_data.format); // Make view_format from 
basic_out point to view_format from stream spl_in->basic_out.view_format = (enum spl_view_3d)stream->view_format; // Populate spl input basic input clip rect from plane state clip rect @@ -108,12 +115,14 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->basic_in.horizontal_mirror = plane_state->horizontal_mirror; // Calculate horizontal splits and split index - spl_in->basic_in.mpc_combine_h = resource_get_mpc_slice_count(pipe_ctx); + spl_in->basic_in.num_h_slices_recout_width_align.use_recout_width_aligned = false; + spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_num_h_slices = + resource_get_mpc_slice_count(pipe_ctx); if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE) - spl_in->basic_in.mpc_combine_v = 0; + spl_in->basic_in.mpc_h_slice_index = 0; else - spl_in->basic_in.mpc_combine_v = resource_get_mpc_slice_index(pipe_ctx); + spl_in->basic_in.mpc_h_slice_index = resource_get_mpc_slice_index(pipe_ctx); populate_splrect_from_rect(&spl_in->basic_out.odm_slice_rect, &odm_slice_src); spl_in->basic_out.odm_combine_factor = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 413970588a26..3e303c7808fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -56,7 +56,7 @@ struct dc_stream_status { int plane_count; int audio_inst; struct timing_sync_info timing_sync_info; - struct dc_plane_state *plane_states[MAX_SURFACE_NUM]; + struct dc_plane_state *plane_states[MAX_SURFACES]; bool is_abm_supported; struct mall_stream_config mall_stream_config; bool fpo_in_use; @@ -447,10 +447,6 @@ enum dc_status dc_stream_add_dsc_to_resource(struct dc *dc, struct dc_state *state, struct dc_stream_state *stream); -bool dc_stream_warmup_writeback(struct dc *dc, - int num_dwb, - struct dc_writeback_info *wb_info); - bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream); bool 
dc_stream_set_dynamic_metadata(struct dc *dc, @@ -541,17 +537,26 @@ bool dc_stream_get_crtc_position(struct dc *dc, #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) bool dc_stream_forward_crc_window(struct dc_stream_state *stream, struct rect *rect, + uint8_t phy_id, bool is_stop); + +bool dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream, + struct crc_window *window, + uint8_t phy_id, + bool stop); #endif bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, struct crc_params *crc_window, bool enable, - bool continuous); + bool continuous, + uint8_t idx, + bool reset); bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, + uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index edf4df1d03b5..0c2aa91f0a11 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -76,7 +76,6 @@ struct dc_perf_trace { unsigned long last_entry_write; }; -#define MAX_SURFACE_NUM 6 #define NUM_PIXEL_FORMATS 10 enum tiling_mode { @@ -875,6 +874,14 @@ struct dsc_dec_dpcd_caps { bool is_dp; /* Decoded format */ }; +struct hblank_expansion_dpcd_caps { + bool expansion_supported; + bool reduction_supported; + bool buffer_unit_bytes; /* True: buffer size in bytes. False: buffer size in pixels*/ + bool buffer_per_port; /* True: buffer size per port. 
False: buffer size per lane*/ + uint32_t buffer_size; /* Add 1 to value and multiply by 32 */ +}; + struct dc_golden_table { uint16_t dc_golden_table_ver; uint32_t aux_dphy_rx_control0_val; @@ -932,10 +939,17 @@ enum backlight_control_type { }; #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) +#define MAX_CRC_WINDOW_NUM 2 + struct otg_phy_mux { uint8_t phy_output_num; uint8_t otg_output_num; }; + +struct crc_window { + struct rect rect; + bool enable; +}; #endif enum dc_detect_reason { @@ -1052,10 +1066,13 @@ enum replay_FW_Message_type { union replay_error_status { struct { - unsigned char STATE_TRANSITION_ERROR :1; - unsigned char LINK_CRC_ERROR :1; - unsigned char DESYNC_ERROR :1; - unsigned char RESERVED :5; + unsigned int STATE_TRANSITION_ERROR :1; + unsigned int LINK_CRC_ERROR :1; + unsigned int DESYNC_ERROR :1; + unsigned int RESERVED_3 :1; + unsigned int LOW_RR_INCORRECT_VTOTAL :1; + unsigned int NO_DOUBLED_RR :1; + unsigned int RESERVED_6_7 :2; } bits; unsigned char raw; }; @@ -1102,6 +1119,8 @@ struct replay_config { union replay_error_status replay_error_status; /* Replay Low Hz enable Options */ union replay_low_refresh_rate_enable_options low_rr_enable_options; + /* Replay coasting vtotal is within low refresh rate range. */ + bool low_rr_activated; }; /* Replay feature flags*/ @@ -1126,10 +1145,12 @@ struct replay_settings { uint32_t defer_update_coasting_vtotal_table[PR_COASTING_TYPE_NUM]; /* Maximum link off frame count */ uint32_t link_off_frame_count; - /* Replay pseudo vtotal for abm + ips on full screen video which can improve ips residency */ - uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal; + /* Replay pseudo vtotal for low refresh rate*/ + uint16_t low_rr_full_screen_video_pseudo_vtotal; /* Replay last pseudo vtotal set to DMUB */ uint16_t last_pseudo_vtotal; + /* Replay desync error */ + uint32_t replay_desync_error_fail_count; }; /* To split out "global" and "per-panel" config settings. 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index b700608e4240..077337698e0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -1105,6 +1105,9 @@ static bool dcn401_program_pix_clk( &dto_params); } else { + if (pll_settings->actual_pix_clk_100hz > 6000000UL) + return false; + /* disables DP DTO when provided with TMDS signal type */ clock_source->ctx->dc->res_pool->dccg->funcs->set_dp_dto( clock_source->ctx->dc->res_pool->dccg, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index f5e1d9caee4c..1c2009e38aa1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -98,7 +98,7 @@ static enum mi_bits_per_pixel get_mi_bpp( } static enum mi_tiling_format get_mi_tiling( - union dc_tiling_info *tiling_info) + struct dc_tiling_info *tiling_info) { switch (tiling_info->gfx8.array_mode) { case DC_ARRAY_1D_TILED_THIN1: @@ -133,7 +133,7 @@ static bool is_vert_scan(enum dc_rotation_angle rotation) static void dce_mi_program_pte_vm( struct mem_input *mi, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum dc_rotation_angle rotation) { struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi); @@ -430,7 +430,7 @@ static void dce120_mi_program_display_marks(struct mem_input *mi, } static void program_tiling( - struct dce_mem_input *dce_mi, const union dc_tiling_info *info) + struct dce_mem_input *dce_mi, const struct dc_tiling_info *info) { if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */ REG_UPDATE_6(GRPH_CONTROL, @@ -481,7 +481,6 @@ static void program_tiling( } } - static void program_size_and_rotation( struct dce_mem_input *dce_mi, enum dc_rotation_angle rotation, @@ -627,10 +626,31 @@ static void program_grph_pixel_format( 
GRPH_PRESCALE_B_SIGN, sign); } +static void dce_mi_clear_tiling( + struct mem_input *mi) +{ + struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi); + + if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */ + REG_UPDATE(GRPH_CONTROL, + GRPH_SW_MODE, DC_SW_LINEAR); + } + + if (dce_mi->masks->GRPH_MICRO_TILE_MODE) { /* GFX8 */ + REG_UPDATE(GRPH_CONTROL, + GRPH_ARRAY_MODE, DC_SW_LINEAR); + } + + if (dce_mi->masks->GRPH_ARRAY_MODE) { /* GFX6 but reuses gfx8 struct */ + REG_UPDATE(GRPH_CONTROL, + GRPH_ARRAY_MODE, DC_SW_LINEAR); + } +} + static void dce_mi_program_surface_config( struct mem_input *mi, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -650,7 +670,7 @@ static void dce_mi_program_surface_config( static void dce60_mi_program_surface_config( struct mem_input *mi, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, /* not used in DCE6 */ struct dc_plane_dcc_param *dcc, @@ -884,7 +904,8 @@ static const struct mem_input_funcs dce_mi_funcs = { .mem_input_program_pte_vm = dce_mi_program_pte_vm, .mem_input_program_surface_config = dce_mi_program_surface_config, - .mem_input_is_flip_pending = dce_mi_is_flip_pending + .mem_input_is_flip_pending = dce_mi_is_flip_pending, + .mem_input_clear_tiling = dce_mi_clear_tiling, }; #if defined(CONFIG_DRM_AMD_DC_SI) @@ -897,7 +918,8 @@ static const struct mem_input_funcs dce60_mi_funcs = { .mem_input_program_pte_vm = dce_mi_program_pte_vm, .mem_input_program_surface_config = dce60_mi_program_surface_config, - .mem_input_is_flip_pending = dce_mi_is_flip_pending + .mem_input_is_flip_pending = dce_mi_is_flip_pending, + .mem_input_clear_tiling = dce_mi_clear_tiling, }; #endif @@ -910,7 +932,8 @@ static const struct mem_input_funcs dce112_mi_funcs = { 
.mem_input_program_pte_vm = dce_mi_program_pte_vm, .mem_input_program_surface_config = dce_mi_program_surface_config, - .mem_input_is_flip_pending = dce_mi_is_flip_pending + .mem_input_is_flip_pending = dce_mi_is_flip_pending, + .mem_input_clear_tiling = dce_mi_clear_tiling, }; static const struct mem_input_funcs dce120_mi_funcs = { @@ -922,7 +945,8 @@ static const struct mem_input_funcs dce120_mi_funcs = { .mem_input_program_pte_vm = dce_mi_program_pte_vm, .mem_input_program_surface_config = dce_mi_program_surface_config, - .mem_input_is_flip_pending = dce_mi_is_flip_pending + .mem_input_is_flip_pending = dce_mi_is_flip_pending, + .mem_input_clear_tiling = dce_mi_clear_tiling, }; void dce_mem_input_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index bf636b28e3e1..5bb8b78bf250 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -63,7 +63,8 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, bool should_use_dmub_lock(struct dc_link *link) { - if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 || + link->psr_settings.psr_version == DC_PSR_VERSION_1) return true; if (link->replay_settings.replay_feature_enabled) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index cae18f8c1c9a..88c75c243bf8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -390,8 +390,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, !memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1, sizeof(DP_SINK_DEVICE_STR_ID_1))) link->psr_settings.force_ffu_mode = 1; - else - link->psr_settings.force_ffu_mode = 0; + copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode; if 
(((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE && diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c index 8a3fbf95c48f..2c43c2422638 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c @@ -162,7 +162,7 @@ static void enable(struct dce_mem_input *mem_input110) static void program_tiling( struct dce_mem_input *mem_input110, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { uint32_t value = 0; @@ -523,7 +523,7 @@ static const unsigned int dvmm_Hw_Setting_Linear[4][9] = { /* Helper to get table entry from surface info */ static const unsigned int *get_dvmm_hw_setting( - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum surface_pixel_format format, bool chroma) { @@ -563,7 +563,7 @@ static const unsigned int *get_dvmm_hw_setting( static void dce_mem_input_v_program_pte_vm( struct mem_input *mem_input, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum dc_rotation_angle rotation) { struct dce_mem_input *mem_input110 = TO_DCE_MEM_INPUT(mem_input); @@ -636,7 +636,7 @@ static void dce_mem_input_v_program_pte_vm( static void dce_mem_input_v_program_surface_config( struct mem_input *mem_input, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index fa422a8cbced..61b0807693fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -2127,70 +2127,131 @@ bool 
dce110_configure_crc(struct timing_generator *tg, cntl_addr = CRTC_REG(mmCRTC_CRC_CNTL); - /* First, disable CRC before we configure it. */ - dm_write_reg(tg->ctx, cntl_addr, 0); + if (!params->enable || params->reset) + /* First, disable CRC before we configure it. */ + dm_write_reg(tg->ctx, cntl_addr, 0); if (!params->enable) return true; /* Program frame boundaries */ - /* Window A x axis start and end. */ - value = 0; - addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL); - set_reg_field_value(value, params->windowa_x_start, - CRTC_CRC0_WINDOWA_X_CONTROL, - CRTC_CRC0_WINDOWA_X_START); - set_reg_field_value(value, params->windowa_x_end, - CRTC_CRC0_WINDOWA_X_CONTROL, - CRTC_CRC0_WINDOWA_X_END); - dm_write_reg(tg->ctx, addr, value); - - /* Window A y axis start and end. */ - value = 0; - addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL); - set_reg_field_value(value, params->windowa_y_start, - CRTC_CRC0_WINDOWA_Y_CONTROL, - CRTC_CRC0_WINDOWA_Y_START); - set_reg_field_value(value, params->windowa_y_end, - CRTC_CRC0_WINDOWA_Y_CONTROL, - CRTC_CRC0_WINDOWA_Y_END); - dm_write_reg(tg->ctx, addr, value); - - /* Window B x axis start and end. */ - value = 0; - addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL); - set_reg_field_value(value, params->windowb_x_start, - CRTC_CRC0_WINDOWB_X_CONTROL, - CRTC_CRC0_WINDOWB_X_START); - set_reg_field_value(value, params->windowb_x_end, - CRTC_CRC0_WINDOWB_X_CONTROL, - CRTC_CRC0_WINDOWB_X_END); - dm_write_reg(tg->ctx, addr, value); - - /* Window B y axis start and end. */ - value = 0; - addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL); - set_reg_field_value(value, params->windowb_y_start, - CRTC_CRC0_WINDOWB_Y_CONTROL, - CRTC_CRC0_WINDOWB_Y_START); - set_reg_field_value(value, params->windowb_y_end, - CRTC_CRC0_WINDOWB_Y_CONTROL, - CRTC_CRC0_WINDOWB_Y_END); - dm_write_reg(tg->ctx, addr, value); - - /* Set crc mode and selection, and enable. Only using CRC0*/ - value = 0; - set_reg_field_value(value, params->continuous_mode ? 
1 : 0, - CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); - set_reg_field_value(value, params->selection, - CRTC_CRC_CNTL, CRTC_CRC0_SELECT); - set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); - dm_write_reg(tg->ctx, cntl_addr, value); + switch (params->crc_eng_inst) { + case 0: + /* Window A x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL); + set_reg_field_value(value, params->windowa_x_start, + CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_START); + set_reg_field_value(value, params->windowa_x_end, + CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window A y axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL); + set_reg_field_value(value, params->windowa_y_start, + CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_START); + set_reg_field_value(value, params->windowa_y_end, + CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL); + set_reg_field_value(value, params->windowb_x_start, + CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_START); + set_reg_field_value(value, params->windowb_x_end, + CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B y axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL); + set_reg_field_value(value, params->windowb_y_start, + CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_START); + set_reg_field_value(value, params->windowb_y_end, + CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Set crc mode and selection, and enable.*/ + value = 0; + set_reg_field_value(value, params->continuous_mode ? 
1 : 0, + CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); + set_reg_field_value(value, params->selection, + CRTC_CRC_CNTL, CRTC_CRC0_SELECT); + set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); + dm_write_reg(tg->ctx, cntl_addr, value); + break; + case 1: + /* Window A x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_X_CONTROL); + set_reg_field_value(value, params->windowa_x_start, + CRTC_CRC1_WINDOWA_X_CONTROL, + CRTC_CRC1_WINDOWA_X_START); + set_reg_field_value(value, params->windowa_x_end, + CRTC_CRC1_WINDOWA_X_CONTROL, + CRTC_CRC1_WINDOWA_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window A y axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_Y_CONTROL); + set_reg_field_value(value, params->windowa_y_start, + CRTC_CRC1_WINDOWA_Y_CONTROL, + CRTC_CRC1_WINDOWA_Y_START); + set_reg_field_value(value, params->windowa_y_end, + CRTC_CRC1_WINDOWA_Y_CONTROL, + CRTC_CRC1_WINDOWA_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_X_CONTROL); + set_reg_field_value(value, params->windowb_x_start, + CRTC_CRC1_WINDOWB_X_CONTROL, + CRTC_CRC1_WINDOWB_X_START); + set_reg_field_value(value, params->windowb_x_end, + CRTC_CRC1_WINDOWB_X_CONTROL, + CRTC_CRC1_WINDOWB_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B y axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_Y_CONTROL); + set_reg_field_value(value, params->windowb_y_start, + CRTC_CRC1_WINDOWB_Y_CONTROL, + CRTC_CRC1_WINDOWB_Y_START); + set_reg_field_value(value, params->windowb_y_end, + CRTC_CRC1_WINDOWB_Y_CONTROL, + CRTC_CRC1_WINDOWB_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Set crc mode and selection, and enable.*/ + value = 0; + set_reg_field_value(value, params->continuous_mode ? 
1 : 0, + CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); + set_reg_field_value(value, params->selection, + CRTC_CRC_CNTL, CRTC_CRC1_SELECT); + set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); + dm_write_reg(tg->ctx, cntl_addr, value); + break; + default: + return false; + } return true; } -bool dce110_get_crc(struct timing_generator *tg, +bool dce110_get_crc(struct timing_generator *tg, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { uint32_t addr = 0; @@ -2206,14 +2267,30 @@ bool dce110_get_crc(struct timing_generator *tg, if (!field) return false; - addr = CRTC_REG(mmCRTC_CRC0_DATA_RG); - value = dm_read_reg(tg->ctx, addr); - *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR); - *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y); + switch (idx) { + case 0: + addr = CRTC_REG(mmCRTC_CRC0_DATA_RG); + value = dm_read_reg(tg->ctx, addr); + *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR); + *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y); - addr = CRTC_REG(mmCRTC_CRC0_DATA_B); - value = dm_read_reg(tg->ctx, addr); - *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB); + addr = CRTC_REG(mmCRTC_CRC0_DATA_B); + value = dm_read_reg(tg->ctx, addr); + *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB); + break; + case 1: + addr = CRTC_REG(mmCRTC_CRC1_DATA_RG); + value = dm_read_reg(tg->ctx, addr); + *r_cr = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_R_CR); + *g_y = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_G_Y); + + addr = CRTC_REG(mmCRTC_CRC1_DATA_B); + value = dm_read_reg(tg->ctx, addr); + *b_cb = get_reg_field_value(value, CRTC_CRC1_DATA_B, CRC1_B_CB); + break; + default: + return false; + } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index ee4de740aceb..e4f5cad64f32 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -286,7 +286,7 @@ bool dce110_arm_vert_intr( bool dce110_configure_crc(struct timing_generator *tg, const struct crc_params *params); -bool dce110_get_crc(struct timing_generator *tg, +bool dce110_get_crc(struct timing_generator *tg, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); bool dce110_is_two_pixels_per_container(const struct dc_crtc_timing *timing); diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index fcf59348eb62..31c4f44ceaac 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -1100,45 +1100,79 @@ static bool dce120_configure_crc(struct timing_generator *tg, if (!dce120_is_tg_enabled(tg)) return false; - /* First, disable CRC before we configure it. */ - dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, - tg110->offsets.crtc, 0); + if (!params->enable || params->reset) + /* First, disable CRC before we configure it. */ + dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, + tg110->offsets.crtc, 0); if (!params->enable) return true; /* Program frame boundaries */ - /* Window A x axis start and end. */ - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL, - CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start, - CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end); - - /* Window A y axis start and end. */ - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL, - CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start, - CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end); - - /* Window B x axis start and end. */ - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL, - CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start, - CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end); - - /* Window B y axis start and end. 
*/ - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL, - CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start, - CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end); - - /* Set crc mode and selection, and enable. Only using CRC0*/ - CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, - CRTC_CRC_EN, params->continuous_mode ? 1 : 0, - CRTC_CRC0_SELECT, params->selection, - CRTC_CRC_EN, 1); + switch (params->crc_eng_inst) { + case 0: + /* Window A x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start, + CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start, + CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start, + CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start, + CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode and selection, and enable.*/ + CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, + CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + CRTC_CRC0_SELECT, params->selection, + CRTC_CRC_EN, 1); + break; + case 1: + /* Window A x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_X_CONTROL, + CRTC_CRC1_WINDOWA_X_START, params->windowa_x_start, + CRTC_CRC1_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_Y_CONTROL, + CRTC_CRC1_WINDOWA_Y_START, params->windowa_y_start, + CRTC_CRC1_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. 
*/ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_X_CONTROL, + CRTC_CRC1_WINDOWB_X_START, params->windowb_x_start, + CRTC_CRC1_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_Y_CONTROL, + CRTC_CRC1_WINDOWB_Y_START, params->windowb_y_start, + CRTC_CRC1_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode and selection, and enable */ + CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, + CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + CRTC_CRC1_SELECT, params->selection, + CRTC_CRC_EN, 1); + break; + default: + return false; + } return true; } -static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr, - uint32_t *g_y, uint32_t *b_cb) +static bool dce120_get_crc(struct timing_generator *tg, uint8_t idx, + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); uint32_t value, field; @@ -1151,14 +1185,30 @@ static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr, if (!field) return false; - value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG, - tg110->offsets.crtc); - *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR); - *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y); + switch (idx) { + case 0: + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG, + tg110->offsets.crtc); + *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR); + *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y); - value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B, - tg110->offsets.crtc); - *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB); + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B, + tg110->offsets.crtc); + *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB); + break; + case 1: + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_RG, + tg110->offsets.crtc); + *r_cr = 
get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_R_CR); + *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_G_Y); + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_B, + tg110->offsets.crtc); + *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_B, CRC1_B_CB); + break; + default: + return false; + } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 573898984726..f9961a6446f3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -168,31 +168,33 @@ void dcn31_panel_cntl_construct( struct dcn31_panel_cntl *dcn31_panel_cntl, const struct panel_cntl_init_data *init_data) { - uint8_t pwrseq_inst = 0xF; dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs; dcn31_panel_cntl->base.ctx = init_data->ctx; dcn31_panel_cntl->base.inst = init_data->inst; - switch (init_data->eng_id) { - case ENGINE_ID_DIGA: - pwrseq_inst = 0; - break; - case ENGINE_ID_DIGB: - pwrseq_inst = 1; - break; - default: - DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id); - ASSERT(false); - break; - } - - if (dcn31_panel_cntl->base.ctx->dc->config.support_edp0_on_dp1) + if (dcn31_panel_cntl->base.ctx->dc->config.support_edp0_on_dp1) { //If supported, power sequencer mapping shall follow the DIG instance + uint8_t pwrseq_inst = 0xF; + + switch (init_data->eng_id) { + case ENGINE_ID_DIGA: + pwrseq_inst = 0; + break; + case ENGINE_ID_DIGB: + pwrseq_inst = 1; + break; + default: + DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id); + ASSERT(false); + break; + } + dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst; - else + } else { /* If not supported, pwrseq will be assigned in order, * so first pwrseq will be assigned to first panel instance (legacy behavior) */ dcn31_panel_cntl->base.pwrseq_inst = dcn31_panel_cntl->base.inst; + } } diff --git 
a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c index b2cea59ba5d4..9a92f73d5b7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c @@ -653,8 +653,9 @@ void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, struct dc_lin if (!query_dp_alt_from_dmub(enc, &cmd)) return; - if (cmd.query_dp_alt.data.is_usb && - cmd.query_dp_alt.data.is_dp4 == 0) + if (cmd.query_dp_alt.data.is_dp_alt_disable == 0 && + cmd.query_dp_alt.data.is_usb && + cmd.query_dp_alt.data.is_dp4 == 0) link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count); return; diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c index d4a3e811aa39..ea0c9a9d0bd6 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c @@ -28,6 +28,7 @@ #include "link_encoder.h" #include "dcn31/dcn31_dio_link_encoder.h" #include "dcn35_dio_link_encoder.h" +#include "dc_dmub_srv.h" #define CTX \ enc10->base.ctx #define DC_LOGGER \ @@ -159,6 +160,8 @@ static const struct link_encoder_funcs dcn35_link_enc_funcs = { .is_in_alt_mode = dcn31_link_encoder_is_in_alt_mode, .get_max_link_cap = dcn31_link_encoder_get_max_link_cap, .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux, + .enable_dpia_output = dcn35_link_encoder_enable_dpia_output, + .disable_dpia_output = dcn35_link_encoder_disable_dpia_output, }; void dcn35_link_encoder_construct( @@ -265,3 +268,80 @@ void dcn35_link_encoder_construct( enc10->base.features.flags.bits.HDMI_6GB_EN = 0; } + +/* DPIA equivalent of link_transmitter_control. 
*/ +static bool link_dpia_control(struct dc_context *dc_ctx, + struct dmub_cmd_dig_dpia_control_data *dpia_control) +{ + union dmub_rb_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.dig1_dpia_control.header.type = DMUB_CMD__DPIA; + cmd.dig1_dpia_control.header.sub_type = + DMUB_CMD__DPIA_DIG1_DPIA_CONTROL; + cmd.dig1_dpia_control.header.payload_bytes = + sizeof(cmd.dig1_dpia_control) - + sizeof(cmd.dig1_dpia_control.header); + + cmd.dig1_dpia_control.dpia_control = *dpia_control; + + dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + + return true; +} + +static void link_encoder_disable(struct dcn10_link_encoder *enc10) +{ + /* reset training complete */ + REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, 0); +} + +void dcn35_link_encoder_enable_dpia_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + uint8_t dpia_id, + uint8_t digmode, + uint8_t fec_rdy) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct dmub_cmd_dig_dpia_control_data dpia_control = { 0 }; + + enc1_configure_encoder(enc10, link_settings); + + dpia_control.action = (uint8_t)TRANSMITTER_CONTROL_ENABLE; + dpia_control.enc_id = enc->preferred_engine; + dpia_control.mode_laneset.digmode = digmode; + dpia_control.lanenum = (uint8_t)link_settings->lane_count; + dpia_control.symclk_10khz = link_settings->link_rate * + LINK_RATE_REF_FREQ_IN_KHZ / 10; + /* DIG_BE_CNTL.DIG_HPD_SELECT set to 5 (hpdsel - 1) to indicate HPD pin unused by DPIA. 
*/ + dpia_control.hpdsel = 6; + dpia_control.dpia_id = dpia_id; + dpia_control.fec_rdy = fec_rdy; + + DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id); + link_dpia_control(enc->ctx, &dpia_control); +} + +void dcn35_link_encoder_disable_dpia_output( + struct link_encoder *enc, + uint8_t dpia_id, + uint8_t digmode) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct dmub_cmd_dig_dpia_control_data dpia_control = { 0 }; + + if (enc->funcs->is_dig_enabled && !enc->funcs->is_dig_enabled(enc)) + return; + + dpia_control.action = (uint8_t)TRANSMITTER_CONTROL_DISABLE; + dpia_control.enc_id = enc->preferred_engine; + dpia_control.mode_laneset.digmode = digmode; + dpia_control.dpia_id = dpia_id; + + DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id); + link_dpia_control(enc->ctx, &dpia_control); + + link_encoder_disable(enc10); +} diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h index d546a3676304..f9d4221f4b43 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h @@ -144,4 +144,22 @@ bool dcn35_is_dig_enabled(struct link_encoder *enc); enum signal_type dcn35_get_dig_mode(struct link_encoder *enc); void dcn35_link_encoder_setup(struct link_encoder *enc, enum signal_type signal); +/* + * Enable DP transmitter and its encoder for dpia port. + */ +void dcn35_link_encoder_enable_dpia_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + uint8_t dpia_id, + uint8_t digmode, + uint8_t fec_rdy); + +/* + * Disable transmitter and its encoder for dpia port. 
+ */ +void dcn35_link_encoder_disable_dpia_output( + struct link_encoder *enc, + uint8_t dpia_id, + uint8_t digmode); + #endif /* __DC_LINK_ENCODER__DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 2e4a46f1b499..5efddd48d5c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -158,6 +158,11 @@ bool dm_helpers_dp_write_dsc_enable( const struct dc_stream_state *stream, bool enable ); + +bool dm_helpers_dp_write_hblank_reduction( + struct dc_context *ctx, + const struct dc_stream_state *stream); + bool dm_helpers_is_dp_sink_present( struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c index 39525721c976..f1235bf9a596 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c @@ -1312,138 +1312,6 @@ bool dcn_validate_bandwidth( return false; } -static unsigned int dcn_find_normalized_clock_vdd_Level( - const struct dc *dc, - enum dm_pp_clock_type clocks_type, - int clocks_in_khz) -{ - int vdd_level = dcn_bw_v_min0p65; - - if (clocks_in_khz == 0)/*todo some clock not in the considerations*/ - return vdd_level; - - switch (clocks_type) { - case DM_PP_CLOCK_TYPE_DISPLAY_CLK: - if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmax0p9*1000) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); - } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vnom0p8*1000) { - vdd_level = dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmid0p72*1000) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmin0p65*1000) { - vdd_level = dcn_bw_v_mid0p72; - } else - vdd_level = dcn_bw_v_min0p65; - break; - case DM_PP_CLOCK_TYPE_DISPLAYPHYCLK: - if (clocks_in_khz > dc->dcn_soc->phyclkv_max0p9*1000) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); 
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_nom0p8*1000) { - vdd_level = dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->phyclkv_mid0p72*1000) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->phyclkv_min0p65*1000) { - vdd_level = dcn_bw_v_mid0p72; - } else - vdd_level = dcn_bw_v_min0p65; - break; - - case DM_PP_CLOCK_TYPE_DPPCLK: - if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmax0p9*1000) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); - } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vnom0p8*1000) { - vdd_level = dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmid0p72*1000) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmin0p65*1000) { - vdd_level = dcn_bw_v_mid0p72; - } else - vdd_level = dcn_bw_v_min0p65; - break; - - case DM_PP_CLOCK_TYPE_MEMORY_CLK: - { - unsigned factor = (ddr4_dram_factor_single_Channel * dc->dcn_soc->number_of_channels); - - if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9*1000000/factor) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); - } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8*1000000/factor) { - vdd_level = dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72*1000000/factor) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65*1000000/factor) { - vdd_level = dcn_bw_v_mid0p72; - } else - vdd_level = dcn_bw_v_min0p65; - } - break; - - case DM_PP_CLOCK_TYPE_DCFCLK: - if (clocks_in_khz > dc->dcn_soc->dcfclkv_max0p9*1000) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); - } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_nom0p8*1000) { - vdd_level = dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_mid0p72*1000) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_min0p65*1000) { - vdd_level = dcn_bw_v_mid0p72; - } else 
- vdd_level = dcn_bw_v_min0p65; - break; - - default: - break; - } - return vdd_level; -} - -unsigned int dcn_find_dcfclk_suits_all( - const struct dc *dc, - struct dc_clocks *clocks) -{ - unsigned vdd_level, vdd_level_temp; - unsigned dcf_clk; - - /*find a common supported voltage level*/ - vdd_level = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_DISPLAY_CLK, clocks->dispclk_khz); - vdd_level_temp = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_DISPLAYPHYCLK, clocks->phyclk_khz); - - vdd_level = dcn_bw_max(vdd_level, vdd_level_temp); - vdd_level_temp = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_DPPCLK, clocks->dppclk_khz); - vdd_level = dcn_bw_max(vdd_level, vdd_level_temp); - - vdd_level_temp = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_MEMORY_CLK, clocks->fclk_khz); - vdd_level = dcn_bw_max(vdd_level, vdd_level_temp); - vdd_level_temp = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_DCFCLK, clocks->dcfclk_khz); - - /*find that level conresponding dcfclk*/ - vdd_level = dcn_bw_max(vdd_level, vdd_level_temp); - if (vdd_level == dcn_bw_v_max0p91) { - BREAK_TO_DEBUGGER(); - dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000; - } else if (vdd_level == dcn_bw_v_max0p9) - dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000; - else if (vdd_level == dcn_bw_v_nom0p8) - dcf_clk = dc->dcn_soc->dcfclkv_nom0p8*1000; - else if (vdd_level == dcn_bw_v_mid0p72) - dcf_clk = dc->dcn_soc->dcfclkv_mid0p72*1000; - else - dcf_clk = dc->dcn_soc->dcfclkv_min0p65*1000; - - DC_LOG_BANDWIDTH_CALCS("\tdcf_clk for voltage = %d\n", dcf_clk); - return dcf_clk; -} - void dcn_bw_update_from_pplib_fclks( struct dc *dc, struct dm_pp_clock_levels_with_voltage *fclks) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c index 76d3bb3c9155..8d4873f80df0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c @@ -1562,6 +1562,7 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(dst_y_per_row_vblank * (double)htotal * ref_freq_to_pix_freq / (double)dpte_groups_per_row_ub_l); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 86ac7d59fd32..0748ef36a16a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1595,6 +1595,7 @@ double dml32_TruncToValidBPP( unsigned int NonDSCBPP0; unsigned int NonDSCBPP1; unsigned int NonDSCBPP2; + unsigned int NonDSCBPP3 = BPP_INVALID; if (Format == dm_420) { NonDSCBPP0 = 12; @@ -1603,6 +1604,7 @@ double dml32_TruncToValidBPP( MinDSCBPP = 6; MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16; } else if (Format == dm_444) { + NonDSCBPP3 = 18; NonDSCBPP0 = 24; NonDSCBPP1 = 30; NonDSCBPP2 = 36; @@ -1667,6 +1669,8 @@ double dml32_TruncToValidBPP( return NonDSCBPP1; else if (MaxLinkBPP >= NonDSCBPP0) return 16.0; + else if ((Output == dm_dp2p0 || Output == dm_dp) && NonDSCBPP3 != BPP_INVALID && MaxLinkBPP >= NonDSCBPP3) + return NonDSCBPP3; // Special case to allow 6bpc RGB for DP connections. 
else return BPP_INVALID; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index beed7adbbd43..47d785204f29 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 1, + .do_urgent_latency_adjustment = 0, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h index 072bd0539605..6b2ab4ec2b5f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h @@ -66,11 +66,15 @@ static inline double dml_max5(double a, double b, double c, double d, double e) static inline double dml_ceil(double a, double granularity) { + if (granularity == 0) + return 0; return (double) dcn_bw_ceil2(a, granularity); } static inline double dml_floor(double a, double granularity) { + if (granularity == 0) + return 0; return (double) dcn_bw_floor2(a, granularity); } @@ -114,11 +118,15 @@ static inline double dml_ceil_2(double f) static inline double dml_ceil_ex(double x, double granularity) { + if (granularity == 0) + return 0; return (double) dcn_bw_ceil2(x, granularity); } static inline double dml_floor_ex(double x, double granularity) { + if (granularity == 0) + return 0; return (double) dcn_bw_floor2(x, granularity); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index c4378e620cbf..91c4f3b4bd5f 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -29,7 +29,11 @@ dml2_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) +frame_warn_flag := -Wframe-larger-than=4096 +else frame_warn_flag := -Wframe-larger-than=3072 +endif else frame_warn_flag := -Wframe-larger-than=2048 endif @@ -73,9 +77,8 @@ AMD_DAL_DML2 = $(addprefix $(AMDDALPATH)/dc/dml2/,$(DML2)) AMD_DISPLAY_FILES += $(AMD_DAL_DML2) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) @@ -94,9 +97,8 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_ccflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags) 
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) @@ -113,9 +115,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_r CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_rcflags) -DML21 := src/dml2_top/dml_top.o -DML21 += src/dml2_top/dml_top_mcache.o -DML21 += src/dml2_top/dml2_top_optimization.o +DML21 := src/dml2_top/dml2_top_interfaces.o +DML21 += src/dml2_top/dml2_top_soc15.o DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 8dabb1ac0b68..84a2de9a76d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -1736,7 +1736,7 @@ static void CalculateBytePerPixelAndBlockSizes( #endif } // CalculateBytePerPixelAndBlockSizes -static dml_float_t CalculateTWait( +static noinline_for_stack dml_float_t CalculateTWait( dml_uint_t PrefetchMode, enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal, @@ -4458,7 +4458,7 @@ static void CalculateSwathWidth( } } // CalculateSwathWidth -static dml_float_t CalculateExtraLatency( +static noinline_for_stack dml_float_t CalculateExtraLatency( dml_uint_t RoundTripPingLatencyCycles, dml_uint_t ReorderingBytes, dml_float_t DCFCLK, @@ -5915,7 +5915,7 @@ static dml_uint_t DSCDelayRequirement( return DSCDelayRequirement_val; } -static dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces, +static noinline_for_stack dml_bool_t 
CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces, dml_float_t ReturnBW, dml_bool_t NotUrgentLatencyHiding[], dml_float_t ReadBandwidthLuma[], @@ -6019,7 +6019,7 @@ static void CalculatePrefetchBandwithSupport( #endif } -static dml_float_t CalculateBandwidthAvailableForImmediateFlip( +static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip( dml_uint_t NumberOfActiveSurfaces, dml_float_t ReturnBW, dml_float_t ReadBandwidthLuma[], @@ -6213,7 +6213,7 @@ static dml_uint_t CalculateMaxVStartup( return max_vstartup_lines; } -static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, +static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params, dml_uint_t j, dml_uint_t k) @@ -6265,7 +6265,7 @@ static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *m CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; } -static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) +static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib) { struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; @@ -6301,9 +6301,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.meta_row_bandwidth_this_state, mode_lib->ms.dpte_row_bandwidth_this_state, mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor); + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j]); s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only( &mode_lib->ms.soc, @@ -6434,7 +6434,7 @@ static 
void dml_prefetch_check(struct display_mode_lib_st *mode_lib) /* Output */ &mode_lib->ms.UrgentBurstFactorCursorPre[k], &mode_lib->ms.UrgentBurstFactorLumaPre[k], - &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], &mode_lib->ms.NotUrgentLatencyHidingPre[k]); mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * @@ -6458,9 +6458,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cursor_bw_pre, mode_lib->ms.prefetch_vmrow_bw, mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], mode_lib->ms.UrgentBurstFactorLumaPre, mode_lib->ms.UrgentBurstFactorChromaPre, mode_lib->ms.UrgentBurstFactorCursorPre, @@ -6517,9 +6517,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cursor_bw, mode_lib->ms.cursor_bw_pre, mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], mode_lib->ms.UrgentBurstFactorLumaPre, mode_lib->ms.UrgentBurstFactorChromaPre, mode_lib->ms.UrgentBurstFactorCursorPre); @@ -6586,9 +6586,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cursor_bw_pre, mode_lib->ms.prefetch_vmrow_bw, mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + 
mode_lib->ms.UrgentBurstFactorCursor[j], mode_lib->ms.UrgentBurstFactorLumaPre, mode_lib->ms.UrgentBurstFactorChromaPre, mode_lib->ms.UrgentBurstFactorCursorPre, @@ -7809,9 +7809,9 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) mode_lib->ms.DETBufferSizeYThisState[k], mode_lib->ms.DETBufferSizeCThisState[k], /* Output */ - &mode_lib->ms.UrgentBurstFactorCursor[k], - &mode_lib->ms.UrgentBurstFactorLuma[k], - &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.UrgentBurstFactorCursor[j][k], + &mode_lib->ms.UrgentBurstFactorLuma[j][k], + &mode_lib->ms.UrgentBurstFactorChroma[j][k], &mode_lib->ms.NotUrgentLatencyHiding[k]); } @@ -8318,7 +8318,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc if (clk_cfg->dcfclk_option != dml_use_override_freq) locals->Dcfclk = mode_lib->ms.DCFCLK; else - locals->Dcfclk = clk_cfg->dcfclk_freq_mhz; + locals->Dcfclk = clk_cfg->dcfclk_mhz; #ifdef __DML_VBA_DEBUG__ dml_print_dml_policy(&mode_lib->ms.policy); @@ -8371,7 +8371,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc if (clk_cfg->dispclk_option == dml_use_required_freq) locals->Dispclk = locals->Dispclk_calculated; else if (clk_cfg->dispclk_option == dml_use_override_freq) - locals->Dispclk = clk_cfg->dispclk_freq_mhz; + locals->Dispclk = clk_cfg->dispclk_mhz; else locals->Dispclk = mode_lib->ms.state.dispclk_mhz; #ifdef __DML_VBA_DEBUG__ @@ -8412,7 +8412,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc if (clk_cfg->dppclk_option[k] == dml_use_required_freq) locals->Dppclk[k] = locals->Dppclk_calculated[k]; else if (clk_cfg->dppclk_option[k] == dml_use_override_freq) - locals->Dppclk[k] = clk_cfg->dppclk_freq_mhz[k]; + locals->Dppclk[k] = clk_cfg->dppclk_mhz[k]; else locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz; #ifdef __DML_VBA_DEBUG__ @@ -9190,6 +9190,8 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc 
&locals->FractionOfUrgentBandwidth, &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport + + if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) { dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); @@ -9204,6 +9206,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc } } + if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) { locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( mode_lib->ms.num_active_planes, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h index f951936bb579..dd3f43181a6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h @@ -28,6 +28,7 @@ #define __DISPLAY_MODE_CORE_STRUCT_H__ #include "display_mode_lib_defines.h" +#include "dml_top_display_cfg_types.h" enum dml_project_id { dml_project_invalid = 0, @@ -49,7 +50,9 @@ enum dml_use_mall_for_pstate_change_mode { dml_use_mall_pstate_change_disable = 0, dml_use_mall_pstate_change_full_frame = 1, dml_use_mall_pstate_change_sub_viewport = 2, - dml_use_mall_pstate_change_phantom_pipe = 3 + dml_use_mall_pstate_change_phantom_pipe = 3, + dml_use_mall_pstate_change_phantom_pipe_no_data_return = 4, + dml_use_mall_pstate_change_imall = 5 }; enum dml_use_mall_for_static_screen_mode { dml_use_mall_static_screen_disable = 0, @@ -171,7 +174,11 @@ enum dml_swizzle_mode { dml_sw_256kb_z_x = 28, dml_sw_256kb_s_x = 29, dml_sw_256kb_d_x = 30, - dml_sw_256kb_r_x = 31 + dml_sw_256kb_r_x = 31, + dml_sw_256b_2d = 32, + dml_sw_4kb_2d = 33, + dml_sw_64kb_2d = 34, + dml_sw_256kb_2d = 35 }; enum 
dml_lb_depth { dml_lb_6 = 0, @@ -223,24 +230,28 @@ enum dml_mpc_use_policy { dml_mpc_disabled = 0, dml_mpc_as_possible = 1, dml_mpc_as_needed_for_voltage = 2, - dml_mpc_as_needed_for_pstate_and_voltage = 3 + dml_mpc_as_needed_for_pstate_and_voltage = 3, + dml_mpc_as_needed = 4, + dml_mpc_2to1 = 5 }; enum dml_odm_use_policy { dml_odm_use_policy_bypass = 0, dml_odm_use_policy_combine_as_needed = 1, dml_odm_use_policy_combine_2to1 = 2, - dml_odm_use_policy_combine_4to1 = 3, - dml_odm_use_policy_split_1to2 = 4, - dml_odm_use_policy_mso_1to2 = 5, - dml_odm_use_policy_mso_1to4 = 6 + dml_odm_use_policy_combine_3to1 = 3, + dml_odm_use_policy_combine_4to1 = 4, + dml_odm_use_policy_split_1to2 = 5, + dml_odm_use_policy_mso_1to2 = 6, + dml_odm_use_policy_mso_1to4 = 7 }; enum dml_odm_mode { dml_odm_mode_bypass = 0, dml_odm_mode_combine_2to1 = 1, - dml_odm_mode_combine_4to1 = 2, - dml_odm_mode_split_1to2 = 3, - dml_odm_mode_mso_1to2 = 4, - dml_odm_mode_mso_1to4 = 5 + dml_odm_mode_combine_3to1 = 2, + dml_odm_mode_combine_4to1 = 3, + dml_odm_mode_split_1to2 = 4, + dml_odm_mode_mso_1to2 = 5, + dml_odm_mode_mso_1to4 = 6 }; enum dml_writeback_configuration { dml_whole_buffer_for_single_stream_no_interleave = 0, @@ -289,6 +300,17 @@ struct soc_state_bounding_box_st { dml_float_t fclk_change_latency_us; dml_float_t usr_retraining_latency_us; dml_bool_t use_ideal_dram_bw_strobe; + dml_float_t g6_temp_read_blackout_us; + + struct { + dml_uint_t urgent_ramp_uclk_cycles; + dml_uint_t trip_to_memory_uclk_cycles; + dml_uint_t meta_trip_to_memory_uclk_cycles; + dml_uint_t maximum_latency_when_urgent_uclk_cycles; + dml_uint_t average_latency_when_urgent_uclk_cycles; + dml_uint_t maximum_latency_when_non_urgent_uclk_cycles; + dml_uint_t average_latency_when_non_urgent_uclk_cycles; + } dml_dcn401_uclk_dpm_dependent_soc_qos_params; }; struct soc_bounding_box_st { @@ -297,7 +319,7 @@ struct soc_bounding_box_st { dml_float_t pcierefclk_mhz; dml_float_t refclk_mhz; dml_float_t amclk_mhz; - 
dml_float_t max_outstanding_reqs; + dml_uint_t max_outstanding_reqs; dml_float_t pct_ideal_sdp_bw_after_urgent; dml_float_t pct_ideal_fabric_bw_after_urgent; dml_float_t pct_ideal_dram_bw_after_urgent_pixel_only; @@ -308,6 +330,16 @@ struct soc_bounding_box_st { dml_float_t max_avg_fabric_bw_use_normal_percent; dml_float_t max_avg_dram_bw_use_normal_percent; dml_float_t max_avg_dram_bw_use_normal_strobe_percent; + + dml_float_t svp_prefetch_pct_ideal_sdp_bw_after_urgent; + dml_float_t svp_prefetch_pct_ideal_fabric_bw_after_urgent; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_only; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_and_vm; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_vm_only; + dml_float_t svp_prefetch_max_avg_sdp_bw_use_normal_percent; + dml_float_t svp_prefetch_max_avg_fabric_bw_use_normal_percent; + dml_float_t svp_prefetch_max_avg_dram_bw_use_normal_percent; + dml_uint_t round_trip_ping_latency_dcfclk_cycles; dml_uint_t urgent_out_of_order_return_per_channel_pixel_only_bytes; dml_uint_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; @@ -324,6 +356,26 @@ struct soc_bounding_box_st { dml_uint_t mall_allocated_for_dcn_mbytes; dml_float_t dispclk_dppclk_vco_speed_mhz; dml_bool_t do_urgent_latency_adjustment; + + dml_uint_t mem_word_bytes; + dml_uint_t num_dcc_mcaches; + dml_uint_t mcache_size_bytes; + dml_uint_t mcache_line_size_bytes; + + struct { + dml_bool_t UseNewDCN401SOCParameters; + dml_uint_t df_qos_response_time_fclk_cycles; + dml_uint_t max_round_trip_to_furthest_cs_fclk_cycles; + dml_uint_t mall_overhead_fclk_cycles; + dml_uint_t meta_trip_adder_fclk_cycles; + dml_uint_t average_transport_distance_fclk_cycles; + dml_float_t umc_urgent_ramp_latency_margin; + dml_float_t umc_max_latency_margin; + dml_float_t umc_average_latency_margin; + dml_float_t fabric_max_transport_latency_margin; + dml_float_t fabric_average_transport_latency_margin; + } dml_dcn401_soc_qos_params; + }; struct 
ip_params_st { @@ -515,6 +567,10 @@ struct dml_plane_cfg_st { dml_uint_t CursorWidth[__DML_NUM_PLANES__]; dml_uint_t CursorBPP[__DML_NUM_PLANES__]; + dml_bool_t setup_for_tdlut[__DML_NUM_PLANES__]; + enum dml2_tdlut_addressing_mode tdlut_addressing_mode[__DML_NUM_PLANES__]; + enum dml2_tdlut_width_mode tdlut_width_mode[__DML_NUM_PLANES__]; + enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[__DML_NUM_PLANES__]; enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[__DML_NUM_PLANES__]; @@ -604,6 +660,17 @@ struct dml_hw_resource_st { dml_float_t DLGRefClkFreqMHz; /// <brief DLG Global Reference timer }; +/// @brief To control the clk usage for model programming +struct dml_clk_cfg_st { + enum dml_clk_cfg_policy dcfclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq + enum dml_clk_cfg_policy dispclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq + enum dml_clk_cfg_policy dppclk_option[__DML_NUM_PLANES__]; + + dml_float_t dcfclk_mhz; + dml_float_t dispclk_mhz; + dml_float_t dppclk_mhz[__DML_NUM_PLANES__]; +}; // dml_clk_cfg_st + /// @brief DML display configuration. 
/// Describe how to display a surface in multi-plane setup and output to different output and writeback using the specified timgin struct dml_display_cfg_st { @@ -616,19 +683,9 @@ struct dml_display_cfg_st { unsigned int num_timings; struct dml_hw_resource_st hw; //< brief for mode programming + struct dml_clk_cfg_st clk_overrides; //< brief for mode programming clk override }; // dml_display_cfg_st -/// @brief To control the clk usage for model programming -struct dml_clk_cfg_st { - enum dml_clk_cfg_policy dcfclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq - enum dml_clk_cfg_policy dispclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq - enum dml_clk_cfg_policy dppclk_option[__DML_NUM_PLANES__]; - - dml_float_t dcfclk_freq_mhz; - dml_float_t dispclk_freq_mhz; - dml_float_t dppclk_freq_mhz[__DML_NUM_PLANES__]; -}; // dml_clk_cfg_st - /// @brief DML mode evaluation and programming policy /// Those knobs that affect mode support and mode programming struct dml_mode_eval_policy_st { @@ -884,11 +941,11 @@ struct mode_support_st { dml_uint_t meta_row_height[__DML_NUM_PLANES__]; dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__]; dml_float_t UrgLatency; - dml_float_t UrgentBurstFactorCursor[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorCursor[2][__DML_NUM_PLANES__]; dml_float_t UrgentBurstFactorCursorPre[__DML_NUM_PLANES__]; - dml_float_t UrgentBurstFactorLuma[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorLuma[2][__DML_NUM_PLANES__]; dml_float_t UrgentBurstFactorLumaPre[__DML_NUM_PLANES__]; - dml_float_t UrgentBurstFactorChroma[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorChroma[2][__DML_NUM_PLANES__]; dml_float_t UrgentBurstFactorChromaPre[__DML_NUM_PLANES__]; dml_float_t MaximumSwathWidthInLineBufferLuma; dml_float_t MaximumSwathWidthInLineBufferChroma; 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c index c247aee89caf..89890c88fd66 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c @@ -690,12 +690,12 @@ __DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg) dml_print("DML: clk_cfg: dcfclk_option = %d\n", clk_cfg->dcfclk_option); dml_print("DML: clk_cfg: dispclk_option = %d\n", clk_cfg->dispclk_option); - dml_print("DML: clk_cfg: dcfclk_freq_mhz = %f\n", clk_cfg->dcfclk_freq_mhz); - dml_print("DML: clk_cfg: dispclk_freq_mhz = %f\n", clk_cfg->dispclk_freq_mhz); + dml_print("DML: clk_cfg: dcfclk_mhz = %f\n", clk_cfg->dcfclk_mhz); + dml_print("DML: clk_cfg: dispclk_mhz = %f\n", clk_cfg->dispclk_mhz); for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) { dml_print("DML: clk_cfg: i=%d, dppclk_option = %d\n", i, clk_cfg->dppclk_option[i]); - dml_print("DML: clk_cfg: i=%d, dppclk_freq_mhz = %f\n", i, clk_cfg->dppclk_freq_mhz[i]); + dml_print("DML: clk_cfg: i=%d, dppclk_mhz = %f\n", i, clk_cfg->dppclk_mhz[i]); } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index c6a5a8614679..b9c6b45f6872 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -10,7 +10,6 @@ #include "dml21_utils.h" #include "dml21_translation_helper.h" #include "bounding_boxes/dcn4_soc_bb.h" -#include "bounding_boxes/dcn3_soc_bb.h" static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, @@ -20,10 +19,6 @@ static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_ const struct dml2_soc_qos_parameters *qos_params; switch (in_dc->ctx->dce_version) { - case 
DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. - soc_bb = &dml2_socbb_dcn31; - qos_params = &dml_dcn31_soc_qos_params; - break; case DCN_VERSION_4_01: default: if (config->bb_from_dmub) @@ -60,9 +55,6 @@ static void dml21_init_ip_params(struct dml2_initialize_instance_in_out *dml_ini const struct dml2_ip_capabilities *ip_caps; switch (in_dc->ctx->dce_version) { - case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. - ip_caps = &dml2_dcn31_max_ip_caps; - break; case DCN_VERSION_4_01: default: ip_caps = &dml2_dcn401_max_ip_caps; @@ -302,12 +294,17 @@ void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_in dml_soc_bb->power_management_parameters.stutter_exit_latency_us = (in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns + 9) / 10; - if (in_dc->ctx->dc_bios->vram_info.num_chans) { + if (dc_bw_params->num_channels) { + dml_clk_table->dram_config.channel_count = dc_bw_params->num_channels; + dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; + } else if (in_dc->ctx->dc_bios->vram_info.num_chans) { dml_clk_table->dram_config.channel_count = in_dc->ctx->dc_bios->vram_info.num_chans; dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; } - if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) { + if (dc_bw_params->dram_channel_width_bytes) { + dml_clk_table->dram_config.channel_width_bytes = dc_bw_params->dram_channel_width_bytes; + } else if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) { dml_clk_table->dram_config.channel_width_bytes = in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; } @@ -721,11 +718,21 @@ static void populate_dml21_surface_config_from_plane_state( surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c; surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch; 
surface->dcc.plane1.pitch = plane_state->dcc.meta_pitch_c; - if (in_dc->ctx->dce_version < DCN_VERSION_4_01) { - /* needed for N-1 testing */ + + // Update swizzle / array mode based on the gfx_format + switch (plane_state->tiling_info.gfxversion) { + case DcGfxVersion7: + case DcGfxVersion8: + // Placeholder for programming the array_mode + break; + case DcGfxVersion9: + case DcGfxVersion10: + case DcGfxVersion11: surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle); - } else { + break; + case DcGfxAddr3: surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle); + break; } } @@ -1077,28 +1084,8 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0; context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz; context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz; -} - -void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) -{ - struct dml2_core_internal_display_mode_lib *mode_lib = &in_ctx->v21.dml_init.dml2_instance->core_instance.clean_me_up.mode_lib; - double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; - - if (reg_set_idx >= DML2_DCHUB_WATERMARK_SET_NUM) { - /* invalid register set index */ - return; - } - - /* convert to legacy format (time in ns) */ - watermark->urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0; - watermark->pte_meta_urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0; - watermark->cstate_pstate.cstate_enter_plus_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_enter / refclk_freq_in_mhz) * 1000.0; - watermark->cstate_pstate.cstate_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_exit / refclk_freq_in_mhz) * 1000.0; - watermark->cstate_pstate.pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].uclk_pstate / refclk_freq_in_mhz) * 1000.0; - watermark->urgent_latency_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0; - watermark->cstate_pstate.fclk_pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].fclk_pstate / refclk_freq_in_mhz) * 1000.0; - watermark->frac_urg_bw_flip = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_flip; - watermark->frac_urg_bw_nom = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_nom; + context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz; + context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz; } static struct 
dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index) @@ -1144,53 +1131,6 @@ void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_se } } - -void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming) -{ - unsigned int hactive, vactive, hblank_start, vblank_start, hblank_end, vblank_end; - struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; - union dml2_global_sync_programming *global_sync = &stream_programming->global_sync; - - hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right + pipe_ctx->hblank_borrow; - vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top; - hblank_start = pipe_ctx->stream->timing.h_total - pipe_ctx->stream->timing.h_front_porch; - vblank_start = pipe_ctx->stream->timing.v_total - pipe_ctx->stream->timing.v_front_porch; - - hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right - pipe_ctx->hblank_borrow; - vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom; - - if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { - /* phantom has its own global sync */ - global_sync = &stream_programming->phantom_stream.global_sync; - } - - pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4x.vstartup_lines; - pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4x.vupdate_offset_pixels; - pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4x.vupdate_vupdate_width_pixels; - pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4x.vready_offset_pixels; - pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4x.pstate_keepout_start_lines; - - pipe_ctx->pipe_dlg_param.otg_inst = 
pipe_ctx->stream_res.tg->inst; - - pipe_ctx->pipe_dlg_param.hactive = hactive; - pipe_ctx->pipe_dlg_param.vactive = vactive; - pipe_ctx->pipe_dlg_param.htotal = pipe_ctx->stream->timing.h_total; - pipe_ctx->pipe_dlg_param.vtotal = pipe_ctx->stream->timing.v_total; - pipe_ctx->pipe_dlg_param.hblank_end = hblank_end; - pipe_ctx->pipe_dlg_param.vblank_end = vblank_end; - pipe_ctx->pipe_dlg_param.hblank_start = hblank_start; - pipe_ctx->pipe_dlg_param.vblank_start = vblank_start; - pipe_ctx->pipe_dlg_param.vfront_porch = pipe_ctx->stream->timing.v_front_porch; - pipe_ctx->pipe_dlg_param.pixel_rate_mhz = pipe_ctx->stream->timing.pix_clk_100hz / 10000.00; - pipe_ctx->pipe_dlg_param.refresh_rate = ((timing->pix_clk_100hz * 100) / timing->h_total) / timing->v_total; - pipe_ctx->pipe_dlg_param.vtotal_max = pipe_ctx->stream->adjust.v_total_max; - pipe_ctx->pipe_dlg_param.vtotal_min = pipe_ctx->stream->adjust.v_total_min; - pipe_ctx->pipe_dlg_param.recout_height = pipe_ctx->plane_res.scl_data.recout.height; - pipe_ctx->pipe_dlg_param.recout_width = pipe_ctx->plane_res.scl_data.recout.width; - pipe_ctx->pipe_dlg_param.full_recout_height = pipe_ctx->plane_res.scl_data.recout.height; - pipe_ctx->pipe_dlg_param.full_recout_width = pipe_ctx->plane_res.scl_data.recout.width; -} - void dml21_map_hw_resources(struct dml2_context *dml_ctx) { unsigned int i = 0; @@ -1226,22 +1166,22 @@ void dml21_set_dc_p_state_type( bool sub_vp_enabled) { switch (stream_programming->uclk_pstate_method) { - case dml2_uclk_pstate_support_method_vactive: - case dml2_uclk_pstate_support_method_fw_vactive_drr: + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: pipe_ctx->p_state_type = P_STATE_V_ACTIVE; break; - case dml2_uclk_pstate_support_method_vblank: - case dml2_uclk_pstate_support_method_fw_vblank_drr: + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: if (sub_vp_enabled) pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; else pipe_ctx->p_state_type = 
P_STATE_V_BLANK; break; - case dml2_uclk_pstate_support_method_fw_subvp_phantom: - case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: pipe_ctx->p_state_type = P_STATE_SUB_VP; break; - case dml2_uclk_pstate_support_method_fw_drr: + case dml2_pstate_method_fw_drr: if (sub_vp_enabled) pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; else diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h index 476a7f6e4875..069b939c672a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h @@ -21,8 +21,6 @@ void dml21_initialize_soc_bb_params(struct dml2_initialize_instance_in_out *dml_ void dml21_initialize_ip_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context); -void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming); -void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx); void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); void dml21_map_hw_resources(struct dml2_context *dml_ctx); void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); diff --git 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index 51d491bffa32..1e56d995cd0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -142,108 +142,21 @@ int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, return num_pipes; } - -void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs, - struct dml2_display_dlg_regs *disp_dlg_regs, - struct dml2_display_ttu_regs *disp_ttu_regs, - struct pipe_ctx *out) +void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming) { - memset(&out->rq_regs, 0, sizeof(out->rq_regs)); - out->rq_regs.rq_regs_l.chunk_size = rq_regs->rq_regs_l.chunk_size; - out->rq_regs.rq_regs_l.min_chunk_size = rq_regs->rq_regs_l.min_chunk_size; - //out->rq_regs.rq_regs_l.meta_chunk_size = rq_regs->rq_regs_l.meta_chunk_size; - //out->rq_regs.rq_regs_l.min_meta_chunk_size = rq_regs->rq_regs_l.min_meta_chunk_size; - out->rq_regs.rq_regs_l.dpte_group_size = rq_regs->rq_regs_l.dpte_group_size; - out->rq_regs.rq_regs_l.mpte_group_size = rq_regs->rq_regs_l.mpte_group_size; - out->rq_regs.rq_regs_l.swath_height = rq_regs->rq_regs_l.swath_height; - out->rq_regs.rq_regs_l.pte_row_height_linear = rq_regs->rq_regs_l.pte_row_height_linear; - - out->rq_regs.rq_regs_c.chunk_size = rq_regs->rq_regs_c.chunk_size; - out->rq_regs.rq_regs_c.min_chunk_size = rq_regs->rq_regs_c.min_chunk_size; - //out->rq_regs.rq_regs_c.meta_chunk_size = rq_regs->rq_regs_c.meta_chunk_size; - //out->rq_regs.rq_regs_c.min_meta_chunk_size = rq_regs->rq_regs_c.min_meta_chunk_size; - out->rq_regs.rq_regs_c.dpte_group_size = rq_regs->rq_regs_c.dpte_group_size; - out->rq_regs.rq_regs_c.mpte_group_size = rq_regs->rq_regs_c.mpte_group_size; - out->rq_regs.rq_regs_c.swath_height = rq_regs->rq_regs_c.swath_height; 
- out->rq_regs.rq_regs_c.pte_row_height_linear = rq_regs->rq_regs_c.pte_row_height_linear; - - out->rq_regs.drq_expansion_mode = rq_regs->drq_expansion_mode; - out->rq_regs.prq_expansion_mode = rq_regs->prq_expansion_mode; - //out->rq_regs.mrq_expansion_mode = rq_regs->mrq_expansion_mode; - out->rq_regs.crq_expansion_mode = rq_regs->crq_expansion_mode; - out->rq_regs.plane1_base_address = rq_regs->plane1_base_address; - out->unbounded_req = rq_regs->unbounded_request_enabled; - - memset(&out->dlg_regs, 0, sizeof(out->dlg_regs)); - out->dlg_regs.refcyc_h_blank_end = disp_dlg_regs->refcyc_h_blank_end; - out->dlg_regs.dlg_vblank_end = disp_dlg_regs->dlg_vblank_end; - out->dlg_regs.min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; - out->dlg_regs.refcyc_per_htotal = disp_dlg_regs->refcyc_per_htotal; - out->dlg_regs.refcyc_x_after_scaler = disp_dlg_regs->refcyc_x_after_scaler; - out->dlg_regs.dst_y_after_scaler = disp_dlg_regs->dst_y_after_scaler; - out->dlg_regs.dst_y_prefetch = disp_dlg_regs->dst_y_prefetch; - out->dlg_regs.dst_y_per_vm_vblank = disp_dlg_regs->dst_y_per_vm_vblank; - out->dlg_regs.dst_y_per_row_vblank = disp_dlg_regs->dst_y_per_row_vblank; - out->dlg_regs.dst_y_per_vm_flip = disp_dlg_regs->dst_y_per_vm_flip; - out->dlg_regs.dst_y_per_row_flip = disp_dlg_regs->dst_y_per_row_flip; - out->dlg_regs.ref_freq_to_pix_freq = disp_dlg_regs->ref_freq_to_pix_freq; - out->dlg_regs.vratio_prefetch = disp_dlg_regs->vratio_prefetch; - out->dlg_regs.vratio_prefetch_c = disp_dlg_regs->vratio_prefetch_c; - out->dlg_regs.refcyc_per_tdlut_group = disp_dlg_regs->refcyc_per_tdlut_group; - out->dlg_regs.refcyc_per_pte_group_vblank_l = disp_dlg_regs->refcyc_per_pte_group_vblank_l; - out->dlg_regs.refcyc_per_pte_group_vblank_c = disp_dlg_regs->refcyc_per_pte_group_vblank_c; - //out->dlg_regs.refcyc_per_meta_chunk_vblank_l = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; - //out->dlg_regs.refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_c; 
- out->dlg_regs.refcyc_per_pte_group_flip_l = disp_dlg_regs->refcyc_per_pte_group_flip_l; - out->dlg_regs.refcyc_per_pte_group_flip_c = disp_dlg_regs->refcyc_per_pte_group_flip_c; - //out->dlg_regs.refcyc_per_meta_chunk_flip_l = disp_dlg_regs->refcyc_per_meta_chunk_flip_l; - //out->dlg_regs.refcyc_per_meta_chunk_flip_c = disp_dlg_regs->refcyc_per_meta_chunk_flip_c; - out->dlg_regs.dst_y_per_pte_row_nom_l = disp_dlg_regs->dst_y_per_pte_row_nom_l; - out->dlg_regs.dst_y_per_pte_row_nom_c = disp_dlg_regs->dst_y_per_pte_row_nom_c; - out->dlg_regs.refcyc_per_pte_group_nom_l = disp_dlg_regs->refcyc_per_pte_group_nom_l; - out->dlg_regs.refcyc_per_pte_group_nom_c = disp_dlg_regs->refcyc_per_pte_group_nom_c; - //out->dlg_regs.dst_y_per_meta_row_nom_l = disp_dlg_regs->dst_y_per_meta_row_nom_l; - //out->dlg_regs.dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_c; - //out->dlg_regs.refcyc_per_meta_chunk_nom_l = disp_dlg_regs->refcyc_per_meta_chunk_nom_l; - //out->dlg_regs.refcyc_per_meta_chunk_nom_c = disp_dlg_regs->refcyc_per_meta_chunk_nom_c; - out->dlg_regs.refcyc_per_line_delivery_pre_l = disp_dlg_regs->refcyc_per_line_delivery_pre_l; - out->dlg_regs.refcyc_per_line_delivery_pre_c = disp_dlg_regs->refcyc_per_line_delivery_pre_c; - out->dlg_regs.refcyc_per_line_delivery_l = disp_dlg_regs->refcyc_per_line_delivery_l; - out->dlg_regs.refcyc_per_line_delivery_c = disp_dlg_regs->refcyc_per_line_delivery_c; - out->dlg_regs.refcyc_per_vm_group_vblank = disp_dlg_regs->refcyc_per_vm_group_vblank; - out->dlg_regs.refcyc_per_vm_group_flip = disp_dlg_regs->refcyc_per_vm_group_flip; - out->dlg_regs.refcyc_per_vm_req_vblank = disp_dlg_regs->refcyc_per_vm_req_vblank; - out->dlg_regs.refcyc_per_vm_req_flip = disp_dlg_regs->refcyc_per_vm_req_flip; - out->dlg_regs.dst_y_offset_cur0 = disp_dlg_regs->dst_y_offset_cur0; - out->dlg_regs.chunk_hdl_adjust_cur0 = disp_dlg_regs->chunk_hdl_adjust_cur0; - //out->dlg_regs.dst_y_offset_cur1 = disp_dlg_regs->dst_y_offset_cur1; - 
//out->dlg_regs.chunk_hdl_adjust_cur1 = disp_dlg_regs->chunk_hdl_adjust_cur1; - out->dlg_regs.vready_after_vcount0 = disp_dlg_regs->vready_after_vcount0; - out->dlg_regs.dst_y_delta_drq_limit = disp_dlg_regs->dst_y_delta_drq_limit; - out->dlg_regs.refcyc_per_vm_dmdata = disp_dlg_regs->refcyc_per_vm_dmdata; - out->dlg_regs.dmdata_dl_delta = disp_dlg_regs->dmdata_dl_delta; - - memset(&out->ttu_regs, 0, sizeof(out->ttu_regs)); - out->ttu_regs.qos_level_low_wm = disp_ttu_regs->qos_level_low_wm; - out->ttu_regs.qos_level_high_wm = disp_ttu_regs->qos_level_high_wm; - out->ttu_regs.min_ttu_vblank = disp_ttu_regs->min_ttu_vblank; - out->ttu_regs.qos_level_flip = disp_ttu_regs->qos_level_flip; - out->ttu_regs.refcyc_per_req_delivery_l = disp_ttu_regs->refcyc_per_req_delivery_l; - out->ttu_regs.refcyc_per_req_delivery_c = disp_ttu_regs->refcyc_per_req_delivery_c; - out->ttu_regs.refcyc_per_req_delivery_cur0 = disp_ttu_regs->refcyc_per_req_delivery_cur0; - //out->ttu_regs.refcyc_per_req_delivery_cur1 = disp_ttu_regs->refcyc_per_req_delivery_cur1; - out->ttu_regs.refcyc_per_req_delivery_pre_l = disp_ttu_regs->refcyc_per_req_delivery_pre_l; - out->ttu_regs.refcyc_per_req_delivery_pre_c = disp_ttu_regs->refcyc_per_req_delivery_pre_c; - out->ttu_regs.refcyc_per_req_delivery_pre_cur0 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur0; - //out->ttu_regs.refcyc_per_req_delivery_pre_cur1 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur1; - out->ttu_regs.qos_level_fixed_l = disp_ttu_regs->qos_level_fixed_l; - out->ttu_regs.qos_level_fixed_c = disp_ttu_regs->qos_level_fixed_c; - out->ttu_regs.qos_level_fixed_cur0 = disp_ttu_regs->qos_level_fixed_cur0; - //out->ttu_regs.qos_level_fixed_cur1 = disp_ttu_regs->qos_level_fixed_cur1; - out->ttu_regs.qos_ramp_disable_l = disp_ttu_regs->qos_ramp_disable_l; - out->ttu_regs.qos_ramp_disable_c = disp_ttu_regs->qos_ramp_disable_c; - out->ttu_regs.qos_ramp_disable_cur0 = disp_ttu_regs->qos_ramp_disable_cur0; - //out->ttu_regs.qos_ramp_disable_cur1 = 
disp_ttu_regs->qos_ramp_disable_cur1; + union dml2_global_sync_programming *global_sync = &stream_programming->global_sync; + + if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { + /* phantom has its own global sync */ + global_sync = &stream_programming->phantom_stream.global_sync; + } + + memcpy(&pipe_ctx->global_sync, + global_sync, + sizeof(union dml2_global_sync_programming)); } void dml21_populate_mall_allocation_size(struct dc_state *context, @@ -301,28 +214,16 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex { unsigned int pipe_reg_index = 0; - dml21_populate_pipe_ctx_dlg_params(dml_ctx, context, pipe_ctx, stream_prog); + dml21_pipe_populate_global_sync(dml_ctx, context, pipe_ctx, stream_prog); find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index); if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); pipe_ctx->unbounded_req = false; - - /* legacy only, should be removed later */ - dml21_update_pipe_ctx_dchub_regs(&pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->rq_regs, - &pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->dlg_regs, - &pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx); - pipe_ctx->det_buffer_size_kb = 0; } else { memcpy(&pipe_ctx->hubp_regs, pln_prog->pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); pipe_ctx->unbounded_req = pln_prog->pipe_regs[pipe_reg_index]->rq_regs.unbounded_request_enabled; - - /* legacy only, should be removed later */ - dml21_update_pipe_ctx_dchub_regs(&pln_prog->pipe_regs[pipe_reg_index]->rq_regs, - &pln_prog->pipe_regs[pipe_reg_index]->dlg_regs, - &pln_prog->pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx); - pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64; } @@ -482,7 +383,8 
@@ void dml21_build_fams2_programming(const struct dc *dc, unsigned int num_fams2_streams = 0; /* reset fams2 data */ - memset(&context->bw_ctx.bw.dcn.fams2_stream_params, 0, sizeof(struct dmub_fams2_stream_static_state) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_stream_base_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); if (dml_ctx->v21.mode_programming.programming->fams2_required) { @@ -490,8 +392,10 @@ void dml21_build_fams2_programming(const struct dc *dc, int dml_stream_idx; struct dc_stream_state *phantom_stream; struct dc_stream_status *phantom_status; + enum fams2_stream_type type = 0; - struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[num_fams2_streams]; + union dmub_cmd_fams2_config *static_base_state = &context->bw_ctx.bw.dcn.fams2_stream_base_params[num_fams2_streams]; + union dmub_cmd_fams2_config *static_sub_state = &context->bw_ctx.bw.dcn.fams2_stream_sub_params[num_fams2_streams]; struct dc_stream_state *stream = context->streams[i]; @@ -508,28 +412,38 @@ void dml21_build_fams2_programming(const struct dc *dc, } /* copy static state from PMO */ - memcpy(static_state, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, - sizeof(struct dmub_fams2_stream_static_state)); - - /* get information from context */ - static_state->num_planes = context->stream_status[i].plane_count; - static_state->otg_inst = context->stream_status[i].primary_otg_inst; - - /* populate pipe masks for planes */ - for (j = 0; j < context->stream_status[i].plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && 
- context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { - static_state->pipe_mask |= (1 << k); - static_state->plane_pipe_masks[j] |= (1 << k); + memcpy(static_base_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_base_params, + sizeof(union dmub_cmd_fams2_config)); + memcpy(static_sub_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params, + sizeof(union dmub_cmd_fams2_config)); + + switch (dc->debug.fams_version.minor) { + case 1: + default: + type = static_base_state->stream_v1.base.type; + + /* get information from context */ + static_base_state->stream_v1.base.num_planes = context->stream_status[i].plane_count; + static_base_state->stream_v1.base.otg_inst = context->stream_status[i].primary_otg_inst; + + /* populate pipe masks for planes */ + for (j = 0; j < context->stream_status[i].plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { + static_base_state->stream_v1.base.pipe_mask |= (1 << k); + static_base_state->stream_v1.base.plane_pipe_masks[j] |= (1 << k); + } } } } + /* get per method programming */ - switch (static_state->type) { + switch (type) { case FAMS2_STREAM_TYPE_VBLANK: case FAMS2_STREAM_TYPE_VACTIVE: case FAMS2_STREAM_TYPE_DRR: @@ -543,16 +457,27 @@ void dml21_build_fams2_programming(const struct dc *dc, /* phantom status should always be present */ ASSERT(phantom_status); - static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + if (!phantom_status) + break; - /* populate pipe masks for phantom planes */ - for (j = 0; j < phantom_status->plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - 
context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { - static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); - static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + switch (dc->debug.fams_version.minor) { + case 1: + default: + static_sub_state->stream_v1.sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + + /* populate pipe masks for phantom planes */ + for (j = 0; j < phantom_status->plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { + switch (dc->debug.fams_version.minor) { + case 1: + default: + static_sub_state->stream_v1.sub_state.subvp.phantom_pipe_mask |= (1 << k); + static_sub_state->stream_v1.sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + } + } } } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h index d5153fbac921..4bff52eaaef8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h @@ -18,10 +18,10 @@ struct dml2_display_ttu_regs; int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id); int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id); bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id); -void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs, - struct dml2_display_dlg_regs *disp_dlg_regs, - struct dml2_display_ttu_regs *disp_ttu_regs, - struct pipe_ctx *out); +void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, + struct dc_state *context, + 
struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming); void dml21_populate_mall_allocation_size(struct dc_state *context, struct dml2_context *in_ctx, struct dml2_per_plane_programming *pln_prog, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index bbc28b9a15a3..fb80ba9287b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -75,7 +75,6 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co { switch (in_dc->ctx->dce_version) { case DCN_VERSION_4_01: - case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. (*dml_ctx)->v21.dml_init.options.project_id = dml2_project_dcn4x_stage2_auto_drr_svp; break; default: @@ -233,13 +232,6 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count); dml21_copy_clocks_to_dc_state(dml_ctx, context); dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx); - if (in_dc->ctx->dce_version == DCN_VERSION_3_2) { - dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.a, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); - dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.b, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); - dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.c, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); - dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.d, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); - } - dml21_build_fams2_programming(in_dc, context, dml_ctx); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h deleted file mode 100644 index d82c681a5402..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DML_DML_DCN3_SOC_BB__ -#define __DML_DML_DCN3_SOC_BB__ - -#include "dml_top_soc_parameter_types.h" - -static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = { - .derate_table = { - .system_active_urgent = { - .dram_derate_percent_pixel = 22, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 76, - .dcfclk_derate_percent = 100, - }, - .system_active_average = { - .dram_derate_percent_pixel = 17, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 57, - .dcfclk_derate_percent = 75, - }, - .dcn_mall_prefetch_urgent = { - .dram_derate_percent_pixel = 22, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 76, - .dcfclk_derate_percent = 100, - }, - .dcn_mall_prefetch_average = { - .dram_derate_percent_pixel = 17, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 57, - .dcfclk_derate_percent = 75, - }, - .system_idle_average = { - .dram_derate_percent_pixel = 17, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 57, - .dcfclk_derate_percent = 100, - }, - }, - .writeback = { - .base_latency_us = 12, - .scaling_factor_us = 0, - .scaling_factor_mhz = 0, - }, - .qos_params = { - .dcn4x = { - .df_qos_response_time_fclk_cycles = 300, - .max_round_trip_to_furthest_cs_fclk_cycles = 350, - .mall_overhead_fclk_cycles = 50, - .meta_trip_adder_fclk_cycles = 36, - .average_transport_distance_fclk_cycles = 257, - .umc_urgent_ramp_latency_margin = 50, - .umc_max_latency_margin = 30, - .umc_average_latency_margin = 20, - .fabric_max_transport_latency_margin = 20, - .fabric_average_transport_latency_margin = 10, - - .per_uclk_dpm_params = { - { - .minimum_uclk_khz = 97, - .urgent_ramp_uclk_cycles = 472, - .trip_to_memory_uclk_cycles = 827, - .meta_trip_to_memory_uclk_cycles = 827, - 
.maximum_latency_when_urgent_uclk_cycles = 72, - .average_latency_when_urgent_uclk_cycles = 61, - .maximum_latency_when_non_urgent_uclk_cycles = 827, - .average_latency_when_non_urgent_uclk_cycles = 118, - }, - { - .minimum_uclk_khz = 435, - .urgent_ramp_uclk_cycles = 546, - .trip_to_memory_uclk_cycles = 848, - .meta_trip_to_memory_uclk_cycles = 848, - .maximum_latency_when_urgent_uclk_cycles = 146, - .average_latency_when_urgent_uclk_cycles = 90, - .maximum_latency_when_non_urgent_uclk_cycles = 848, - .average_latency_when_non_urgent_uclk_cycles = 135, - }, - { - .minimum_uclk_khz = 731, - .urgent_ramp_uclk_cycles = 632, - .trip_to_memory_uclk_cycles = 874, - .meta_trip_to_memory_uclk_cycles = 874, - .maximum_latency_when_urgent_uclk_cycles = 232, - .average_latency_when_urgent_uclk_cycles = 124, - .maximum_latency_when_non_urgent_uclk_cycles = 874, - .average_latency_when_non_urgent_uclk_cycles = 155, - }, - { - .minimum_uclk_khz = 1187, - .urgent_ramp_uclk_cycles = 716, - .trip_to_memory_uclk_cycles = 902, - .meta_trip_to_memory_uclk_cycles = 902, - .maximum_latency_when_urgent_uclk_cycles = 316, - .average_latency_when_urgent_uclk_cycles = 160, - .maximum_latency_when_non_urgent_uclk_cycles = 902, - .average_latency_when_non_urgent_uclk_cycles = 177, - }, - }, - }, - }, - .qos_type = dml2_qos_param_type_dcn4x, -}; - -static const struct dml2_soc_bb dml2_socbb_dcn31 = { - .clk_table = { - .uclk = { - .clk_values_khz = {97000, 435000, 731000, 1187000}, - .num_clk_values = 4, - }, - .fclk = { - .clk_values_khz = {300000, 2500000}, - .num_clk_values = 2, - }, - .dcfclk = { - .clk_values_khz = {200000, 1800000}, - .num_clk_values = 2, - }, - .dispclk = { - .clk_values_khz = {100000, 2000000}, - .num_clk_values = 2, - }, - .dppclk = { - .clk_values_khz = {100000, 2000000}, - .num_clk_values = 2, - }, - .dtbclk = { - .clk_values_khz = {100000, 2000000}, - .num_clk_values = 2, - }, - .phyclk = { - .clk_values_khz = {810000, 810000}, - .num_clk_values = 2, - }, - 
.socclk = { - .clk_values_khz = {300000, 1600000}, - .num_clk_values = 2, - }, - .dscclk = { - .clk_values_khz = {666667, 666667}, - .num_clk_values = 2, - }, - .phyclk_d18 = { - .clk_values_khz = {625000, 625000}, - .num_clk_values = 2, - }, - .phyclk_d32 = { - .clk_values_khz = {2000000, 2000000}, - .num_clk_values = 2, - }, - .dram_config = { - .channel_width_bytes = 2, - .channel_count = 16, - .transactions_per_clock = 16, - }, - }, - - .qos_parameters = { - .derate_table = { - .system_active_urgent = { - .dram_derate_percent_pixel = 22, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 76, - .dcfclk_derate_percent = 100, - }, - .system_active_average = { - .dram_derate_percent_pixel = 17, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 57, - .dcfclk_derate_percent = 75, - }, - .dcn_mall_prefetch_urgent = { - .dram_derate_percent_pixel = 22, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 76, - .dcfclk_derate_percent = 100, - }, - .dcn_mall_prefetch_average = { - .dram_derate_percent_pixel = 17, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 57, - .dcfclk_derate_percent = 75, - }, - .system_idle_average = { - .dram_derate_percent_pixel = 17, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 57, - .dcfclk_derate_percent = 100, - }, - }, - .writeback = { - .base_latency_us = 0, - .scaling_factor_us = 0, - .scaling_factor_mhz = 0, - }, - .qos_params = { - .dcn4x = { - .df_qos_response_time_fclk_cycles = 300, - .max_round_trip_to_furthest_cs_fclk_cycles = 350, - .mall_overhead_fclk_cycles = 50, - .meta_trip_adder_fclk_cycles = 36, - .average_transport_distance_fclk_cycles = 260, - .umc_urgent_ramp_latency_margin = 50, - .umc_max_latency_margin = 30, - .umc_average_latency_margin = 20, - .fabric_max_transport_latency_margin = 
20, - .fabric_average_transport_latency_margin = 10, - - .per_uclk_dpm_params = { - { - // State 1 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 472, - .trip_to_memory_uclk_cycles = 827, - .meta_trip_to_memory_uclk_cycles = 827, - .maximum_latency_when_urgent_uclk_cycles = 72, - .average_latency_when_urgent_uclk_cycles = 72, - .maximum_latency_when_non_urgent_uclk_cycles = 827, - .average_latency_when_non_urgent_uclk_cycles = 117, - }, - { - // State 2 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 546, - .trip_to_memory_uclk_cycles = 848, - .meta_trip_to_memory_uclk_cycles = 848, - .maximum_latency_when_urgent_uclk_cycles = 146, - .average_latency_when_urgent_uclk_cycles = 146, - .maximum_latency_when_non_urgent_uclk_cycles = 848, - .average_latency_when_non_urgent_uclk_cycles = 133, - }, - { - // State 3 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 564, - .trip_to_memory_uclk_cycles = 853, - .meta_trip_to_memory_uclk_cycles = 853, - .maximum_latency_when_urgent_uclk_cycles = 164, - .average_latency_when_urgent_uclk_cycles = 164, - .maximum_latency_when_non_urgent_uclk_cycles = 853, - .average_latency_when_non_urgent_uclk_cycles = 136, - }, - { - // State 4 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 613, - .trip_to_memory_uclk_cycles = 869, - .meta_trip_to_memory_uclk_cycles = 869, - .maximum_latency_when_urgent_uclk_cycles = 213, - .average_latency_when_urgent_uclk_cycles = 213, - .maximum_latency_when_non_urgent_uclk_cycles = 869, - .average_latency_when_non_urgent_uclk_cycles = 149, - }, - { - // State 5 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 632, - .trip_to_memory_uclk_cycles = 874, - .meta_trip_to_memory_uclk_cycles = 874, - .maximum_latency_when_urgent_uclk_cycles = 232, - .average_latency_when_urgent_uclk_cycles = 232, - .maximum_latency_when_non_urgent_uclk_cycles = 874, - .average_latency_when_non_urgent_uclk_cycles = 153, - }, - { - // State 6 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 665, - 
.trip_to_memory_uclk_cycles = 885, - .meta_trip_to_memory_uclk_cycles = 885, - .maximum_latency_when_urgent_uclk_cycles = 265, - .average_latency_when_urgent_uclk_cycles = 265, - .maximum_latency_when_non_urgent_uclk_cycles = 885, - .average_latency_when_non_urgent_uclk_cycles = 161, - }, - { - // State 7 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 689, - .trip_to_memory_uclk_cycles = 895, - .meta_trip_to_memory_uclk_cycles = 895, - .maximum_latency_when_urgent_uclk_cycles = 289, - .average_latency_when_urgent_uclk_cycles = 289, - .maximum_latency_when_non_urgent_uclk_cycles = 895, - .average_latency_when_non_urgent_uclk_cycles = 167, - }, - { - // State 8 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 716, - .trip_to_memory_uclk_cycles = 902, - .meta_trip_to_memory_uclk_cycles = 902, - .maximum_latency_when_urgent_uclk_cycles = 316, - .average_latency_when_urgent_uclk_cycles = 316, - .maximum_latency_when_non_urgent_uclk_cycles = 902, - .average_latency_when_non_urgent_uclk_cycles = 174, - }, - }, - }, - }, - .qos_type = dml2_qos_param_type_dcn4x, - }, - - .power_management_parameters = { - .dram_clk_change_blackout_us = 400, - .fclk_change_blackout_us = 0, - .g7_ppt_blackout_us = 0, - .stutter_enter_plus_exit_latency_us = 50, - .stutter_exit_latency_us = 43, - .z8_stutter_enter_plus_exit_latency_us = 0, - .z8_stutter_exit_latency_us = 0, - }, - - .vmin_limit = { - .dispclk_khz = 600 * 1000, - }, - - .dprefclk_mhz = 700, - .xtalclk_mhz = 100, - .pcie_refclk_mhz = 100, - .dchub_refclk_mhz = 50, - .mall_allocated_for_dcn_mbytes = 64, - .max_outstanding_reqs = 512, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .return_bus_width_bytes = 64, - .hostvm_min_page_size_kbytes = 0, - .gpuvm_min_page_size_kbytes = 256, - .phy_downspread_percent = 0, - .dcn_downspread_percent = 0, - .dispclk_dppclk_vco_speed_mhz = 4500, - .do_urgent_latency_adjustment = 0, - .mem_word_bytes = 32, - .num_dcc_mcaches = 8, - .mcache_size_bytes = 2048, - 
.mcache_line_size_bytes = 32, - .max_fclk_for_uclk_dpm_khz = 1250 * 1000, -}; - -static const struct dml2_ip_capabilities dml2_dcn31_max_ip_caps = { - .pipe_count = 4, - .otg_count = 4, - .num_dsc = 4, - .max_num_dp2p0_streams = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_dp2p0_outputs = 4, - .rob_buffer_size_kbytes = 192, - .config_return_buffer_size_in_kbytes = 1152, - .meta_fifo_size_in_kentries = 22, - .compressed_buffer_segment_size_in_kbytes = 64, - .subvp_drr_scheduling_margin_us = 100, - .subvp_prefetch_end_to_mall_start_us = 15, - .subvp_fw_processing_delay = 15, - - .fams2 = { - .max_allow_delay_us = 100 * 1000, - .scheduling_delay_us = 50, - .vertical_interrupt_ack_delay_us = 18, - .allow_programming_delay_us = 18, - .min_allow_width_us = 20, - .subvp_df_throttle_delay_us = 100, - .subvp_programming_delay_us = 18, - .subvp_prefetch_to_mall_delay_us = 18, - .drr_programming_delay_us = 18, - }, -}; - -#endif /* __DML_DML_DCN3_SOC_BB__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index 8ef7977841de..793e1c038efd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -344,6 +344,7 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .config_return_buffer_segment_size_in_kbytes = 64, .meta_fifo_size_in_kentries = 22, .compressed_buffer_segment_size_in_kbytes = 64, + .cursor_buffer_size = 24, .max_flip_time_us = 80, .max_flip_time_lines = 32, .hostvm_mode = 0, @@ -354,7 +355,7 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .fams2 = { .max_allow_delay_us = 100 * 1000, - .scheduling_delay_us = 125, + .scheduling_delay_us = 550, .vertical_interrupt_ack_delay_us = 40, .allow_programming_delay_us = 18, .min_allow_width_us = 20, diff --git 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index b132f676a68d..5e1ab6d97640 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -10,9 +10,10 @@ #define DML2_MAX_PLANES 8 #define DML2_MAX_DCN_PIPES 8 #define DML2_MAX_MCACHES 8 // assume plane is going to be supported by a max of 8 mcaches +#define DML2_MAX_WRITEBACK 3 enum dml2_swizzle_mode { - dml2_sw_linear, + dml2_sw_linear, // SW_LINEAR accepts 256 byte aligned pitch and also 128 byte aligned pitch if DCC is not enabled dml2_sw_256b_2d, dml2_sw_4kb_2d, dml2_sw_64kb_2d, @@ -24,7 +25,8 @@ enum dml2_swizzle_mode { dml2_gfx11_sw_64kb_d_x, dml2_gfx11_sw_64kb_r_x, dml2_gfx11_sw_256kb_d_x, - dml2_gfx11_sw_256kb_r_x + dml2_gfx11_sw_256kb_r_x, + }; enum dml2_source_format_class { @@ -38,7 +40,13 @@ enum dml2_source_format_class { dml2_rgbe_alpha = 9, dml2_rgbe = 10, dml2_mono_8 = 11, - dml2_mono_16 = 12 + dml2_mono_16 = 12, + dml2_422_planar_8 = 13, + dml2_422_planar_10 = 14, + dml2_422_planar_12 = 15, + dml2_422_packed_8 = 16, + dml2_422_packed_10 = 17, + dml2_422_packed_12 = 18 }; enum dml2_rotation_angle { @@ -121,15 +129,6 @@ enum dml2_dsc_enable_option { dml2_dsc_enable_if_necessary = 2 }; -enum dml2_pstate_support_method { - dml2_pstate_method_uninitialized, - dml2_pstate_method_not_supported, - dml2_pstate_method_vactive, - dml2_pstate_method_vblank, - dml2_pstate_method_svp, - dml2_pstate_method_drr -}; - enum dml2_tdlut_addressing_mode { dml2_tdlut_sw_linear = 0, dml2_tdlut_simple_linear = 1 @@ -287,22 +286,23 @@ struct dml2_link_output_cfg { bool validate_output; // Do not validate the link configuration for this display stream. 
}; -struct dml2_writeback_cfg { - bool enable; +struct dml2_writeback_info { enum dml2_source_format_class pixel_format; - unsigned int active_writebacks_per_surface; + unsigned long input_width; + unsigned long input_height; + unsigned long output_width; + unsigned long output_height; + unsigned long v_taps; + unsigned long h_taps; + unsigned long v_taps_chroma; + unsigned long h_taps_chroma; + double h_ratio; + double v_ratio; +}; - struct { - bool enabled; - unsigned long input_width; - unsigned long input_height; - unsigned long output_width; - unsigned long output_height; - unsigned long v_taps; - unsigned long h_taps; - double h_ratio; - double v_ratio; - } scaling_info; +struct dml2_writeback_cfg { + unsigned int active_writebacks_per_stream; + struct dml2_writeback_info writeback_stream[DML2_MAX_WRITEBACK]; }; struct dml2_plane_parameters { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index ebd8abe894a9..5f0bc42d1d2f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -167,11 +167,13 @@ struct dml2_ip_capabilities { unsigned int max_num_dp2p0_streams; unsigned int max_num_hdmi_frl_outputs; unsigned int max_num_dp2p0_outputs; + unsigned int max_num_wb; unsigned int rob_buffer_size_kbytes; unsigned int config_return_buffer_size_in_kbytes; unsigned int config_return_buffer_segment_size_in_kbytes; unsigned int meta_fifo_size_in_kentries; unsigned int compressed_buffer_segment_size_in_kbytes; + unsigned int cursor_buffer_size; unsigned int max_flip_time_us; unsigned int max_flip_time_lines; unsigned int hostvm_mode; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index eeb96c455658..d2d053f2354d 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -26,20 +26,14 @@ enum dml2_project_id { dml2_project_dcn4x_stage2_auto_drr_svp = 3, }; -enum dml2_dram_clock_change_support { - dml2_dram_clock_change_vactive = 0, - dml2_dram_clock_change_vblank = 1, - dml2_dram_clock_change_vblank_and_vactive = 2, - dml2_dram_clock_change_drr = 3, - dml2_dram_clock_change_mall_svp = 4, - dml2_dram_clock_change_mall_full_frame = 6, - dml2_dram_clock_change_unsupported = 7 -}; - -enum dml2_fclock_change_support { - dml2_fclock_change_vactive = 0, - dml2_fclock_change_vblank = 1, - dml2_fclock_change_unsupported = 2 +enum dml2_pstate_change_support { + dml2_pstate_change_vactive = 0, + dml2_pstate_change_vblank = 1, + dml2_pstate_change_vblank_and_vactive = 2, + dml2_pstate_change_drr = 3, + dml2_pstate_change_mall_svp = 4, + dml2_pstate_change_mall_full_frame = 6, + dml2_pstate_change_unsupported = 7 }; enum dml2_output_type_and_rate__type { @@ -202,24 +196,23 @@ struct dml2_mcache_surface_allocation { } informative; }; -enum dml2_uclk_pstate_support_method { - dml2_uclk_pstate_support_method_not_supported = 0, - /* hw */ - dml2_uclk_pstate_support_method_vactive = 1, - dml2_uclk_pstate_support_method_vblank = 2, - dml2_uclk_pstate_support_method_reserved_hw = 5, - /* fw */ - dml2_uclk_pstate_support_method_fw_subvp_phantom = 6, - dml2_uclk_pstate_support_method_reserved_fw = 10, - /* fw w/drr */ - dml2_uclk_pstate_support_method_fw_vactive_drr = 11, - dml2_uclk_pstate_support_method_fw_vblank_drr = 12, - dml2_uclk_pstate_support_method_fw_subvp_phantom_drr = 13, - dml2_uclk_pstate_support_method_reserved_fw_drr_fixed = 20, - dml2_uclk_pstate_support_method_fw_drr = 21, - dml2_uclk_pstate_support_method_reserved_fw_drr_var = 22, - - dml2_uclk_pstate_support_method_count +enum dml2_pstate_method { + dml2_pstate_method_na = 0, + /* hw exclusive modes */ + dml2_pstate_method_vactive = 1, + 
dml2_pstate_method_vblank = 2, + dml2_pstate_method_reserved_hw = 5, + /* fw assisted exclusive modes */ + dml2_pstate_method_fw_svp = 6, + dml2_pstate_method_reserved_fw = 10, + /* fw assisted modes requiring drr modulation */ + dml2_pstate_method_fw_vactive_drr = 11, + dml2_pstate_method_fw_vblank_drr = 12, + dml2_pstate_method_fw_svp_drr = 13, + dml2_pstate_method_reserved_fw_drr_clamped = 20, + dml2_pstate_method_fw_drr = 21, + dml2_pstate_method_reserved_fw_drr_var = 22, + dml2_pstate_method_count }; struct dml2_per_plane_programming { @@ -241,7 +234,7 @@ struct dml2_per_plane_programming { // If a stream is using odm split, then this value is always 1 unsigned int num_dpps_required; - enum dml2_uclk_pstate_support_method uclk_pstate_support_method; + enum dml2_pstate_method uclk_pstate_support_method; // MALL size requirements for MALL SS and SubVP unsigned int surface_size_mall_bytes; @@ -281,7 +274,7 @@ struct dml2_per_stream_programming { unsigned int num_odms_required; - enum dml2_uclk_pstate_support_method uclk_pstate_method; + enum dml2_pstate_method uclk_pstate_method; struct { bool enabled; @@ -289,7 +282,8 @@ struct dml2_per_stream_programming { union dml2_global_sync_programming global_sync; } phantom_stream; - struct dmub_fams2_stream_static_state fams2_params; + union dmub_cmd_fams2_config fams2_base_params; + union dmub_cmd_fams2_config fams2_sub_params; }; //----------------- @@ -339,7 +333,7 @@ struct dml2_mode_support_info { bool DCCMetaBufferSizeNotExceeded; bool TotalVerticalActiveBandwidthSupport; bool VActiveBandwidthSupport; - enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES]; bool USRRetrainingSupport; bool PrefetchSupported; bool DynamicMetadataSupported; @@ -361,6 +355,7 @@ struct dml2_mode_support_info { unsigned int AlignedYPitch[DML2_MAX_PLANES]; unsigned int AlignedCPitch[DML2_MAX_PLANES]; bool g6_temp_read_support; + bool 
temp_read_or_ppt_support; }; // dml2_mode_support_info struct dml2_display_cfg_programming { @@ -392,6 +387,11 @@ struct dml2_display_cfg_programming { unsigned long fclk_khz; unsigned long dcfclk_khz; } svp_prefetch; + struct { + unsigned long uclk_khz; + unsigned long fclk_khz; + unsigned long dcfclk_khz; + } svp_prefetch_no_throttle; unsigned long deepsleep_dcfclk_khz; unsigned long dispclk_khz; @@ -444,7 +444,7 @@ struct dml2_display_cfg_programming { double pstate_change_us; double fclk_pstate_change_us; double usr_retraining_us; - double g6_temp_read_watermark_us; + double temp_read_or_ppt_watermark_us; } watermarks; struct { @@ -653,6 +653,7 @@ struct dml2_display_cfg_programming { double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + double WritebackRequiredBandwidth; double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES]; double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES]; double DSCCLK_calculated[DML2_MAX_PLANES]; @@ -662,6 +663,7 @@ struct dml2_display_cfg_programming { double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; unsigned int PrefetchMode[DML2_MAX_PLANES]; // LEGACY_ONLY bool ROBUrgencyAvoidance; + double LowestPrefetchMargin; } misc; struct dml2_mode_support_info mode_support_info; @@ -675,6 +677,7 @@ struct dml2_display_cfg_programming { bool failed_mcache_validation; bool failed_dpmm; bool failed_mode_programming; + bool failed_map_watermarks; } informative; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 3d41ffde91c1..d68b4567e218 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -9,7 +9,7 @@ #include "dml2_debug.h" #include "lib_float_math.h" -static const struct dml2_core_ip_params 
core_dcn4_ip_caps_base = { +struct dml2_core_ip_params core_dcn4_ip_caps_base = { // Hardcoded values for DCN3x .vblank_nom_default_us = 668, .remote_iommu_outstanding_translations = 256, @@ -90,6 +90,7 @@ static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *i ip_caps->config_return_buffer_segment_size_in_kbytes = ip_params->config_return_buffer_segment_size_in_kbytes; ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries; ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes; + ip_caps->cursor_buffer_size = ip_params->cursor_buffer_size; ip_caps->max_flip_time_us = ip_params->max_flip_time_us; ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines; ip_caps->hostvm_mode = ip_params->hostvm_mode; @@ -114,6 +115,7 @@ static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, ip_params->config_return_buffer_segment_size_in_kbytes = ip_caps->config_return_buffer_segment_size_in_kbytes; ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries; ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes; + ip_params->cursor_buffer_size = ip_caps->cursor_buffer_size; ip_params->max_flip_time_us = ip_caps->max_flip_time_us; ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines; ip_params->hostvm_mode = ip_caps->hostvm_mode; @@ -316,28 +318,9 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in // Setup the appropriate p-state strategy if (display_cfg->stage3.performed && display_cfg->stage3.success) { - switch (display_cfg->stage3.pstate_switch_modes[plane_index]) { - case dml2_uclk_pstate_support_method_vactive: - case dml2_uclk_pstate_support_method_vblank: - case dml2_uclk_pstate_support_method_fw_subvp_phantom: - case dml2_uclk_pstate_support_method_fw_drr: - case dml2_uclk_pstate_support_method_fw_vactive_drr: - case 
dml2_uclk_pstate_support_method_fw_vblank_drr: - case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: - programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index]; - break; - case dml2_uclk_pstate_support_method_reserved_hw: - case dml2_uclk_pstate_support_method_reserved_fw: - case dml2_uclk_pstate_support_method_reserved_fw_drr_fixed: - case dml2_uclk_pstate_support_method_reserved_fw_drr_var: - case dml2_uclk_pstate_support_method_not_supported: - case dml2_uclk_pstate_support_method_count: - default: - programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; - break; - } + programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index]; } else { - programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na; } dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); @@ -360,7 +343,8 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in /* unconditionally populate fams2 params */ dml2_core_calcs_get_stream_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, - &programming->stream_programming[main_plane->stream_index].fams2_params, + &programming->stream_programming[main_plane->stream_index].fams2_base_params, + &programming->stream_programming[main_plane->stream_index].fams2_sub_params, programming->stream_programming[main_plane->stream_index].uclk_pstate_method, plane_index); @@ -572,18 +556,18 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out in_out->programming->plane_programming[plane_index].num_dpps_required = 
core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; else { if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vactive; else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = 
dml2_pstate_method_vblank; else - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na; } dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 601320b1be81..8ed49a9df378 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -11,6 +11,9 @@ #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 +#define DML_MAX_COMPRESSION_RATIO 4 +//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW +//#define DML_GLOBAL_PREFETCH_CHECK #define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) @@ -132,9 +135,9 @@ static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_su dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); - if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || 
support->ExceededMALLSize == 1) dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); @@ -315,12 +318,11 @@ dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory); dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark); dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark); -dml_get_var_func(wm_g6_temp_read, double, mode_lib->mp.Watermark.g6_temp_read_watermark_us); +dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us); dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark); dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth); dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip); dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL); -dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency); dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency); @@ -355,7 +357,9 @@ dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_ban dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]); +dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency); dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us); 
+dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us); dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us); dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us); @@ -466,6 +470,24 @@ static bool dml_is_420(enum dml2_source_format_class source_format) case dml2_420_12: val = 1; break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; case dml2_rgbe_alpha: val = 0; break; @@ -487,32 +509,31 @@ static bool dml_is_420(enum dml2_source_format_class source_format) static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) { - switch (sw_mode) { - case (dml2_sw_linear): - return 256; break; - case (dml2_sw_256b_2d): - return 256; break; - case (dml2_sw_4kb_2d): - return 4096; break; - case (dml2_sw_64kb_2d): - return 65536; break; - case (dml2_sw_256kb_2d): - return 262144; break; - case (dml2_gfx11_sw_linear): - return 256; break; - case (dml2_gfx11_sw_64kb_d): - return 65536; break; - case (dml2_gfx11_sw_64kb_d_t): - return 65536; break; - case (dml2_gfx11_sw_64kb_d_x): - return 65536; break; - case (dml2_gfx11_sw_64kb_r_x): - return 65536; break; - case (dml2_gfx11_sw_256kb_d_x): - return 262144; break; - case (dml2_gfx11_sw_256kb_r_x): - return 262144; break; - default: + if (sw_mode == dml2_sw_linear) + return 256; + else if (sw_mode == dml2_sw_256b_2d) + return 256; + else if (sw_mode == dml2_sw_4kb_2d) + return 4096; + else if (sw_mode == dml2_sw_64kb_2d) + return 65536; + else if (sw_mode == dml2_sw_256kb_2d) + return 262144; + else if (sw_mode == dml2_gfx11_sw_linear) + return 256; + else if (sw_mode == dml2_gfx11_sw_64kb_d) + return 65536; + else if (sw_mode == 
dml2_gfx11_sw_64kb_d_t) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_r_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_256kb_d_x) + return 262144; + else if (sw_mode == dml2_gfx11_sw_256kb_r_x) + return 262144; + else { DML2_ASSERT(0); return 256; } @@ -579,8 +600,8 @@ static void CalculateBytePerPixelAndBlockSizes( { *BytePerPixelDETY = 0; *BytePerPixelDETC = 0; - *BytePerPixelY = 0; - *BytePerPixelC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 1; if (SourcePixelFormat == dml2_444_64) { *BytePerPixelDETY = 8; @@ -820,7 +841,7 @@ static void CalculateSwathWidth( // Output unsigned int req_per_swath_ub_l[], unsigned int req_per_swath_ub_c[], - unsigned int SwathWidthSingleDPPY[], + unsigned int SwathWidthSingleDPPY[], // post-rotated plane width unsigned int SwathWidthSingleDPPC[], unsigned int SwathWidthY[], // per-pipe unsigned int SwathWidthC[], // per-pipe @@ -1403,7 +1424,6 @@ static unsigned int dscceComputeDelay( // N422/N420 operate at 2 pixels per clock unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified; - if (pixelFormat == dml2_420) pixelsPerClock = 2; // #all other modes operate at 1 pixel per clock @@ -1428,7 +1448,6 @@ static unsigned int dscceComputeDelay( } } - //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard if (bpc == 8) ssm_group_priming_delay = 83; @@ -1447,9 +1466,6 @@ static unsigned int dscceComputeDelay( //determine number of padded pixels in the last group of a slice line, computed as slice_padded_pixels = 3 * slice_width_groups - slice_width_modified; - - - //determine integer number of complete 
slice lines required to reach initial transmit delay without ssm delay considered number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified; @@ -1463,7 +1479,6 @@ static unsigned int dscceComputeDelay( //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay; - //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next @@ -1506,7 +1521,6 @@ static unsigned int dscceComputeDelay( return pixels; } - //updated in dcn4 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output) { @@ -2090,7 +2104,6 @@ static void CalculateDCCConfiguration( yuv420 = 1; else yuv420 = 0; - horz_div_l = 1; horz_div_c = 1; vert_div_l = 1; @@ -2561,8 +2574,7 @@ static void calculate_mcache_setting( if (*p->num_mcaches_l) { l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; } - - if (l->is_dual_plane && *p->num_mcaches_c) { + if (l->is_dual_plane) { l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c; if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) { @@ -2682,12 +2694,12 @@ static double dml_get_return_bandwidth_available( bool is_avg_bw, bool is_hvm_en, bool is_hvm_only, - double dcflk_mhz, + double dcfclk_mhz, double fclk_mhz, double dram_bw_mbps) { double return_bw_mbps = 0.; - double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcflk_mhz; + double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz; double 
ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; @@ -2753,7 +2765,7 @@ static double dml_get_return_bandwidth_available( dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); - dml2_printf("DML::%s: dcflk_mhz = %f\n", __func__, dcflk_mhz); + dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); @@ -2766,7 +2778,7 @@ static double dml_get_return_bandwidth_available( return return_bw_mbps; } -static void calculate_bandwidth_available( +static noinline_for_stack void calculate_bandwidth_available( double avg_bandwidth_available_min[dml2_core_internal_soc_state_max], double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM @@ -3516,10 +3528,9 @@ static void CalculateUrgentBurstFactor( dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); #endif - } -static void CalculateDCFCLKDeepSleep( +static void CalculateDCFCLKDeepSleepTdlut( const struct dml2_display_cfg *display_cfg, unsigned int NumberOfActiveSurfaces, unsigned int BytePerPixelY[], @@ -3534,6 +3545,10 @@ static void CalculateDCFCLKDeepSleep( double ReadBandwidthChroma[], unsigned int ReturnBusWidth, + double 
dispclk, + unsigned int tdlut_bytes_to_deliver[], + double prefetch_swath_time_us[], + // Output double *DCFClkDeepSleep) { @@ -3568,6 +3583,22 @@ static void CalculateDCFCLKDeepSleep( } DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16); + // adjust for 3dlut delivery time + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) { + double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k]; + + dml2_printf("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); + dml2_printf("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); + dml2_printf("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); + dml2_printf("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); + + // increase the deepsleep dcfclk to match the original dispclk throughput rate + if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) { + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk); + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0); + } + } + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); @@ -3590,9 +3621,56 @@ static void CalculateDCFCLKDeepSleep( for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); } + dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); } +static noinline_for_stack void CalculateDCFCLKDeepSleep( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + 
unsigned int BytePerPixelC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + unsigned int DPPPerSurface[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + // Output + double *DCFClkDeepSleep) +{ + double zero_double[DML2_MAX_PLANES]; + unsigned int zero_integer[DML2_MAX_PLANES]; + + memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double)); + memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int)); + + CalculateDCFCLKDeepSleepTdlut( + display_cfg, + NumberOfActiveSurfaces, + BytePerPixelY, + BytePerPixelC, + SwathWidthY, + SwathWidthC, + DPPPerSurface, + PSCL_THROUGHPUT, + PSCL_THROUGHPUT_CHROMA, + Dppclk, + ReadBandwidthLuma, + ReadBandwidthChroma, + ReturnBusWidth, + 0, + zero_integer, //tdlut_bytes_to_deliver, + zero_double, //prefetch_swath_time_us, + + // Output + DCFClkDeepSleep); +} + static double CalculateWriteBackDelay( enum dml2_source_format_class WritebackPixelFormat, double WritebackHRatio, @@ -3816,8 +3894,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 
128 : 64;; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; } if (p->SwathHeightC[k] == 0) @@ -4064,7 +4142,7 @@ static bool ValidateODMMode(enum dml2_odm_mode ODMMode, return true; } -static void CalculateODMMode( +static noinline_for_stack void CalculateODMMode( unsigned int MaximumPixelsPerLinePerDSCUnit, unsigned int HActive, enum dml2_output_format_class OutFormat, @@ -4161,7 +4239,7 @@ static void CalculateODMMode( #endif } -static void CalculateOutputLink( +static noinline_for_stack void CalculateOutputLink( struct dml2_core_internal_scratch *s, double PHYCLK, double PHYCLKD18, @@ -4592,6 +4670,7 @@ static void calculate_tdlut_setting( *p->tdlut_groups_per_2row_ub = 0; *p->tdlut_opt_time = 0; *p->tdlut_drain_time = 0; + *p->tdlut_bytes_to_deliver = 0; *p->tdlut_bytes_per_group = 0; *p->tdlut_pte_bytes_per_frame = 0; *p->tdlut_bytes_per_frame = 0; @@ -4660,6 +4739,7 @@ static void calculate_tdlut_setting( *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; + *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); } #ifdef __DML_VBA_DEBUG__ @@ -4680,6 +4760,7 @@ static void calculate_tdlut_setting( dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles); dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); + dml2_printf("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); #endif } @@ -5069,20 +5150,18 @@ static bool 
CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->trip_to_mem = 0.0; *p->Tvm_trips = 0.0; *p->Tr0_trips = 0.0; - s->Tvm_no_trip_oto = 0.0; - s->Tr0_no_trip_oto = 0.0; s->Tvm_trips_rounded = 0.0; s->Tr0_trips_rounded = 0.0; s->max_Tsw = 0.0; s->Lsw_oto = 0.0; - s->Tpre_rounded = 0.0; + *p->Tpre_rounded = 0.0; s->prefetch_bw_equ = 0.0; s->Tvm_equ = 0.0; s->Tr0_equ = 0.0; s->Tdmbf = 0.0; s->Tdmec = 0.0; s->Tdmsks = 0.0; - s->prefetch_sw_bytes = 0.0; + *p->prefetch_sw_bytes = 0.0; s->prefetch_bw_pr = 0.0; s->bytes_pp = 0.0; s->dep_bytes = 0.0; @@ -5207,6 +5286,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); + dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); #endif if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) @@ -5277,23 +5357,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC; } - s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (double)p->myPipe->DPPPerSurface; - if (p->myPipe->VRatio < 1.0) - s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr; - s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); - - s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; - s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; - s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; - s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / 
s->max_Tsw); - - s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; - s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); - s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); - - s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; - s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); - s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; + *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); @@ -5302,57 +5367,103 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); + + s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; + s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); + + // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto + // Note: in prefetch calculation, acounting is done mostly per-pipe. 
+ // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time + s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface; + + // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1) + s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime; + + if (p->myPipe->BytePerPixelC > 0) { + s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface; + s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime; + } + + s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; + + s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime)); + + s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0; + s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); - s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); + dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); + dml2_printf("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); +#endif if (p->display_cfg->gpuvm_enable == true) { - s->Tvm_no_trip_oto = math_max2( + s->Tvm_oto = math_max3( + *p->Tvm_trips, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, s->LineTime / 4.0); - s->Tvm_oto = math_max2( - *p->Tvm_trips, - s->Tvm_no_trip_oto); + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); 
dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { - s->Tvm_no_trip_oto = s->Tvm_trips_rounded; s->Tvm_oto = s->Tvm_trips_rounded; } if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { - s->Tr0_no_trip_oto = math_max2( + s->Tr0_oto = math_max3( + *p->Tr0_trips, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, s->LineTime / 4.0); - s->Tr0_oto = math_max2( - *p->Tr0_trips, - s->Tr0_no_trip_oto); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); #endif - } else { - s->Tr0_no_trip_oto = (s->LineTime - s->Tvm_oto) / 4.0; - s->Tr0_oto = s->Tr0_no_trip_oto; - } + } else + s->Tr0_oto = s->LineTime / 4.0; s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; +#ifdef DML_GLOBAL_PREFETCH_CHECK + dml2_printf("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre); + if (p->impacted_dst_y_pre > 0) { + dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre); + dml2_printf("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); + } +#endif + *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime; + //To (time for delay after scaler) in line time Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / 
(double)p->myPipe->HTotal); + s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); //Tpre_equ in line time if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; else s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; + +#ifdef DML_GLOBAL_PREFETCH_CHECK + s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ); + + s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + + if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ) + s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ; +#endif + s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ @@ -5370,7 +5481,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); - dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes); + dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); @@ -5394,7 +5505,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch 
*scratch #endif double Tpre = s->dst_y_prefetch_equ * s->LineTime; s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; - s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; + *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); @@ -5420,7 +5531,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre)); + dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); #endif @@ -5434,78 +5545,85 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // Tpre_rounded is Tpre rounding to 2-bit fraction // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time - // So that means prefetch bw calculated can be higher since the total time availabe for prefetch is less - bool min_Lsw_equ_ok = s->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + // So that means prefetch bw calculated can be higher since the total time available for prefetch is less + bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + bool 
tpre_gt_req_latency = true; +#if 0 + // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained. + // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages. + // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary. + tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch); +#endif - if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok) { + if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) { s->prefetch_bw1 = 0.; s->prefetch_bw2 = 0.; s->prefetch_bw3 = 0.; s->prefetch_bw4 = 0.; // prefetch_bw1: VM + 2*R0 + SW - if (s->Tpre_rounded - *p->Tno_bw > 0) { + if (*p->Tpre_rounded - *p->Tno_bw > 0) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) - + s->prefetch_sw_bytes) - / (s->Tpre_rounded - *p->Tno_bw); - s->Tsw_est1 = s->prefetch_sw_bytes / s->prefetch_bw1; + + *p->prefetch_sw_bytes) + / (*p->Tpre_rounded - *p->Tno_bw); + s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1; } else s->prefetch_bw1 = 0; dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); - if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { + if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / - (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef 
__DML_VBA_DEBUG__ dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); - dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, s->Tpre_rounded); + dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } // prefetch_bw2: VM + SW - if (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { - s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / - (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); - s->Tsw_est2 = s->prefetch_sw_bytes / s->prefetch_bw2; + if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { + s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) / + (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); + s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2; } else s->prefetch_bw2 = 0; dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); - if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { - s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor 
/ (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); + if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { + s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); } // prefetch_bw3: 2*R0 + SW - if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) / - (s->Tpre_rounded - s->Tvm_trips_rounded); - s->Tsw_est3 = s->prefetch_sw_bytes / s->prefetch_bw3; + if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) / + (*p->Tpre_rounded - s->Tvm_trips_rounded); + s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3; } else s->prefetch_bw3 = 0; dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); - if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); + if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - 
s->Tvm_trips_rounded); dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); } // prefetch_bw4: SW - if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0) - s->prefetch_bw4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded); + if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0) + s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded); else s->prefetch_bw4 = 0; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre)); + dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); @@ -5617,9 +5735,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); #endif - // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) - s->Lsw_equ = s->dst_y_prefetch_equ - math_ceil2(4.0 * (s->Tvm_equ + 2 * s->Tr0_equ) / s->LineTime, 1.0) / 4.0; - // Use the more stressful prefetch schedule if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { *p->dst_y_prefetch = s->dst_y_prefetch_oto; @@ -5628,31 +5743,33 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 
1.0) / 4.0; *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - s->dst_y_per_vm_no_trip_vblank = math_ceil2(4.0 * s->Tvm_no_trip_oto / s->LineTime, 1.0) / 4.0; - s->dst_y_per_row_no_trip_vblank = math_ceil2(4.0 * s->Tr0_no_trip_oto / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__); #endif + } else { *p->dst_y_prefetch = s->dst_y_prefetch_equ; + + if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted) + *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted; + s->TimeForFetchingVM = s->Tvm_equ; s->TimeForFetchingRowInVBlank = s->Tr0_equ; - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - s->dst_y_per_vm_no_trip_vblank = *p->dst_y_per_vm_vblank; - s->dst_y_per_row_no_trip_vblank = *p->dst_y_per_row_vblank; + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif } - /* take worst case Lsw to calculate bandwidth requirement regardless of schedule */ - s->LinesToRequestPrefetchPixelData = math_min2(s->Lsw_equ, s->Lsw_oto); // Lsw + // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) + s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); + *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime); #ifdef __DML_VBA_DEBUG__ 
dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); @@ -5663,6 +5780,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml2_printf("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); @@ -5749,8 +5867,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", - __func__, min_Lsw_equ_ok, s->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); + dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); + dml2_printf("DML::%s: No time to prefetch! 
min_Lsw_equ_ok = %d, Tpre_rounded+Tvm_trips_rounded+2.0*Tr0_trips_rounded+min_Tsw_equ (%f) should be > \n", + __func__, tpre_gt_req_latency, (s->min_Lsw_equ*s->LineTime + s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded), p->Turg, s->trip_to_mem, p->ExtraLatencyPrefetch); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5769,13 +5889,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (vm_bytes == 0) { prefetch_vm_bw = 0; - } else if (s->dst_y_per_vm_no_trip_vblank > 0) { + } else if (*p->dst_y_per_vm_vblank > 0) { #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); #endif - prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (s->dst_y_per_vm_no_trip_vblank * s->LineTime); + prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif @@ -5787,8 +5907,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { prefetch_row_bw = 0; - } else if (s->dst_y_per_row_no_trip_vblank > 0) { - prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (s->dst_y_per_row_no_trip_vblank * s->LineTime); + } else if (*p->dst_y_per_row_vblank > 0) { + prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); @@ -5828,6 +5948,171 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch return s->NoTimeToPrefetch; 
} +static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines, + unsigned int line_buffer_size_bits, + unsigned int num_pipes, + unsigned int vp_width, + unsigned int vp_height, + double h_ratio, + enum dml2_rotation_angle rotation_angle) +{ + unsigned int num_lb_source_lines = 0; + double lb_bit_per_pixel = 57.0; + unsigned recin_width = vp_width/num_pipes; + + if (dml_is_vertical_rotation(rotation_angle)) + recin_width = vp_height/num_pipes; + + num_lb_source_lines = (unsigned int) math_min2((double) max_line_buffer_lines, + math_floor2(line_buffer_size_bits / lb_bit_per_pixel / (recin_width / math_max2(h_ratio, 1.0)), 1.0)); + + return num_lb_source_lines; +} + +static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[]) +{ + int max_value = -1; + int max_idx = -1; + for (unsigned int i = 0; i < num_planes; i++) { + if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) { + max_value = Trpd_dcfclk_cycles[i]; + max_idx = i; + } + } + if (max_idx <= 0) { + dml2_assert(max_idx >= 0); + max_idx = this_plane_idx; + } + + return max_idx; +} + +static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps) +{ + double sum = 0.; + for (unsigned int i = 0; i < num_planes; i++) { + if (i != exclude_plane_idx) { + sum += prefetch_swath_bytes[i]; + } + } + return sum / bw_mbps; +} + +// a global check against the aggregate effect of the per plane prefetch schedule +static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p) +{ + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals; + unsigned int i, k; + + memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals)); + + 
*p->recalc_prefetch_schedule = 0; + s->prefetch_global_check_passed = 1; + // worst case if the rob and cdb is fully hogged + s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); + dml2_printf("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); + dml2_printf("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); + dml2_printf("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); + dml2_printf("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); + dml2_printf("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); +#endif + + // calculate the return impact from each plane, request is 256B per dcfclk + for (i = 0; i < p->num_active_planes; i++) { + s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i]; + s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i]; + s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i]; + s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i]; + + if (p->pixel_format[i] == dml2_420_10) { + s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5); + s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5); + s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5); + s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5); + } + + s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l); + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]); + +#ifdef __DML_VBA_DEBUG__ + 
dml2_printf("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); + dml2_printf("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); + dml2_printf("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); + dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); + dml2_printf("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); + dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); +#endif + + if (s->src_swath_bytes_c[i] > 0) { // dual_plane + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c); + + if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) { + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); + dml2_printf("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); + dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); + dml2_printf("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); +#endif + } + + s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate + s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); + dml2_printf("DML::%s: i=%u 
time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); + dml2_printf("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); +#endif + // clamping to worst case delay which is one which occupy the full rob+cdb + if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles) + s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles; + } + + // Figure out the impacted prefetch time for each plane + // if impacted_Tre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw + for (i = 0; i < p->num_active_planes; i++) { + k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i + // the rest of planes (except for k) complete for bw + p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz; + p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps); + p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); +#endif + } + + if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) { + for (i = 0; i < p->num_active_planes; i++) { + if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) { + s->prefetch_global_check_passed = 0; + *p->recalc_prefetch_schedule = 1; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); + dml2_printf("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); +#endif + } + } else { + // likely a mode programming calls, assume support, and no recalc - not used anyways + s->prefetch_global_check_passed = 1; + *p->recalc_prefetch_schedule = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: 
prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); + dml2_printf("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); +#endif + + return s->prefetch_global_check_passed; +} + static void calculate_peak_bandwidth_required( struct dml2_core_internal_scratch *s, struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p) @@ -6046,7 +6331,7 @@ static void check_urgent_bandwidth_support( double *frac_urg_bandwidth_nom, double *frac_urg_bandwidth_mall, bool *vactive_bandwidth_support_ok, // vactive ok - bool *bandwidth_support_ok, // max of vm, prefetch, vactive all ok + bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok unsigned int mall_allocated_for_dcn_mbytes, double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], @@ -6116,7 +6401,6 @@ static void check_urgent_bandwidth_support( } } #endif - } static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state, @@ -6438,7 +6722,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; } - p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; + p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); @@ -6454,12 +6738,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", 
__func__, p->Watermark->Z8StutterExitWatermark); dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: g6_temp_read_watermark_us = %f\n", __func__, p->Watermark->g6_temp_read_watermark_us); + dml2_printf("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); #endif s->TotalActiveWriteback = 0; for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { s->TotalActiveWriteback = s->TotalActiveWriteback + 1; } } @@ -6522,7 +6806,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MaxLineBufferLines= %u\n", __func__, k, p->MaxLineBufferLines); + dml2_printf("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel); dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); @@ -6563,7 +6847,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark; s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark; s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - 
p->Watermark->USRRetrainingWatermark; - s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->g6_temp_read_watermark_us; + s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us; if (p->VActiveLatencyHidingMargin) p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; @@ -6571,9 +6855,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( if (p->VActiveLatencyHidingUs) p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding; - if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) { - s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height * (double)h_total / pixel_clock_mhz) * 4.0); - if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 + / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width + / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / 
pixel_clock_mhz) * 4.0); + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2; } s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark; @@ -6588,36 +6875,36 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy; reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000; - p->FCLKChangeSupport[k] = dml2_fclock_change_unsupported; + p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported; if (s->ActiveFCLKChangeLatencyMargin[k] > 0) - p->FCLKChangeSupport[k] = dml2_fclock_change_vactive; + p->FCLKChangeSupport[k] = dml2_pstate_change_vactive; else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency) - p->FCLKChangeSupport[k] = dml2_fclock_change_vblank; + p->FCLKChangeSupport[k] = dml2_pstate_change_vblank; - if (p->FCLKChangeSupport[k] == dml2_fclock_change_unsupported) + if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported) *p->global_fclk_change_supported = false; - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported; if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { if (p->display_cfg->overrides.all_streams_blanked || (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive; else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; + p->DRAMClockChangeSupport[k] = 
dml2_pstate_change_vactive; else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive; else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_drr; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr; else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_svp; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp; else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_full_frame; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame; - if (p->DRAMClockChangeSupport[k] == dml2_dram_clock_change_unsupported) + if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported) *p->global_dram_clock_change_supported = false; s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1)); @@ -6725,7 +7012,7 @@ static void calculate_bytes_to_fetch_required_to_hide_latency( } } -static void calculate_vactive_det_fill_latency( +static noinline_for_stack void calculate_vactive_det_fill_latency( const 
struct dml2_display_cfg *display_cfg, unsigned int num_active_planes, unsigned int bytes_required_l[], @@ -6915,8 +7202,7 @@ struct dml2_core_internal_g6_temp_read_blackouts_table { } entries[DML_MAX_CLK_TABLE_SIZE]; }; -static const struct dml2_core_internal_g6_temp_read_blackouts_table - core_dcn4_g6_temp_read_blackout_table = { +struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = { .entries = { { .uclk_khz = 96000, @@ -7036,6 +7322,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; +#ifdef DML_GLOBAL_PREFETCH_CHECK + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; +#endif struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; @@ -7083,12 +7372,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; k++) dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); - - // 
dml2_printf_dml_policy(&mode_lib->ms.policy); - // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, mode_lib->ms.num_active_planes); - // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, mode_lib->ms.num_active_planes); - // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, mode_lib->ms.num_active_planes); - // dml2_printf_dml_display_cfg_output(&display_cfg->output, mode_lib->ms.num_active_planes); #endif CalculateMaxDETAndMinCompressedBufferSize( @@ -7183,8 +7466,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - mode_lib->ms.SurfaceReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - mode_lib->ms.SurfaceReadBandwidthChroma[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * 
math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); @@ -7194,35 +7477,35 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0; dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma); dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma); - dml2_printf("DML::%s: k=%u, ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthLuma[k]); - dml2_printf("DML::%s: k=%u, ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); + dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); #endif } // Writeback bandwidth for (k = 0; k < 
mode_lib->ms.num_active_planes; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) { - mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height - * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width - / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { + mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0; - } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { - mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height - * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width - / 
(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0; } else { - mode_lib->ms.WriteBandwidth[k] = 0.0; + mode_lib->ms.WriteBandwidth[k][0] = 0.0; } } /*Writeback Latency support check*/ mode_lib->ms.support.WritebackLatencySupport = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && - (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && + (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) { mode_lib->ms.support.WritebackLatencySupport = false; } } @@ -7231,19 +7514,19 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out /* Writeback Scale Ratio and Taps Support Check */ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = 
true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > mode_lib->ip.writeback_max_hscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > mode_lib->ip.writeback_max_vscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio < mode_lib->ip.writeback_min_hscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio < mode_lib->ip.writeback_min_vscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps - || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps % 2) == 1))) { + if 
(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps + || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) { mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; } - if (2.0 * 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) { + if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) { mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; } } @@ -7423,8 +7706,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.SurfaceReadBandwidthLuma; - CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.SurfaceReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c; CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; @@ -7671,16 +7954,16 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out //DISPCLK/DPPCLK 
mode_lib->ms.WritebackRequiredDISPCLK = 0; for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK, - CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, + CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ip.writeback_line_buffer_buffer_size)); } @@ -7712,7 +7995,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) { s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1; s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1; @@ -8256,23 +8539,23 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.PSCL_FACTOR, mode_lib->ms.PSCL_FACTOR_CHROMA, mode_lib->ms.RequiredDPPCLK, - mode_lib->ms.SurfaceReadBandwidthLuma, - mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, mode_lib->soc.return_bus_width_bytes, /* Output */ &mode_lib->ms.dcfclk_deepsleep); for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( - 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK; } else { mode_lib->ms.WritebackDelayTime[k] = 0.0; @@ -8349,7 +8632,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: mode_lib->ms.DCFCLK = 
%f\n", __func__, mode_lib->ms.DCFCLK); dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); #endif mode_lib->ms.support.OutstandingRequestsSupport = true; @@ -8367,6 +8650,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); + mode_lib->ms.support.max_non_urgent_latency_us + = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { @@ -8408,7 +8698,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } memset(calculate_mcache_setting_params, 0, sizeof(struct 
dml2_core_calcs_calculate_mcache_setting_params)); - if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) { + if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) { for (k = 0; k < mode_lib->ms.num_active_planes; k++) { mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0; mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0; @@ -8515,8 +8805,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->hostvm_enable, mode_lib->ms.MaxDCFCLK, mode_lib->ms.MaxFabricClock, +#ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW + mode_lib->ms.dram_bw_mbps); +#else mode_lib->ms.max_dram_bw_mbps); - +#endif // Average BW support check calculate_avg_bandwidth_required( @@ -8524,8 +8817,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out // input display_cfg, mode_lib->ms.num_active_planes, - mode_lib->ms.SurfaceReadBandwidthLuma, - mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, mode_lib->ms.cursor_bw, mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, @@ -8595,6 +8888,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); @@ -8638,9 +8932,32 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out &mode_lib->ms.ExtraLatency_sr, &mode_lib->ms.ExtraLatencyPrefetch); - { + for (k = 0; k < 
mode_lib->ms.num_active_planes; k++) + s->impacted_dst_y_pre[k] = 0; + + s->recalc_prefetch_schedule = 0; + s->recalc_prefetch_done = 0; + do { mode_lib->ms.support.PrefetchSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; + + s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane0.width, + display_cfg->plane_descriptors[k].composition.viewport.plane0.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane1.width, + display_cfg->plane_descriptors[k].composition.viewport.plane1.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; mode_lib->ms.TWait[k] = CalculateTWait( @@ -8730,6 +9047,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; + CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; + 
CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; // output CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; @@ -8758,6 +9078,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; + CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; + CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; + CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; + CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); @@ -8766,6 +9090,27 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); } // for k num_planes + CalculateDCFCLKDeepSleepTdlut( + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.SwathWidthY, + mode_lib->ms.SwathWidthC, + mode_lib->ms.NoOfDPP, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLK, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, + mode_lib->soc.return_bus_width_bytes, + mode_lib->ms.RequiredDISPCLK, + s->tdlut_bytes_to_deliver, + s->prefetch_swath_time_us, + + /* Output */ + &mode_lib->ms.dcfclk_deepsleep); + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { if (mode_lib->ms.dst_y_prefetch[k] < 2.0 || mode_lib->ms.LinesForVM[k] >= 32.0 @@ -8789,7 
+9134,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } mode_lib->ms.support.VRatioInPrefetchSupported = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { mode_lib->ms.support.VRatioInPrefetchSupported = false; @@ -8799,10 +9144,14 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } } + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; + + // By default, do not recalc prefetch schedule + s->recalc_prefetch_schedule = 0; + // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok if (mode_lib->ms.support.PrefetchSupported) { - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { // Calculate Urgent burst factor for prefetch #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); @@ -8815,7 +9164,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.swath_width_chroma_ub[k], mode_lib->ms.SwathHeightY[k], mode_lib->ms.SwathHeightC[k], - line_time_us, + s->line_times[k], mode_lib->ms.UrgLatency, mode_lib->ms.VRatioPreY[k], mode_lib->ms.VRatioPreC[k], @@ -8852,8 +9201,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; 
calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.SurfaceReadBandwidthLuma; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.SurfaceReadBandwidthChroma; + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; @@ -8899,127 +9248,164 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } } +#ifdef DML_GLOBAL_PREFETCH_CHECK + if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) { + CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; + CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; + CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; + CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; + CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = 
mode_lib->ms.CompressedBufferSizeInkByte; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; + CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; + CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; + CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; + CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; + if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; + + CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / + ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); + + // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible + CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; + CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; + mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); + s->recalc_prefetch_done = 1; + 
s->recalc_prefetch_schedule = 1; + } +#endif + } // prefetch schedule ok, do urg bw and flip schedule + } while (s->recalc_prefetch_schedule); - // Both prefetch schedule and BW okay - if (mode_lib->ms.support.PrefetchSupported == true && mode_lib->ms.support.VRatioInPrefetchSupported == true) { - mode_lib->ms.BandwidthAvailableForImmediateFlip = - get_bandwidth_available_for_immediate_flip( - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_qual, // no flip - mode_lib->ms.support.urg_bandwidth_available); - - mode_lib->ms.TotImmediateFlipBytes = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip) { - s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( - s->HostVMInefficiencyFactor, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.meta_row_bytes[k]); - } else { - s->per_pipe_flip_bytes[k] = 0; - } - mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; + // Flip Schedule + // Both prefetch schedule and BW okay + if (mode_lib->ms.support.PrefetchSupported == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip( + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_qual, // no flip + mode_lib->ms.support.urg_bandwidth_available); - } + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( + s->HostVMInefficiencyFactor, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculateFlipSchedule( - &mode_lib->scratch, - 
display_cfg->plane_descriptors[k].immediate_flip, - 1, // use_lb_flip_bw - s->HostVMInefficiencyFactor, - s->Tvm_trips_flip[k], - s->Tr0_trips_flip[k], - s->Tvm_trips_flip_rounded[k], - s->Tr0_trips_flip_rounded[k], - display_cfg->gpuvm_enable, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.BandwidthAvailableForImmediateFlip, - mode_lib->ms.TotImmediateFlipBytes, - display_cfg->plane_descriptors[k].pixel_format, - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, - mode_lib->ms.Tno_bw_flip[k], - mode_lib->ms.dpte_row_height[k], - mode_lib->ms.dpte_row_height_chroma[k], - mode_lib->ms.use_one_row_for_frame_flip[k], - mode_lib->ip.max_flip_time_us, - mode_lib->ip.max_flip_time_lines, - s->per_pipe_flip_bytes[k], - mode_lib->ms.meta_row_bytes[k], - s->meta_row_height_luma[k], - s->meta_row_height_chroma[k], - mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, - - /* Output */ - &mode_lib->ms.dst_y_per_vm_flip[k], - &mode_lib->ms.dst_y_per_row_flip[k], - &mode_lib->ms.final_flip_bw[k], - &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); - } + } - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; - calculate_peak_bandwidth_params->surface_peak_required_bw = 
mode_lib->ms.surface_peak_required_bw; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 1; - calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.SurfaceReadBandwidthLuma; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.SurfaceReadBandwidthChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; - calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; - 
calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; - calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; - - calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - calculate_immediate_flip_bandwidth_support( - &s->dummy_single[0], // double* frac_urg_bandwidth_flip - &mode_lib->ms.support.ImmediateFlipSupport, - - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_flip, - mode_lib->ms.support.non_urg_bandwidth_required_flip, - mode_lib->ms.support.urg_bandwidth_available); - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) - mode_lib->ms.support.ImmediateFlipSupport = false; - } + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 1, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + 
display_cfg->plane_descriptors[k].pixel_format, + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.Tno_bw_flip[k], + mode_lib->ms.dpte_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, + s->per_pipe_flip_bytes[k], + mode_lib->ms.meta_row_bytes[k], + s->meta_row_height_luma[k], + s->meta_row_height_chroma[k], + mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + /* Output */ + &mode_lib->ms.dst_y_per_vm_flip[k], + &mode_lib->ms.dst_y_per_row_flip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 1; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = 
mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = 
mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); - } else { // if prefetch not support, assume iflip is not supported too + calculate_immediate_flip_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth_flip + &mode_lib->ms.support.ImmediateFlipSupport, + + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + mode_lib->ms.support.urg_bandwidth_available); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) mode_lib->ms.support.ImmediateFlipSupport = false; - } - } // prefetch schedule + } + + } else { // if prefetch not support, assume iflip is not supported too + mode_lib->ms.support.ImmediateFlipSupport = false; } s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; @@ -9116,8 +9502,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->pstate_bytes_required_c, mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, - mode_lib->ms.SurfaceReadBandwidthLuma, - mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, mode_lib->ms.surface_avg_vactive_required_bw, mode_lib->ms.surface_peak_required_bw, /* outputs */ @@ -9187,12 +9573,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: ModeSupport = %u\n", __func__, 
mode_lib->ms.support.ModeSupport); dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k]; mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k]; } - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k]; mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k]; mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k]; @@ -9229,7 +9615,7 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); @@ -9882,7 +10268,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) { - if (p->display_cfg->stream_descriptors[k].writeback.enable) + if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) l->TotalActiveWriteback = l->TotalActiveWriteback + 1; if (TotalNumberOfActiveOTG == 0) { // first otg @@ -9984,6 +10370,7 @@ static bool dml_core_mode_programming(struct 
dml2_core_calcs_mode_programming_ex struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params; @@ -10075,12 +10462,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex dml2_assert(s->SOCCLK > 0); #ifdef __DML_VBA_DEBUG__ - // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, s->num_active_planes); - // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, s->num_active_planes); - // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, s->num_active_planes); - // dml2_printf_dml_display_cfg_output(&display_cfg->output, s->num_active_planes); - // dml2_printf_dml_display_cfg_hw_resource(&display_cfg->hw, s->num_active_planes); - dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); @@ -10198,10 +10579,10 @@ static bool dml_core_mode_programming(struct 
dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); - mode_lib->mp.SurfaceReadBandwidthLuma[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - mode_lib->mp.SurfaceReadBandwidthChroma[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - dml2_printf("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); - dml2_printf("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + 
mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + dml2_printf("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + dml2_printf("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); } CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; @@ -10217,8 +10598,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte; CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.SurfaceReadBandwidthLuma; - CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.SurfaceReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c; CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0]; CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1]; CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY; @@ -10539,8 +10920,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = 
&s->tdlut_groups_per_2row_ub[k]; calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; - calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); } @@ -10583,17 +10964,17 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep; for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { mode_lib->mp.WritebackDelay[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk; } else mode_lib->mp.WritebackDelay[k] = 0; @@ -10679,10 +11060,25 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { bool cursor_not_enough_urgent_latency_hiding = 0; - double line_time_us = 0.0; - - line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; + + s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->mp.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane0.width, + display_cfg->plane_descriptors[k].composition.viewport.plane0.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + 
+ s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->mp.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane1.width, + display_cfg->plane_descriptors[k].composition.viewport.plane1.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { calculate_cursor_req_attributes( display_cfg->plane_descriptors[k].cursor.cursor_width, @@ -10699,7 +11095,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex display_cfg->plane_descriptors[k].cursor.cursor_width, s->cursor_bytes_per_chunk[k], s->cursor_lines_per_chunk[k], - line_time_us, + s->line_times[k], mode_lib->mp.UrgentLatency, // output @@ -10714,7 +11110,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.swath_width_chroma_ub[k], mode_lib->mp.SwathHeightY[k], mode_lib->mp.SwathHeightC[k], - line_time_us, + s->line_times[k], mode_lib->mp.UrgentLatency, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, @@ -10752,6 +11148,35 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); #endif + if (s->num_active_planes > 1) { + CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes; + CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; + 
CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; + CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY; + CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC; + CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; + CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; + CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care + CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk; + CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; + CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch; + + // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible + CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0]; + CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; + CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming + } + { 
s->DestinationLineTimesForPrefetchLessThan2 = false; s->VRatioPrefetchMoreThanMax = false; @@ -10763,11 +11188,11 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); mode_lib->mp.TWait[k] = CalculateTWait( - display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, - mode_lib->mp.UrgentLatency, - mode_lib->mp.TripToMemory, - !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? - get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->mp.UrgentLatency, + mode_lib->mp.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? 
+ get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->mp.Dppclk[k]; myPipe->Dispclk = mode_lib->mp.Dispclk; @@ -10848,6 +11273,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k]; CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k]; + CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k]; // output CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k]; @@ -10876,9 +11304,18 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k]; CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k]; CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k]; + CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; + CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; + CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; + CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0]; mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + if (s->impacted_dst_y_pre[k] > 0) + mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k]; + else + mode_lib->mp.impacted_prefetch_margin_us[k] = 0; + #ifdef __DML_VBA_DEBUG__ 
dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); #endif @@ -10956,8 +11393,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); - dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); - dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + dml2_printf("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); @@ -10988,8 +11425,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.SurfaceReadBandwidthLuma; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.SurfaceReadBandwidthChroma; + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; 
calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; @@ -11120,8 +11557,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.SurfaceReadBandwidthLuma; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.SurfaceReadBandwidthChroma; + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; @@ -11238,8 +11675,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->mmSOCParameters.USRRetrainingLatency = 0; s->mmSOCParameters.SMNLatency = 0; s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); - s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); - s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mmSOCParameters.max_urgent_latency_us = 
get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index); + s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock; s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; @@ -11289,7 +11726,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / @@ -11475,25 +11912,25 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex //Maximum Bandwidth Used s->TotalWRBandwidth = 0; - s->WRBandwidth = 0; - for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_32) { - s->WRBandwidth = 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4; - } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { - s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8; + for (k = 0; k < display_cfg->num_streams; ++k) { + s->WRBandwidth = 0; + if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) { + s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width / + (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height + / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000)) + * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 
4.0 : 8.0); + s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth; } - s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth; } mode_lib->mp.TotalDataReadBandwidth = 0; for (k = 0; k < s->num_active_planes; ++k) { - mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.SurfaceReadBandwidthLuma[k] + mode_lib->mp.SurfaceReadBandwidthChroma[k]; + mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k]; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); #endif } @@ -11530,8 +11967,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC; CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock; CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock; - CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.SurfaceReadBandwidthLuma; - CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.SurfaceReadBandwidthChroma; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c; CalculateStutterEfficiency_params->dpte_row_bw = 
mode_lib->mp.dpte_row_bw; CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw; CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present; @@ -11742,7 +12179,7 @@ static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); - wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz); + wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); @@ -12321,14 +12758,18 @@ void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_interna void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, - struct dmub_fams2_stream_static_state *fams2_programming, - enum dml2_uclk_pstate_support_method pstate_method, + union dmub_cmd_fams2_config *fams2_base_programming, + union dmub_cmd_fams2_config *fams2_sub_programming, + enum dml2_pstate_method pstate_method, int plane_index) { const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index]; const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index]; const struct dml2_fams2_meta 
*stream_fams2_meta = &display_cfg->stage3.stream_fams2_meta[plane_descriptor->stream_index]; + struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base; + union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state; + unsigned int i; if (display_cfg->display_config.overrides.all_streams_blanked) { @@ -12337,110 +12778,110 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna } /* from display configuration */ - fams2_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; - fams2_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; - fams2_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal - + base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; + base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; + base_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal - stream_descriptor->timing.v_front_porch); - fams2_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal - + base_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal - stream_descriptor->timing.v_front_porch - stream_descriptor->timing.v_active); - fams2_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; + base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; /* from meta */ - fams2_programming->otg_vline_time_ns = + base_programming->otg_vline_time_ns = (unsigned int)(stream_fams2_meta->otg_vline_time_us * 1000.0); - fams2_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines; - fams2_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines; - fams2_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines; - fams2_programming->drr_keepout_otg_vline = 
(uint16_t)(stream_fams2_meta->nom_vtotal - + base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines; + base_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines; + base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines; + base_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal - stream_descriptor->timing.v_front_porch - stream_fams2_meta->method_drr.programming_delay_otg_vlines); - fams2_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines; - fams2_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal; + base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines; + base_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal; /* from core */ - fams2_programming->config.bits.min_ttu_vblank_usable = true; + base_programming->config.bits.min_ttu_vblank_usable = true; for (i = 0; i < display_cfg->display_config.num_planes; i++) { /* check if all planes support p-state in blank */ if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index && mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) { - fams2_programming->config.bits.min_ttu_vblank_usable = false; + base_programming->config.bits.min_ttu_vblank_usable = false; break; } } switch (pstate_method) { - case dml2_uclk_pstate_support_method_vactive: - case dml2_uclk_pstate_support_method_fw_vactive_drr: + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: /* legacy vactive */ - fams2_programming->type = FAMS2_STREAM_TYPE_VACTIVE; - fams2_programming->sub_state.legacy.vactive_det_fill_delay_otg_vlines = - (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; - 
fams2_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline; - fams2_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline; - fams2_programming->config.bits.clamp_vtotal_min = true; + base_programming->type = FAMS2_STREAM_TYPE_VACTIVE; + sub_programming->legacy.vactive_det_fill_delay_otg_vlines = + (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + base_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; break; - case dml2_uclk_pstate_support_method_vblank: - case dml2_uclk_pstate_support_method_fw_vblank_drr: + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: /* legacy vblank */ - fams2_programming->type = FAMS2_STREAM_TYPE_VBLANK; - fams2_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline; - fams2_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline; - fams2_programming->config.bits.clamp_vtotal_min = true; + base_programming->type = FAMS2_STREAM_TYPE_VBLANK; + base_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; break; - case dml2_uclk_pstate_support_method_fw_drr: + case dml2_pstate_method_fw_drr: /* drr */ - fams2_programming->type = FAMS2_STREAM_TYPE_DRR; - fams2_programming->sub_state.drr.programming_delay_otg_vlines = - (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines; - 
fams2_programming->sub_state.drr.nom_stretched_vtotal = - (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal; - fams2_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline; - fams2_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline; + base_programming->type = FAMS2_STREAM_TYPE_DRR; + sub_programming->drr.programming_delay_otg_vlines = + (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines; + sub_programming->drr.nom_stretched_vtotal = + (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal; + base_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline; /* drr only clamps to vtotal min for single display */ - fams2_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; - fams2_programming->sub_state.drr.only_stretch_if_required = true; + base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; + sub_programming->drr.only_stretch_if_required = true; break; - case dml2_uclk_pstate_support_method_fw_subvp_phantom: - case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: /* subvp */ - fams2_programming->type = FAMS2_STREAM_TYPE_SUBVP; - fams2_programming->sub_state.subvp.vratio_numerator = - (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); - fams2_programming->sub_state.subvp.vratio_denominator = 1000; - fams2_programming->sub_state.subvp.programming_delay_otg_vlines = - (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines; - fams2_programming->sub_state.subvp.prefetch_to_mall_otg_vlines = - (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; - 
fams2_programming->sub_state.subvp.phantom_vtotal = - (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal; - fams2_programming->sub_state.subvp.phantom_vactive = - (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive; - fams2_programming->sub_state.subvp.config.bits.is_multi_planar = - plane_descriptor->surface.plane1.height > 0; - fams2_programming->sub_state.subvp.config.bits.is_yuv420 = - plane_descriptor->pixel_format == dml2_420_8 || - plane_descriptor->pixel_format == dml2_420_10 || - plane_descriptor->pixel_format == dml2_420_12; - - fams2_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline; - fams2_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline; - fams2_programming->config.bits.clamp_vtotal_min = true; + base_programming->type = FAMS2_STREAM_TYPE_SUBVP; + sub_programming->subvp.vratio_numerator = + (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); + sub_programming->subvp.vratio_denominator = 1000; + sub_programming->subvp.programming_delay_otg_vlines = + (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines; + sub_programming->subvp.prefetch_to_mall_otg_vlines = + (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; + sub_programming->subvp.phantom_vtotal = + (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal; + sub_programming->subvp.phantom_vactive = + (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive; + sub_programming->subvp.config.bits.is_multi_planar = + plane_descriptor->surface.plane1.height > 0; + sub_programming->subvp.config.bits.is_yuv420 = + plane_descriptor->pixel_format == dml2_420_8 || + plane_descriptor->pixel_format == dml2_420_10 || + plane_descriptor->pixel_format == dml2_420_12; + + base_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline; + 
base_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; break; - case dml2_uclk_pstate_support_method_reserved_hw: - case dml2_uclk_pstate_support_method_reserved_fw: - case dml2_uclk_pstate_support_method_reserved_fw_drr_fixed: - case dml2_uclk_pstate_support_method_reserved_fw_drr_var: - case dml2_uclk_pstate_support_method_not_supported: - case dml2_uclk_pstate_support_method_count: + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_na: + case dml2_pstate_method_count: default: /* this should never happen */ break; @@ -12569,6 +13010,8 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState; out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize; out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits; + out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.temp_read_or_ppt_support; + out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support; out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots; out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits; @@ -12662,7 +13105,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib); out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib); out->informative.watermarks.usr_retraining_us = 
dml_get_wm_usr_retraining(mode_lib); - out->informative.watermarks.g6_temp_read_watermark_us = dml_get_wm_g6_temp_read(mode_lib); + out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib); out->informative.mall.total_surface_size_in_mall_bytes = 0; for (k = 0; k < out->display_config.num_planes; ++k) @@ -12745,6 +13188,8 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); + out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000; + for (k = 0; k < out->display_config.num_planes; k++) { if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us) @@ -12824,6 +13269,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k]; out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k]; + out->informative.misc.WritebackRequiredBandwidth = mode_lib->scratch.dml_core_mode_programming_locals.TotalWRBandwidth / 1000.0; out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k]; out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k]; out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k]; @@ -12831,6 +13277,9 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k]; out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k]; out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = 
mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k]; + + if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin) + out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k]; } // For this DV informative layer, all pipes in the same planes will just use the same id @@ -12853,16 +13302,11 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k; } } - - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib); if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 - / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { + / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; } else { out->informative.misc.ROBUrgencyAvoidance = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h index df2d1550a14b..27ef0e096b25 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h @@ -28,7 +28,7 @@ void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *displ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out); void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); -void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, enum dml2_uclk_pstate_support_method pstate_method, int plane_index); +void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, union dmub_cmd_fams2_config *fams2_base_programming, union dmub_cmd_fams2_config *fams2_sub_programming, enum dml2_pstate_method pstate_method, int plane_index); void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config); void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned 
int pitch, unsigned int GPUVMMinPageSizeKBytes); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index cbdfbd5a0bde..23c0fca5515f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -201,7 +201,7 @@ struct dml2_core_internal_watermarks { double Z8StutterExitWatermark; double Z8StutterEnterPlusExitWatermark; double USRRetrainingWatermark; - double g6_temp_read_watermark_us; + double temp_read_or_ppt_watermark_us; }; struct dml2_core_internal_mode_support_info { @@ -252,8 +252,8 @@ struct dml2_core_internal_mode_support_info { bool PTEBufferSizeNotExceeded; bool DCCMetaBufferSizeNotExceeded; - enum dml2_dram_clock_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; - enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES]; bool global_dram_clock_change_supported; bool global_fclk_change_supported; bool USRRetrainingSupport; @@ -318,12 +318,15 @@ struct dml2_core_internal_mode_support_info { bool avg_bandwidth_support_ok[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; double max_urgent_latency_us; + double max_non_urgent_latency_us; double avg_non_urgent_latency_us; double avg_urgent_latency_us; + double df_response_time_us; bool incorrect_imall_usage; bool g6_temp_read_support; + bool temp_read_or_ppt_support; struct dml2_core_internal_watermarks watermarks; }; @@ -378,8 +381,8 @@ struct dml2_core_internal_mode_support { unsigned int DETBufferSizeC[DML2_MAX_PLANES]; unsigned int SwathHeightY[DML2_MAX_PLANES]; unsigned int SwathHeightC[DML2_MAX_PLANES]; - unsigned int SwathWidthY[DML2_MAX_PLANES]; - unsigned int SwathWidthC[DML2_MAX_PLANES]; 
+ unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe + unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe // ---------------------------------- // Intermediates/Informational @@ -476,9 +479,9 @@ struct dml2_core_internal_mode_support { // Bandwidth Related Info double BandwidthAvailableForImmediateFlip; - double SurfaceReadBandwidthLuma[DML2_MAX_PLANES]; // no dcc overhead, for the plane - double SurfaceReadBandwidthChroma[DML2_MAX_PLANES]; - double WriteBandwidth[DML2_MAX_PLANES]; + double vactive_sw_bw_l[DML2_MAX_PLANES]; // no dcc overhead, for the plane + double vactive_sw_bw_c[DML2_MAX_PLANES]; + double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK]; double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES]; double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES]; double cursor_bw[DML2_MAX_PLANES]; @@ -539,7 +542,7 @@ struct dml2_core_internal_mode_program { unsigned int qos_param_index; // to access the uclk dependent dpm table unsigned int active_min_uclk_dpm_index; // to access the min_clk table double FabricClock; /// <brief Basically just the clock freq at the min (or given) state - double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting + //double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting double dram_bw_mbps; double uclk_freq_mhz; unsigned int NoOfDPP[DML2_MAX_PLANES]; @@ -562,14 +565,14 @@ struct dml2_core_internal_mode_program { double BytePerPixelInDETC[DML2_MAX_PLANES]; unsigned int BytePerPixelY[DML2_MAX_PLANES]; unsigned int BytePerPixelC[DML2_MAX_PLANES]; - unsigned int SwathWidthY[DML2_MAX_PLANES]; - unsigned int SwathWidthC[DML2_MAX_PLANES]; + unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe + unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe unsigned int req_per_swath_ub_l[DML2_MAX_PLANES]; unsigned int req_per_swath_ub_c[DML2_MAX_PLANES]; unsigned int SwathWidthSingleDPPY[DML2_MAX_PLANES]; unsigned int 
SwathWidthSingleDPPC[DML2_MAX_PLANES]; - double SurfaceReadBandwidthLuma[DML2_MAX_PLANES]; - double SurfaceReadBandwidthChroma[DML2_MAX_PLANES]; + double vactive_sw_bw_l[DML2_MAX_PLANES]; + double vactive_sw_bw_c[DML2_MAX_PLANES]; double excess_vactive_fill_bw_l[DML2_MAX_PLANES]; double excess_vactive_fill_bw_c[DML2_MAX_PLANES]; @@ -797,8 +800,9 @@ struct dml2_core_internal_mode_program { double MaxActiveFCLKChangeLatencySupported; bool USRRetrainingSupport; bool g6_temp_read_support; - enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES]; - enum dml2_dram_clock_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; + bool temp_read_or_ppt_support; + enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; bool global_dram_clock_change_supported; bool global_fclk_change_supported; double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; @@ -846,6 +850,8 @@ struct dml2_core_internal_mode_program { bool mall_comb_mcache_l[DML2_MAX_PLANES]; bool mall_comb_mcache_c[DML2_MAX_PLANES]; bool lc_comb_mcache[DML2_MAX_PLANES]; + + double impacted_prefetch_margin_us[DML2_MAX_PLANES]; }; struct dml2_core_internal_SOCParametersList { @@ -862,6 +868,7 @@ struct dml2_core_internal_SOCParametersList { double USRRetrainingLatency; double SMNLatency; double g6_temp_read_blackout_us; + double temp_read_or_ppt_blackout_us; double max_urgent_latency_us; double df_response_time_us; enum dml2_qos_param_type qos_type; @@ -951,6 +958,7 @@ struct dml2_core_calcs_mode_support_locals { unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; double tdlut_opt_time[DML2_MAX_PLANES]; double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES]; unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; @@ -961,6 +969,18 @@ struct dml2_core_calcs_mode_support_locals { unsigned int 
pstate_bytes_required_l[DML2_MAX_PLANES]; unsigned int pstate_bytes_required_c[DML2_MAX_PLANES]; + + double prefetch_sw_bytes[DML2_MAX_PLANES]; + double Tpre_rounded[DML2_MAX_PLANES]; + double Tpre_oto[DML2_MAX_PLANES]; + bool recalc_prefetch_schedule; + bool recalc_prefetch_done; + double impacted_dst_y_pre[DML2_MAX_PLANES]; + double line_times[DML2_MAX_PLANES]; + enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; + unsigned int lb_source_lines_l[DML2_MAX_PLANES]; + unsigned int lb_source_lines_c[DML2_MAX_PLANES]; + double prefetch_swath_time_us[DML2_MAX_PLANES]; }; struct dml2_core_calcs_mode_programming_locals { @@ -1024,6 +1044,7 @@ struct dml2_core_calcs_mode_programming_locals { unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; double tdlut_opt_time[DML2_MAX_PLANES]; double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES]; unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; @@ -1041,6 +1062,16 @@ struct dml2_core_calcs_mode_programming_locals { unsigned int pstate_bytes_required_l[DML2_MAX_PLANES]; unsigned int pstate_bytes_required_c[DML2_MAX_PLANES]; + + double prefetch_sw_bytes[DML2_MAX_PLANES]; + double Tpre_rounded[DML2_MAX_PLANES]; + double Tpre_oto[DML2_MAX_PLANES]; + bool recalc_prefetch_schedule; + double impacted_dst_y_pre[DML2_MAX_PLANES]; + double line_times[DML2_MAX_PLANES]; + enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; + unsigned int lb_source_lines_l[DML2_MAX_PLANES]; + unsigned int lb_source_lines_c[DML2_MAX_PLANES]; }; struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals { @@ -1048,6 +1079,7 @@ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_local double ActiveFCLKChangeLatencyMargin[DML2_MAX_PLANES]; double USRRetrainingLatencyMargin[DML2_MAX_PLANES]; double g6_temp_read_latency_margin[DML2_MAX_PLANES]; + double 
temp_read_or_ppt_latency_margin[DML2_MAX_PLANES]; double EffectiveLBLatencyHidingY; double EffectiveLBLatencyHidingC; @@ -1185,17 +1217,14 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals { double LineTime; double dst_y_prefetch_equ; double prefetch_bw_oto; + double per_pipe_vactive_sw_bw; double Tvm_oto; double Tr0_oto; - double Tvm_no_trip_oto; - double Tr0_no_trip_oto; double Tvm_oto_lines; double Tr0_oto_lines; double dst_y_prefetch_oto; double TimeForFetchingVM; double TimeForFetchingRowInVBlank; - double dst_y_per_vm_no_trip_vblank; - double dst_y_per_row_no_trip_vblank; double LinesToRequestPrefetchPixelData; unsigned int HostVMDynamicLevelsTrips; double trip_to_mem; @@ -1203,15 +1232,12 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals { double Tr0_trips_rounded; double max_Tsw; double Lsw_oto; - double Lsw_equ; - double Tpre_rounded; double prefetch_bw_equ; double Tvm_equ; double Tr0_equ; double Tdmbf; double Tdmec; double Tdmsks; - double prefetch_sw_bytes; double total_row_bytes; double prefetch_bw_pr; double bytes_pp; @@ -1225,6 +1251,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals { double prefetch_bw2; double prefetch_bw3; double prefetch_bw4; + double dst_y_prefetch_equ_impacted; double TWait_p; unsigned int cursor_prefetch_bytes; @@ -1545,17 +1572,18 @@ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_param // Output struct dml2_core_internal_watermarks *Watermark; - enum dml2_dram_clock_change_support *DRAMClockChangeSupport; + enum dml2_pstate_change_support *DRAMClockChangeSupport; bool *global_dram_clock_change_supported; double *MaxActiveDRAMClockChangeLatencySupported; unsigned int *SubViewportLinesNeededInMALL; - enum dml2_fclock_change_support *FCLKChangeSupport; + enum dml2_pstate_change_support *FCLKChangeSupport; bool *global_fclk_change_supported; double *MaxActiveFCLKChangeLatencySupported; bool *USRRetrainingSupport; double *VActiveLatencyHidingMargin; double 
*VActiveLatencyHidingUs; bool *g6_temp_read_support; + bool *temp_read_or_ppt_support; }; @@ -1727,8 +1755,8 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { double PrefetchSourceLinesC; unsigned int VInitPreFillC; unsigned int MaxNumSwathC; - unsigned int swath_width_luma_ub; - unsigned int swath_width_chroma_ub; + unsigned int swath_width_luma_ub; // per-pipe + unsigned int swath_width_chroma_ub; // per-pipe unsigned int SwathHeightY; unsigned int SwathHeightC; double TWait; @@ -1750,6 +1778,10 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { unsigned int meta_row_bytes; double mall_prefetch_sdp_overhead_factor; + double impacted_dst_y_pre; + double vactive_sw_bw_l; // per surface bw + double vactive_sw_bw_c; // per surface bw + // output unsigned int *DSTXAfterScaler; unsigned int *DSTYAfterScaler; @@ -1767,6 +1799,8 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { double *Tdmdl_vm; double *Tdmdl; double *TSetup; + double *Tpre_rounded; + double *Tpre_oto; double *Tvm_trips; double *Tr0_trips; double *Tvm_trips_flip; @@ -1777,6 +1811,48 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { unsigned int *VUpdateWidthPix; unsigned int *VReadyOffsetPix; double *prefetch_cursor_bw; + double *prefetch_sw_bytes; + double *prefetch_swath_time_us; +}; + +struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params { + unsigned int num_active_planes; + enum dml2_source_format_class *pixel_format; + unsigned int rob_buffer_size_kbytes; + unsigned int compressed_buffer_size_kbytes; + unsigned int chunk_bytes_l; // same for all planes + unsigned int chunk_bytes_c; + unsigned int *detile_buffer_size_bytes_l; + unsigned int *detile_buffer_size_bytes_c; + unsigned int *full_swath_bytes_l; + unsigned int *full_swath_bytes_c; + unsigned int *lb_source_lines_l; + unsigned int *lb_source_lines_c; + unsigned int *swath_height_l; + unsigned int *swath_height_c; + double *prefetch_sw_bytes; + double *Tpre_rounded; + double *Tpre_oto; + 
double estimated_dcfclk_mhz; + double estimated_urg_bandwidth_required_mbps; + double *line_time; + double *dst_y_prefetch; + + // output + bool *recalc_prefetch_schedule; + double *impacted_dst_y_pre; +}; + +struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals { + unsigned int max_Trpd_dcfclk_cycles; + unsigned int burst_bytes_to_fill_det; + double time_to_fill_det_us; + unsigned int accumulated_return_path_dcfclk_cycles[DML2_MAX_PLANES]; + bool prefetch_global_check_passed; + unsigned int src_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int src_swath_bytes_c[DML2_MAX_PLANES]; + unsigned int src_detile_buf_size_bytes_l[DML2_MAX_PLANES]; + unsigned int src_detile_buf_size_bytes_c[DML2_MAX_PLANES]; }; struct dml2_core_calcs_calculate_mcache_row_bytes_params { @@ -1921,6 +1997,7 @@ struct dml2_core_calcs_calculate_tdlut_setting_params { unsigned int *tdlut_groups_per_2row_ub; double *tdlut_opt_time; double *tdlut_drain_time; + unsigned int *tdlut_bytes_to_deliver; unsigned int *tdlut_bytes_per_group; }; @@ -2004,6 +2081,7 @@ struct dml2_core_internal_scratch { struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; struct dml2_core_calcs_CalculateVMRowAndSwath_locals CalculateVMRowAndSwath_locals; struct dml2_core_calcs_CalculatePrefetchSchedule_locals CalculatePrefetchSchedule_locals; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals CheckGlobalPrefetchAdmissibility_locals; struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals CalculateSwathAndDETConfiguration_locals; struct dml2_core_shared_TruncToValidBPP_locals TruncToValidBPP_locals; struct dml2_core_shared_CalculateDETBufferSize_locals CalculateDETBufferSize_locals; @@ -2019,6 +2097,7 @@ struct dml2_core_internal_scratch { struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params CalculateSwathAndDETConfiguration_params; struct dml2_core_calcs_CalculateStutterEfficiency_params 
CalculateStutterEfficiency_params; struct dml2_core_calcs_CalculatePrefetchSchedule_params CalculatePrefetchSchedule_params; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params CheckGlobalPrefetchAdmissibility_params; struct dml2_core_calcs_calculate_mcache_setting_params calculate_mcache_setting_params; struct dml2_core_calcs_calculate_tdlut_setting_params calculate_tdlut_setting_params; struct dml2_core_shared_calculate_vm_and_row_bytes_params calculate_vm_and_row_bytes_params; @@ -2038,7 +2117,6 @@ struct dml2_core_internal_display_mode_lib { // Used to hold input; intermediate and output of the calculations struct dml2_core_internal_mode_support ms; // struct for mode support struct dml2_core_internal_mode_program mp; // struct for mode programming - // Available overridable calculators for core_shared. // if null, core_shared will use default calculators. struct dml2_core_shared_calculation_funcs funcs; @@ -2051,7 +2129,6 @@ struct dml2_core_calcs_mode_support_ex { const struct dml2_display_cfg *in_display_cfg; const struct dml2_mcg_min_clock_table *min_clk_table; int min_clk_index; - //unsigned int in_state_index; struct dml2_core_internal_mode_support_info *out_evaluation_info; }; @@ -2064,9 +2141,7 @@ struct dml2_core_calcs_mode_programming_ex { const struct dml2_mcg_min_clock_table *min_clk_table; const struct core_display_cfg_support_info *cfg_support_info; int min_clk_index; - struct dml2_display_cfg_programming *programming; - }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c index 714b5c39b7e6..456b3f8a6d38 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c @@ -63,6 +63,150 @@ bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) case dml2_mono_16: val = 0; break; + case 
dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 0; + break; + case dml2_420_10: + val = 0; + break; + case dml2_420_12: + val = 0; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 1; + break; + case dml2_422_planar_10: + val = 1; + break; + case dml2_422_planar_12: + val = 1; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 0; + break; + case dml2_420_10: + val = 0; + break; + case dml2_420_12: + val = 0; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 1; 
+ break; + case dml2_422_packed_10: + val = 1; + break; + case dml2_422_packed_12: + val = 1; + break; default: DML2_ASSERT(0); break; @@ -154,9 +298,9 @@ void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mod dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); - if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || support->ExceededMALLSize == 1) dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); @@ -280,39 +424,49 @@ bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_c return is_phantom; } -unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) -{ - switch (sw_mode) { - case (dml2_sw_linear): - return 256; break; - case (dml2_sw_256b_2d): - return 256; break; - case (dml2_sw_4kb_2d): - return 4096; break; - case (dml2_sw_64kb_2d): - return 65536; break; - case (dml2_sw_256kb_2d): - return 262144; break; - case (dml2_gfx11_sw_linear): - return 256; break; - case (dml2_gfx11_sw_64kb_d): - return 65536; break; - case (dml2_gfx11_sw_64kb_d_t): - return 65536; break; - case (dml2_gfx11_sw_64kb_d_x): - return 65536; break; - case (dml2_gfx11_sw_64kb_r_x): - return 65536; break; - case (dml2_gfx11_sw_256kb_d_x): - return 262144; break; - case (dml2_gfx11_sw_256kb_r_x): - return 262144; break; - default: +unsigned int 
dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) +{ + + if (sw_mode == dml2_sw_linear) + return 256; + else if (sw_mode == dml2_sw_256b_2d) + return 256; + else if (sw_mode == dml2_sw_4kb_2d) + return 4096; + else if (sw_mode == dml2_sw_64kb_2d) + return 65536; + else if (sw_mode == dml2_sw_256kb_2d) + return 262144; + else if (sw_mode == dml2_gfx11_sw_linear) + return 256; + else if (sw_mode == dml2_gfx11_sw_64kb_d) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_t) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_r_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_256kb_d_x) + return 262144; + else if (sw_mode == dml2_gfx11_sw_256kb_r_x) + return 262144; + else { DML2_ASSERT(0); return 256; }; } +bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) +{ + return (byte_per_pixel != 2); +} + +bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode) +{ + return (sw_mode == dml2_sw_linear || sw_mode == dml2_sw_linear_256b || sw_mode == dml2_linear_64elements); +}; + bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) { @@ -325,7 +479,6 @@ bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) return is_vert; } - int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) { int unsigned version = 0; @@ -334,17 +487,17 @@ int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) sw_mode == dml2_sw_256b_2d || sw_mode == dml2_sw_4kb_2d || sw_mode == dml2_sw_64kb_2d || - sw_mode == dml2_sw_256kb_2d) { + sw_mode == dml2_sw_256kb_2d) version = 12; - } else if (sw_mode == dml2_gfx11_sw_linear || + else if (sw_mode == dml2_gfx11_sw_linear || sw_mode == dml2_gfx11_sw_64kb_d || sw_mode == dml2_gfx11_sw_64kb_d_t || sw_mode == dml2_gfx11_sw_64kb_d_x || sw_mode == dml2_gfx11_sw_64kb_r_x || sw_mode == 
dml2_gfx11_sw_256kb_d_x || - sw_mode == dml2_gfx11_sw_256kb_r_x) { + sw_mode == dml2_gfx11_sw_256kb_r_x) version = 11; - } else { + else { dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); DML2_ASSERT(0); } @@ -403,7 +556,7 @@ bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) { bool ret_val = 0; - if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + if (dml2_core_utils_is_420(source_format) || dml2_core_utils_is_422_planar(source_format) || (source_format == dml2_rgbe_alpha)) ret_val = 1; return ret_val; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h index a5cc6a07167a..95f0d017add4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h @@ -11,6 +11,8 @@ double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); +bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format); +bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format); void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); @@ -18,8 +20,10 @@ unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int mu unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct 
core_display_cfg_support_info *cfg_support_info); void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); -unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode); +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); +bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); +bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode); int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index 8869ea089312..fc77fb34a19a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -96,6 +96,7 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm double min_uclk_latency; const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + /* assumes DF throttling is enabled */ min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); min_uclk_avg = (double)min_uclk_avg / 
((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100); @@ -125,6 +126,37 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + + /* assumes DF throttling is disabled */ + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? 
min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? 
min_dcfclk_bw : min_dcfclk_latency); } static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -272,6 +304,17 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr if (result) result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk); + /* these clocks are optional, so they can fail to map, in which case map all to 0 */ + if (result) { + if (!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) || + !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) || + !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) { + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0; + } + } + return result; } @@ -374,11 +417,11 @@ static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mo static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *display_config, int mask) { - unsigned char i; + unsigned int i; bool identical = true; bool contains_drr = false; - unsigned char remap_array[DML2_MAX_PLANES]; - unsigned char remap_array_size = 0; + unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; // Create a remap array to enable simple iteration through only masked stream indicies for (i = 0; i < display_config->num_streams; i++) { @@ -413,10 +456,10 @@ static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *displa static int find_smallest_idle_time_in_vblank_us(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int mask) { - unsigned char i; + unsigned int i; int min_idle_us = 0; - unsigned char remap_array[DML2_MAX_PLANES]; - unsigned char remap_array_size = 0; 
+ unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; // Create a remap array to enable simple iteration through only masked stream indicies @@ -711,7 +754,7 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); @@ -725,7 +768,7 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); 
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index a31db5742675..e763c8e45da8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -195,11 +195,11 @@ static int count_planes_with_stream_index(const struct dml2_display_cfg *display static bool are_timings_trivially_synchronizable(struct display_configuation_with_meta *display_config, int mask) { - unsigned char i; + unsigned int i; bool identical = true; bool contains_drr = false; - unsigned char remap_array[DML2_MAX_PLANES]; - unsigned char remap_array_size = 0; + unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; // Create a remap array to enable simple iteration through only masked stream indicies for (i = 0; i < display_config->display_config.num_streams; i++) { @@ -347,8 +347,12 @@ static int find_highest_odm_load_stream_index( int odm_load, highest_odm_load = -1, highest_odm_load_index 
= -1; for (i = 0; i < display_config->num_streams; i++) { - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz + if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; + else + odm_load = 0; + if (odm_load > highest_odm_load) { highest_odm_load_index = i; highest_odm_load = odm_load; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 92269f0e50ed..a3324f7b9ba6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -13,32 +13,32 @@ static const double MIN_BLANK_STUTTER_FACTOR = 3.0; static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { // VActive Preferred { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then SVP { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, 
.allow_state_increase = false, }, // Then DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Finally VBlank, but allow base clocks for latency to increase /* { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, */ @@ -49,56 +49,56 @@ static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1 static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = { // VActive only is preferred { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then VActive + VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = false, }, // Then VBlank only { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = false, }, 
// Then SVP + VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = false, }, // Then SVP + DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then SVP + SVP { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then DRR + VActive { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then DRR + DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Finally VBlank, but allow base clocks for latency to increase /* { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { 
dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, */ @@ -109,32 +109,32 @@ static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2 static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { // All VActive { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na }, .allow_state_increase = true, }, // VActive + 1 VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na }, .allow_state_increase = false, }, // All VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, .allow_state_increase = false, }, // All DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na }, .allow_state_increase = true, }, // All VBlank, with state increase allowed /* { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { 
dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, .allow_state_increase = true, }, */ @@ -145,32 +145,32 @@ static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3 static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { // All VActive { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive }, .allow_state_increase = true, }, // VActive + 1 VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank }, .allow_state_increase = false, }, // All Vblank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, .allow_state_increase = false, }, // All DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr }, .allow_state_increase = true, }, // All VBlank, with state increase allowed /* { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + 
.per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, .allow_state_increase = true, }, */ @@ -355,29 +355,30 @@ bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_o return result; } -static enum dml2_pmo_pstate_method convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_method base_strategy) +static enum dml2_pstate_method convert_strategy_to_drr_variant(const enum dml2_pstate_method base_strategy) { - enum dml2_pmo_pstate_method variant_strategy = 0; + enum dml2_pstate_method variant_strategy = 0; switch (base_strategy) { - case dml2_pmo_pstate_strategy_vactive: - variant_strategy = dml2_pmo_pstate_strategy_fw_vactive_drr; + case dml2_pstate_method_vactive: + variant_strategy = dml2_pstate_method_fw_vactive_drr; break; - case dml2_pmo_pstate_strategy_vblank: - variant_strategy = dml2_pmo_pstate_strategy_fw_vblank_drr; + case dml2_pstate_method_vblank: + variant_strategy = dml2_pstate_method_fw_vblank_drr; break; - case dml2_pmo_pstate_strategy_fw_svp: - variant_strategy = dml2_pmo_pstate_strategy_fw_svp_drr; + case dml2_pstate_method_fw_svp: + variant_strategy = dml2_pstate_method_fw_svp_drr; break; - case dml2_pmo_pstate_strategy_fw_vactive_drr: - case dml2_pmo_pstate_strategy_fw_vblank_drr: - case dml2_pmo_pstate_strategy_fw_svp_drr: - case dml2_pmo_pstate_strategy_fw_drr: - case dml2_pmo_pstate_strategy_reserved_hw: - case dml2_pmo_pstate_strategy_reserved_fw: - case dml2_pmo_pstate_strategy_reserved_fw_drr_clamped: - case dml2_pmo_pstate_strategy_reserved_fw_drr_var: - case dml2_pmo_pstate_strategy_na: + case dml2_pstate_method_fw_vactive_drr: + case dml2_pstate_method_fw_vblank_drr: + case dml2_pstate_method_fw_svp_drr: + case dml2_pstate_method_fw_drr: + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case 
dml2_pstate_method_count: + case dml2_pstate_method_na: default: /* no variant for this mode */ variant_strategy = base_strategy; @@ -419,23 +420,22 @@ static unsigned int get_num_expanded_strategies( static void insert_strategy_into_expanded_list( const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy, - int stream_count, - struct dml2_pmo_init_data *init_data) + const int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) { - struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; - - expanded_strategy_list = get_expanded_strategy_list(init_data, stream_count); + if (expanded_strategy_list && num_expanded_strategies) { + memcpy(&expanded_strategy_list[*num_expanded_strategies], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy)); - if (expanded_strategy_list) { - memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy)); - - init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++; + (*num_expanded_strategies)++; } } -static void expand_base_strategy(struct dml2_pmo_instance *pmo, +static void expand_base_strategy( const struct dml2_pmo_pstate_strategy *base_strategy, - unsigned int stream_count) + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) { bool skip_to_next_stream; bool expanded_strategy_added; @@ -473,7 +473,7 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, if (i >= stream_count - 1) { /* insert into strategy list */ - insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, &pmo->init_data); + insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, expanded_strategy_list, num_expanded_strategies); expanded_strategy_added = true; } else { /* skip to next stream */ @@ -512,9 +512,9 @@ static void 
expand_base_strategy(struct dml2_pmo_instance *pmo, static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy, const struct dml2_pmo_pstate_strategy *variant_strategy, - unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], - unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], - unsigned int stream_count) + const unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], + const unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], + const unsigned int stream_count) { bool valid = true; unsigned int i; @@ -522,7 +522,7 @@ static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_ /* check all restrictions are met */ for (i = 0; i < stream_count; i++) { /* vblank + vblank_drr variants are invalid */ - if (base_strategy->per_stream_pstate_method[i] == dml2_pmo_pstate_strategy_vblank && + if (base_strategy->per_stream_pstate_method[i] == dml2_pstate_method_vblank && ((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) || num_streams_per_variant_method[i] > 1)) { valid = false; @@ -533,9 +533,12 @@ static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_ return valid; } -static void expand_variant_strategy(struct dml2_pmo_instance *pmo, +static void expand_variant_strategy( const struct dml2_pmo_pstate_strategy *base_strategy, - unsigned int stream_count) + const unsigned int stream_count, + const bool should_permute, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) { bool variant_found; unsigned int i, j; @@ -544,7 +547,7 @@ static void expand_variant_strategy(struct dml2_pmo_instance *pmo, unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - enum dml2_pmo_pstate_method 
per_stream_variant_method[DML2_MAX_PLANES]; + enum dml2_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; struct dml2_pmo_pstate_strategy variant_strategy = { 0 }; /* determine number of displays per method */ @@ -585,7 +588,13 @@ static void expand_variant_strategy(struct dml2_pmo_instance *pmo, } if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) { - expand_base_strategy(pmo, &variant_strategy, stream_count); + if (should_permute) { + /* permutations are permitted, proceed to expand */ + expand_base_strategy(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); + } else { + /* no permutations allowed, so add to list now */ + insert_strategy_into_expanded_list(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); + } } /* rollback to earliest method with bases remaining */ @@ -612,18 +621,19 @@ static void expand_variant_strategy(struct dml2_pmo_instance *pmo, } } -static void expand_base_strategies( - struct dml2_pmo_instance *pmo, - const struct dml2_pmo_pstate_strategy *base_strategies_list, - const unsigned int num_base_strategies, - unsigned int stream_count) +void pmo_dcn4_fams2_expand_base_pstate_strategies( + const struct dml2_pmo_pstate_strategy *base_strategies_list, + const unsigned int num_base_strategies, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) { unsigned int i; /* expand every explicit base strategy (except all DRR) */ for (i = 0; i < num_base_strategies; i++) { - expand_base_strategy(pmo, &base_strategies_list[i], stream_count); - expand_variant_strategy(pmo, &base_strategies_list[i], stream_count); + expand_base_strategy(&base_strategies_list[i], stream_count, expanded_strategy_list, num_expanded_strategies); + expand_variant_strategy(&base_strategies_list[i], stream_count, true, 
expanded_strategy_list, num_expanded_strategies); } } @@ -652,25 +662,45 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) DML2_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ - expand_base_strategies(pmo, base_strategy_list_1_display, base_strategy_list_1_display_size, 1); + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_strategy_list_1_display, + base_strategy_list_1_display_size, + i, + pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display, + &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 2: DML2_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ - expand_base_strategies(pmo, base_strategy_list_2_display, base_strategy_list_2_display_size, 2); + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_strategy_list_2_display, + base_strategy_list_2_display_size, + i, + pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display, + &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 3: DML2_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ - expand_base_strategies(pmo, base_strategy_list_3_display, base_strategy_list_3_display_size, 3); + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_strategy_list_3_display, + base_strategy_list_3_display_size, + i, + pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display, + &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 4: DML2_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ - expand_base_strategies(pmo, base_strategy_list_4_display, base_strategy_list_4_display_size, 4); + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_strategy_list_4_display, + base_strategy_list_4_display_size, + i, + pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display, + &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); 
break; } } @@ -783,8 +813,12 @@ static int find_highest_odm_load_stream_index( int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; for (i = 0; i < display_config->num_streams; i++) { - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz + if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; + else + odm_load = 0; + if (odm_load > highest_odm_load) { highest_odm_load_index = i; highest_odm_load = odm_load; @@ -941,11 +975,8 @@ static void build_synchronized_timing_groups( /* find synchronizable timing groups */ for (j = i + 1; j < display_config->display_config.num_streams; j++) { if (memcmp(master_timing, - &display_config->display_config.stream_descriptors[j].timing, - sizeof(struct dml2_timing_cfg)) == 0 && - display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder && - (display_config->display_config.stream_descriptors[i].output.output_encoder != dml2_hdmi || //hdmi requires formats match - display_config->display_config.stream_descriptors[i].output.output_format == display_config->display_config.stream_descriptors[j].output.output_format)) { + &display_config->display_config.stream_descriptors[j].timing, + sizeof(struct dml2_timing_cfg)) == 0) { set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); set_bit_in_bitfield(&stream_mapped_mask, j); } @@ -959,7 +990,7 @@ static bool all_timings_support_vactive(const struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_config, unsigned int mask) { - unsigned char i; + unsigned int i; bool valid = true; // Create a remap array to enable simple iteration through only masked stream indicies @@ -1008,7 +1039,7 @@ static bool 
all_timings_support_drr(const struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_config, unsigned int mask) { - unsigned char i; + unsigned int i; for (i = 0; i < DML2_MAX_PLANES; i++) { const struct dml2_stream_parameters *stream_descriptor; const struct dml2_fams2_meta *stream_fams2_meta; @@ -1050,7 +1081,7 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, const struct dml2_plane_parameters *plane_descriptor; const struct dml2_fams2_meta *stream_fams2_meta; unsigned int microschedule_vlines; - unsigned char i; + unsigned int i; unsigned int num_planes_per_stream[DML2_MAX_PLANES] = { 0 }; @@ -1106,24 +1137,73 @@ static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *ps scratch->pmo_dcn4.num_pstate_candidates++; } -static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_method method) +static enum dml2_pstate_method uclk_pstate_strategy_override_to_pstate_method(const enum dml2_uclk_pstate_change_strategy override_strategy) { - unsigned char i; - enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; + enum dml2_pstate_method method = dml2_pstate_method_na; - if (method == dml2_pmo_pstate_strategy_vactive || method == dml2_pmo_pstate_strategy_fw_vactive_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - else if (method == dml2_pmo_pstate_strategy_vblank || method == dml2_pmo_pstate_strategy_fw_vblank_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (method == dml2_pmo_pstate_strategy_fw_svp) - matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (method == dml2_pmo_pstate_strategy_fw_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; + switch (override_strategy) { + case dml2_uclk_pstate_change_strategy_force_vactive: + method = 
dml2_pstate_method_vactive; + break; + case dml2_uclk_pstate_change_strategy_force_vblank: + method = dml2_pstate_method_vblank; + break; + case dml2_uclk_pstate_change_strategy_force_drr: + method = dml2_pstate_method_fw_drr; + break; + case dml2_uclk_pstate_change_strategy_force_mall_svp: + method = dml2_pstate_method_fw_svp; + break; + case dml2_uclk_pstate_change_strategy_force_mall_full_frame: + case dml2_uclk_pstate_change_strategy_auto: + default: + method = dml2_pstate_method_na; + } + + return method; +} + +static enum dml2_uclk_pstate_change_strategy pstate_method_to_uclk_pstate_strategy_override(const enum dml2_pstate_method method) +{ + enum dml2_uclk_pstate_change_strategy override_strategy = dml2_uclk_pstate_change_strategy_auto; + + switch (method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_vactive; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_vblank; + break; + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; + break; + case dml2_pstate_method_fw_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_drr; + break; + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: + default: + override_strategy = dml2_uclk_pstate_change_strategy_auto; + } + + return override_strategy; +} + +static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pstate_method method) +{ + unsigned int i; for (i = 0; i < DML2_MAX_PLANES; i++) { if (is_bit_set_in_bitfield(plane_mask, i)) { if 
(display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && - display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != matching_strategy) + display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != pstate_method_to_uclk_pstate_strategy_override(method)) return false; } } @@ -1149,32 +1229,33 @@ static void build_method_scheduling_params( static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( struct dml2_pmo_instance *pmo, - enum dml2_pmo_pstate_method stream_pstate_method, + enum dml2_pstate_method stream_pstate_method, int stream_idx) { struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta = NULL; switch (stream_pstate_method) { - case dml2_pmo_pstate_strategy_vactive: - case dml2_pmo_pstate_strategy_fw_vactive_drr: + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common; break; - case dml2_pmo_pstate_strategy_vblank: - case dml2_pmo_pstate_strategy_fw_vblank_drr: + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vblank.common; break; - case dml2_pmo_pstate_strategy_fw_svp: - case dml2_pmo_pstate_strategy_fw_svp_drr: + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_subvp.common; break; - case dml2_pmo_pstate_strategy_fw_drr: + case dml2_pstate_method_fw_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_drr.common; break; - case dml2_pmo_pstate_strategy_reserved_hw: - case dml2_pmo_pstate_strategy_reserved_fw: - case dml2_pmo_pstate_strategy_reserved_fw_drr_clamped: - case 
dml2_pmo_pstate_strategy_reserved_fw_drr_var: - case dml2_pmo_pstate_strategy_na: + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: default: stream_method_fams2_meta = NULL; } @@ -1215,7 +1296,7 @@ static bool is_timing_group_schedulable( if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); if (!stream_method_fams2_meta) - return false; + continue; if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { /* set group allow start to larger otg vline */ @@ -1295,7 +1376,7 @@ static bool is_config_schedulable( if (j_disallow_us < jp1_disallow_us) { /* swap as A < B */ swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j], - s->pmo_dcn4.sorted_group_gtl_disallow_index[j+1]); + s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]); swapped = true; } } @@ -1354,7 +1435,7 @@ static bool is_config_schedulable( if (j_period_us < jp1_period_us) { /* swap as A < B */ swap(s->pmo_dcn4.sorted_group_gtl_period_index[j], - s->pmo_dcn4.sorted_group_gtl_period_index[j+1]); + s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]); swapped = true; } } @@ -1413,7 +1494,7 @@ static bool is_config_schedulable( static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_method stream_pstate_method, + const enum dml2_pstate_method stream_pstate_method, unsigned int stream_index) { const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; @@ -1468,7 +1549,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins { struct 
dml2_pmo_scratch *s = &pmo->scratch; - unsigned char stream_index = 0; + unsigned int stream_index = 0; unsigned int svp_count = 0; unsigned int svp_stream_mask = 0; @@ -1494,19 +1575,19 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins strategy_matches_drr_requirements &= stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index); - if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || - pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { svp_count++; set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { drr_count++; set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || - pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { vactive_count++; set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || - pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || + 
pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { vblank_count++; set_bit_in_bitfield(&vblank_stream_mask, stream_index); } @@ -1532,7 +1613,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) { - unsigned char i; + unsigned int i; int min_vactive_margin_us = 0xFFFFFFF; for (i = 0; i < DML2_MAX_PLANES; i++) { @@ -1625,7 +1706,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, /* for single stream, guarantee at least an instant of allow */ stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = (unsigned int)math_floor( math_max2(0.0, - timing->v_active - stream_fams2_meta->min_allow_width_otg_vlines - stream_fams2_meta->dram_clk_change_blackout_otg_vlines)); + timing->v_active - math_max2(1.0, stream_fams2_meta->min_allow_width_otg_vlines) - stream_fams2_meta->dram_clk_change_blackout_otg_vlines)); } else { /* for multi stream, bound to a max fill time defined by IP caps */ stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = @@ -1738,8 +1819,10 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp struct display_configuation_with_meta *display_config; const struct dml2_plane_parameters *plane_descriptor; const struct dml2_pmo_pstate_strategy *strategy_list = NULL; + struct dml2_pmo_pstate_strategy override_base_strategy = { 0 }; unsigned int strategy_list_size = 0; - unsigned char plane_index, stream_index, i; + unsigned int plane_index, stream_index, i; + bool build_override_strategy = true; state->performed = true; in_out->base_display_config->stage3.min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency; @@ -1763,7 +1846,11 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp 
set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); - state->pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; + state->pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; + + build_override_strategy &= plane_descriptor->overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto; + override_base_strategy.per_stream_pstate_method[plane_descriptor->stream_index] = + uclk_pstate_strategy_override_to_pstate_method(plane_descriptor->overrides.uclk_pstate_change_strategy); } // Figure out which streams can do vactive, and also build up implicit SVP and FAMS2 meta @@ -1781,13 +1868,30 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp /* get synchronized timing groups */ build_synchronized_timing_groups(pmo, display_config); - strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); - if (!strategy_list) - return false; - - strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); + if (build_override_strategy) { + /* build expanded override strategy list (no permutations) */ + override_base_strategy.allow_state_increase = true; + s->pmo_dcn4.num_expanded_override_strategies = 0; + insert_strategy_into_expanded_list(&override_base_strategy, + display_config->display_config.num_streams, + s->pmo_dcn4.expanded_override_strategy_list, + &s->pmo_dcn4.num_expanded_override_strategies); + expand_variant_strategy(&override_base_strategy, + display_config->display_config.num_streams, + false, + s->pmo_dcn4.expanded_override_strategy_list, + &s->pmo_dcn4.num_expanded_override_strategies); + + /* use override strategy list */ + strategy_list = s->pmo_dcn4.expanded_override_strategy_list; + strategy_list_size = s->pmo_dcn4.num_expanded_override_strategies; + } else { + /* use predefined strategy list */ + strategy_list = 
get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); + strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); + } - if (strategy_list_size == 0) + if (!strategy_list || strategy_list_size == 0) return false; s->pmo_dcn4.num_pstate_candidates = 0; @@ -1799,7 +1903,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp } if (s->pmo_dcn4.num_pstate_candidates > 0) { - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates - 1].allow_state_increase = true; + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates-1].allow_state_increase = true; s->pmo_dcn4.cur_pstate_candidate = -1; return true; } else { @@ -1832,7 +1936,7 @@ static void reset_display_configuration(struct display_configuation_with_meta *d // Reset strategy to auto plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_not_supported; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_na; } } @@ -1840,7 +1944,7 @@ static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta * struct dml2_pmo_instance *pmo, int plane_mask) { - unsigned char plane_index; + unsigned int plane_index; struct dml2_plane_parameters *plane; for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { @@ -1849,7 +1953,7 @@ static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta * plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_drr; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_drr; } } @@ -1861,13 +1965,13 @@ static void setup_planes_for_svp_by_mask(struct 
display_configuation_with_meta * { struct dml2_pmo_scratch *scratch = &pmo->scratch; - unsigned char plane_index; + unsigned int plane_index; int stream_index = -1; for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { if (is_bit_set_in_bitfield(plane_mask, plane_index)) { stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp; } } @@ -1884,13 +1988,13 @@ static void setup_planes_for_svp_drr_by_mask(struct display_configuation_with_me { struct dml2_pmo_scratch *scratch = &pmo->scratch; - unsigned char plane_index; + unsigned int plane_index; int stream_index = -1; for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { if (is_bit_set_in_bitfield(plane_mask, plane_index)) { stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom_drr; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp_drr; } } @@ -1905,7 +2009,7 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met struct dml2_pmo_instance *pmo, int plane_mask) { - unsigned char plane_index; + unsigned int plane_index; struct dml2_plane_parameters *plane; for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { @@ -1915,7 +2019,7 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, plane->overrides.reserved_vblank_time_ns); - display_config->stage3.pstate_switch_modes[plane_index] = 
dml2_uclk_pstate_support_method_vblank; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vblank; } } @@ -1925,7 +2029,7 @@ static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with struct dml2_pmo_instance *pmo, int plane_mask) { - unsigned char plane_index; + unsigned int plane_index; struct dml2_plane_parameters *plane; for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { @@ -1933,7 +2037,7 @@ static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with plane = &display_config->display_config.plane_descriptors[plane_index]; plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_vblank_drr; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vblank_drr; } } } @@ -1942,14 +2046,14 @@ static void setup_planes_for_vactive_by_mask(struct display_configuation_with_me struct dml2_pmo_instance *pmo, int plane_mask) { - unsigned char plane_index; + unsigned int plane_index; unsigned int stream_index; for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { if (is_bit_set_in_bitfield(plane_mask, plane_index)) { stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; if (!pmo->options->disable_vactive_det_fill_bw_pad) { display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = @@ -1963,14 +2067,14 @@ static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_wit struct dml2_pmo_instance *pmo, int plane_mask) { - unsigned char plane_index; + 
unsigned int plane_index; unsigned int stream_index; for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { if (is_bit_set_in_bitfield(plane_mask, plane_index)) { stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_vactive_drr; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vactive_drr; if (!pmo->options->disable_vactive_det_fill_bw_pad) { display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = @@ -1992,26 +2096,26 @@ static bool setup_display_config(struct display_configuation_with_meta *display_ for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { + if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { success = false; break; - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive) { setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank) { setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if 
(scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp) { fams2_required = true; setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { fams2_required = true; setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { fams2_required = true; setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { fams2_required = true; setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == 
dml2_pstate_method_fw_drr) { fams2_required = true; setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); } @@ -2031,7 +2135,7 @@ static bool setup_display_config(struct display_configuation_with_meta *display_ static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) { int min_time_us = 0xFFFFFF; - unsigned char plane_index = 0; + unsigned int plane_index = 0; for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { if (is_bit_set_in_bitfield(plane_mask, plane_index)) { @@ -2066,34 +2170,34 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { struct dml2_fams2_meta *stream_fams2_meta = &s->pmo_dcn4.stream_fams2_meta[stream_index]; - if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us) { p_state_supported = false; break; } - } 
else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < REQUIRED_RESERVED_TIME || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || + } 
else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { + if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pstate_method_fw_drr) || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { p_state_supported = false; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h index 0c25bd3e9ac0..6baab7ad6ecc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -23,4 +23,11 @@ bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out); bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out); +void pmo_dcn4_fams2_expand_base_pstate_strategies( + const struct dml2_pmo_pstate_strategy *base_strategies_list, + const unsigned int num_base_strategies, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index add51d41a515..7ed0242a4b33 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -72,7 +72,6 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * out->init_for_stutter = pmo_dcn4_fams2_init_for_stutter; out->test_for_stutter = pmo_dcn4_fams2_test_for_stutter; out->optimize_for_stutter = pmo_dcn4_fams2_optimize_for_stutter; - result = true; break; case dml2_project_invalid: diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c new file mode 100644 index 000000000000..f88931ccbc5e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml_top.h" +#include "dml2_internal_shared_types.h" +#include "dml2_top_soc15.h" + +unsigned int dml2_get_instance_size_bytes(void) +{ + return sizeof(struct dml2_instance); +} + +bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) +{ + switch (in_out->options.project_id) { + case dml2_project_dcn4x_stage1: + case dml2_project_dcn4x_stage2: + case dml2_project_dcn4x_stage2_auto_drr_svp: + return dml2_top_soc15_initialize_instance(in_out); + case dml2_project_invalid: + default: + return false; + } +} + +bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.check_mode_supported) + return false; + + return in_out->dml2_instance->funcs.check_mode_supported(in_out); +} + +bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.build_mode_programming) + return false; + + return in_out->dml2_instance->funcs.build_mode_programming(in_out); +} + +bool dml2_build_mcache_programming(struct 
dml2_build_mcache_programming_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.build_mcache_programming) + return false; + + return in_out->dml2_instance->funcs.build_mcache_programming(in_out); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c new file mode 100644 index 000000000000..5e14d85821e2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_top_legacy.h" +#include "dml2_top_soc15.h" +#include "dml2_core_factory.h" +#include "dml2_pmo_factory.h" +#include "display_mode_core_structs.h" + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h new file mode 100644 index 000000000000..14d0ae03dce6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_TOP_LEGACY_H__ +#define __DML2_TOP_LEGACY_H__ +#include "dml2_internal_shared_types.h" +bool dml2_top_legacy_initialize_instance(struct dml2_initialize_instance_in_out *in_out); +#endif /* __DML2_TOP_LEGACY_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c deleted file mode 100644 index d0e026d981b5..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c +++ /dev/null @@ -1,307 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
- -#include "dml2_top_optimization.h" -#include "dml2_internal_shared_types.h" -#include "dml_top_mcache.h" - -static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src) -{ - memcpy(dst, src, sizeof(struct display_configuation_with_meta)); -} - -bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; - - state->performed = true; - - return true; -} - -bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; - - return state->min_clk_index_for_latency == 0; -} - -bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params) -{ - bool result = false; - - if (params->display_config->stage1.min_clk_index_for_latency > 0) { - copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); - params->optimized_display_config->stage1.min_clk_index_for_latency--; - result = true; - } - - return result; -} - -bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - bool mcache_success = false; - bool result = false; - - memset(l, 0, sizeof(struct dml2_optimization_test_function_locals)); - - l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml; - l->test_mcache.calc_mcache_count_params.display_config = ¶ms->display_config->display_config; - l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations; - - result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic 
mcache_allocations - - if (result) { - l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations; - l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes; - - dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params); - - l->test_mcache.validate_admissibility_params.dml2_instance = params->dml; - l->test_mcache.validate_admissibility_params.display_cfg = ¶ms->display_config->display_config; - l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations; - l->test_mcache.validate_admissibility_params.cfg_support_info = ¶ms->display_config->mode_support_result.cfg_support_info; - - mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift to make mcache allocation works - - memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES); - } - - return mcache_success; -} - -bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - bool optimize_success = false; - - if (params->last_candidate_supported == false) - return false; - - copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); - - l->optimize_mcache.optimize_mcache_params.instance = ¶ms->dml->pmo_instance; - l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support; - l->optimize_mcache.optimize_mcache_params.display_config = ¶ms->display_config->display_config; - l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = ¶ms->optimized_display_config->display_config; - 
l->optimize_mcache.optimize_mcache_params.cfg_support_info = ¶ms->optimized_display_config->mode_support_result.cfg_support_info; - - optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params); - - return optimize_success; -} - -bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_init_function_locals *l = params->locals; - - l->vmin.init_params.instance = ¶ms->dml->pmo_instance; - l->vmin.init_params.base_display_config = params->display_config; - return params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params); -} - -bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - - l->test_vmin.pmo_test_vmin_params.instance = ¶ms->dml->pmo_instance; - l->test_vmin.pmo_test_vmin_params.display_config = params->display_config; - l->test_vmin.pmo_test_vmin_params.vmin_limits = ¶ms->dml->soc_bbox.vmin_limit; - return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params); -} - -bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - - if (params->last_candidate_supported == false) - return false; - - l->optimize_vmin.pmo_optimize_vmin_params.instance = ¶ms->dml->pmo_instance; - l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config; - l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config; - return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params); -} - -bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) -{ - bool test_passed = false; - bool optimize_succeeded = true; - 
bool candidate_validation_passed = true; - struct optimization_init_function_params init_params = { 0 }; - struct optimization_test_function_params test_params = { 0 }; - struct optimization_optimize_function_params optimize_params = { 0 }; - - if (!params->dml || - !params->optimize_function || - !params->test_function || - !params->display_config || - !params->optimized_display_config) - return false; - - copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); - - init_params.locals = &l->init_function_locals; - init_params.dml = params->dml; - init_params.display_config = &l->cur_candidate_display_cfg; - - if (params->init_function && !params->init_function(&init_params)) - return false; - - test_params.locals = &l->test_function_locals; - test_params.dml = params->dml; - test_params.display_config = &l->cur_candidate_display_cfg; - - test_passed = params->test_function(&test_params); - - while (!test_passed && optimize_succeeded) { - memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params)); - - optimize_params.locals = &l->optimize_function_locals; - optimize_params.dml = params->dml; - optimize_params.display_config = &l->cur_candidate_display_cfg; - optimize_params.optimized_display_config = &l->next_candidate_display_cfg; - optimize_params.last_candidate_supported = candidate_validation_passed; - - optimize_succeeded = params->optimize_function(&optimize_params); - - if (optimize_succeeded) { - l->mode_support_params.instance = ¶ms->dml->core_instance; - l->mode_support_params.display_cfg = &l->next_candidate_display_cfg; - l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; - - if (l->next_candidate_display_cfg.stage3.performed) - l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency; - else - l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency; - - candidate_validation_passed = 
params->dml->core_instance.mode_support(&l->mode_support_params); - - l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; - } - - if (optimize_succeeded && candidate_validation_passed) { - memset(&test_params, 0, sizeof(struct optimization_test_function_params)); - test_params.locals = &l->test_function_locals; - test_params.dml = params->dml; - test_params.display_config = &l->next_candidate_display_cfg; - test_passed = params->test_function(&test_params); - - copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg); - - // If optimization is not all or nothing, then store partial progress in output - if (!params->all_or_nothing) - copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg); - } - } - - if (test_passed) - copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); - - return test_passed; -} - -bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) -{ - int highest_state, lowest_state, cur_state; - bool supported = false; - - if (!params->dml || - !params->optimize_function || - !params->test_function || - !params->display_config || - !params->optimized_display_config) - return false; - - copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); - highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency; - lowest_state = 0; - - while (highest_state > lowest_state) { - cur_state = (highest_state + lowest_state) / 2; - - l->mode_support_params.instance = ¶ms->dml->core_instance; - l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg; - l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; - l->mode_support_params.min_clk_index = cur_state; - - supported = 
params->dml->core_instance.mode_support(&l->mode_support_params); - - if (supported) { - l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; - highest_state = cur_state; - } else { - lowest_state = cur_state + 1; - } - } - l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state; - - copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); - - return true; -} - -bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_init_function_locals *l = params->locals; - - l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.init_params.base_display_config = params->display_config; - - return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params); -} - -bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - - l->uclk_pstate.test_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.test_params.base_display_config = params->display_config; - - return params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params); -} - -bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - - l->uclk_pstate.optimize_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.optimize_params.base_display_config = params->display_config; - l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config; - l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported; - - return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params); -} - -bool dml2_top_optimization_init_function_stutter(const struct 
optimization_init_function_params *params) -{ - struct dml2_optimization_init_function_locals *l = params->locals; - - l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.init_params.base_display_config = params->display_config; - - return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params); -} - -bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - - l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; - l->stutter.stutter_params.base_display_config = params->display_config; - return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params); -} - -bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - - l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; - l->stutter.stutter_params.base_display_config = params->display_config; - l->stutter.stutter_params.optimized_display_config = params->optimized_display_config; - l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported; - return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params); -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h deleted file mode 100644 index 9f22ab33eab1..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
- -#ifndef __DML2_TOP_OPTIMIZATION_H__ -#define __DML2_TOP_OPTIMIZATION_H__ - -#include "dml2_external_lib_deps.h" -#include "dml2_internal_shared_types.h" - -bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params); -bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params); - -bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params); -bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params); - -bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params); - -bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params); -bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params); - -bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params); -bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params); - -bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params); -bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_stutter(const struct 
optimization_optimize_function_params *params); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c new file mode 100644 index 000000000000..a8f58f8448e4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c @@ -0,0 +1,1178 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_top_soc15.h" +#include "dml2_mcg_factory.h" +#include "dml2_dpmm_factory.h" +#include "dml2_core_factory.h" +#include "dml2_pmo_factory.h" +#include "lib_float_math.h" +#include "dml2_debug.h" +static void setup_unoptimized_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + out->stage1.min_clk_index_for_latency = dml->min_clk_table.dram_bw_table.num_entries - 1; //dml->min_clk_table.clean_me_up.soc_bb.num_states - 1; +} + +static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + out->stage1.min_clk_index_for_latency = 0; +} + +static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src) +{ + memcpy(dst, src, sizeof(struct display_configuation_with_meta)); +} + +static bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + state->performed = true; + + return true; +} + +static bool dml2_top_optimization_test_function_min_clk_for_latency(const struct 
optimization_test_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + return state->min_clk_index_for_latency == 0; +} + +static bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params) +{ + bool result = false; + + if (params->display_config->stage1.min_clk_index_for_latency > 0) { + copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); + params->optimized_display_config->stage1.min_clk_index_for_latency--; + result = true; + } + + return result; +} + +static bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + bool mcache_success = false; + bool result = false; + + memset(l, 0, sizeof(struct dml2_optimization_test_function_locals)); + + l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml; + l->test_mcache.calc_mcache_count_params.display_config = ¶ms->display_config->display_config; + l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations; + + result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic mcache_allocations + + if (result) { + l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations; + l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes; + + dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params); + + l->test_mcache.validate_admissibility_params.dml2_instance = params->dml; + l->test_mcache.validate_admissibility_params.display_cfg = ¶ms->display_config->display_config; + l->test_mcache.validate_admissibility_params.mcache_allocations = 
params->display_config->stage2.mcache_allocations; + l->test_mcache.validate_admissibility_params.cfg_support_info = ¶ms->display_config->mode_support_result.cfg_support_info; + + mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift to make mcache allocation works + + memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES); + } + + return mcache_success; +} + +static bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + bool optimize_success = false; + + if (params->last_candidate_supported == false) + return false; + + copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); + + l->optimize_mcache.optimize_mcache_params.instance = ¶ms->dml->pmo_instance; + l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support; + l->optimize_mcache.optimize_mcache_params.display_config = ¶ms->display_config->display_config; + l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = ¶ms->optimized_display_config->display_config; + l->optimize_mcache.optimize_mcache_params.cfg_support_info = ¶ms->optimized_display_config->mode_support_result.cfg_support_info; + + optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params); + + return optimize_success; +} + +static bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->vmin.init_params.instance = ¶ms->dml->pmo_instance; + l->vmin.init_params.base_display_config = params->display_config; + return 
params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params); +} + +static bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->test_vmin.pmo_test_vmin_params.instance = ¶ms->dml->pmo_instance; + l->test_vmin.pmo_test_vmin_params.display_config = params->display_config; + l->test_vmin.pmo_test_vmin_params.vmin_limits = ¶ms->dml->soc_bbox.vmin_limit; + return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params); +} + +static bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + if (params->last_candidate_supported == false) + return false; + + l->optimize_vmin.pmo_optimize_vmin_params.instance = ¶ms->dml->pmo_instance; + l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config; + l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config; + return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params); +} + +static bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.init_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params); +} + +static bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->uclk_pstate.test_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.test_params.base_display_config = params->display_config; + + return 
params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params); +} + +static bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + l->uclk_pstate.optimize_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.optimize_params.base_display_config = params->display_config; + l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config; + l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported; + + return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params); +} + +static bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.init_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params); +} + +static bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; + l->stutter.stutter_params.base_display_config = params->display_config; + return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params); +} + +static bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; + l->stutter.stutter_params.base_display_config = params->display_config; + l->stutter.stutter_params.optimized_display_config = params->optimized_display_config; + 
l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported; + return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params); +} + +static bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) +{ + bool test_passed = false; + bool optimize_succeeded = true; + bool candidate_validation_passed = true; + struct optimization_init_function_params init_params = { 0 }; + struct optimization_test_function_params test_params = { 0 }; + struct optimization_optimize_function_params optimize_params = { 0 }; + + if (!params->dml || + !params->optimize_function || + !params->test_function || + !params->display_config || + !params->optimized_display_config) + return false; + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); + + init_params.locals = &l->init_function_locals; + init_params.dml = params->dml; + init_params.display_config = &l->cur_candidate_display_cfg; + + if (params->init_function && !params->init_function(&init_params)) + return false; + + test_params.locals = &l->test_function_locals; + test_params.dml = params->dml; + test_params.display_config = &l->cur_candidate_display_cfg; + + test_passed = params->test_function(&test_params); + + while (!test_passed && optimize_succeeded) { + memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params)); + + optimize_params.locals = &l->optimize_function_locals; + optimize_params.dml = params->dml; + optimize_params.display_config = &l->cur_candidate_display_cfg; + optimize_params.optimized_display_config = &l->next_candidate_display_cfg; + optimize_params.last_candidate_supported = candidate_validation_passed; + + optimize_succeeded = params->optimize_function(&optimize_params); + + if (optimize_succeeded) { + l->mode_support_params.instance = ¶ms->dml->core_instance; + l->mode_support_params.display_cfg = 
&l->next_candidate_display_cfg; + l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; + + if (l->next_candidate_display_cfg.stage3.performed) + l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency; + else + l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency; + candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params); + l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; + } + + if (optimize_succeeded && candidate_validation_passed) { + memset(&test_params, 0, sizeof(struct optimization_test_function_params)); + test_params.locals = &l->test_function_locals; + test_params.dml = params->dml; + test_params.display_config = &l->next_candidate_display_cfg; + test_passed = params->test_function(&test_params); + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg); + + // If optimization is not all or nothing, then store partial progress in output + if (!params->all_or_nothing) + copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg); + } + } + + if (test_passed) + copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); + + return test_passed; +} + +static bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) +{ + int highest_state, lowest_state, cur_state; + bool supported = false; + + if (!params->dml || + !params->optimize_function || + !params->test_function || + !params->display_config || + !params->optimized_display_config) + return false; + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); + highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency; + lowest_state = 0; + + while 
(highest_state > lowest_state) { + cur_state = (highest_state + lowest_state) / 2; + + l->mode_support_params.instance = ¶ms->dml->core_instance; + l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg; + l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; + l->mode_support_params.min_clk_index = cur_state; + supported = params->dml->core_instance.mode_support(&l->mode_support_params); + + if (supported) { + l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; + highest_state = cur_state; + } else { + lowest_state = cur_state + 1; + } + } + l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state; + + copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); + + return true; +} + +/* +* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming. +* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means +* that the horizontal viewport does not span more than 2 cache slices. +* +* It optionally also can apply a constant shift to all the cache boundaries. 
+*/ +static const uint32_t MCACHE_ID_UNASSIGNED = 0xF; +static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF; + +static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift, + int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset) +{ + const int MAX_VP = 0xFFFFFF; + int left_cache_id; + int right_cache_id; + int range_start; + int range_end; + bool success = false; + + if (num_boundaries <= 1) { + if (first_offset && second_offset) { + *first_offset = 0; + *second_offset = -1; + } + success = true; + return success; + } else { + range_start = 0; + for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) { + range_end = mcache_boundaries[left_cache_id] - shift - 1; + + if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end) + break; + + range_start = range_end + 1; + } + + range_end = MAX_VP; + for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) { + if (right_cache_id >= 0) + range_start = mcache_boundaries[right_cache_id] - shift; + else + range_start = 0; + + if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) { + break; + } + range_end = range_start - 1; + } + right_cache_id = (right_cache_id + 1) % num_boundaries; + + if (right_cache_id == left_cache_id) { + if (first_offset && second_offset) { + *first_offset = left_cache_id; + *second_offset = -1; + } + success = true; + } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) { + if (first_offset && second_offset) { + *first_offset = left_cache_id; + *second_offset = right_cache_id; + } + success = true; + } + } + + return success; +} + +/* +* For a given set of pipe start/end x positions, checks to see it can support the input mcache splitting. +* It also attempts to "optimize" by finding a shift if the default 0 shift does not work. 
+*/ +static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries, + int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift) +{ + int max_shift = 0xFFFF; + unsigned int pipe_index; + unsigned int i, slice_width; + bool success = false; + + for (i = 0; i < num_boundaries; i++) { + if (i == 0) + slice_width = mcache_boundaries[i]; + else + slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1]; + + if (max_shift > (int)slice_width) { + max_shift = slice_width; + } + } + + for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) { + success = true; + for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) { + if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift, + pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) { + success = false; + break; + } + } + if (success) + break; + } + + return success; +} + +/* +* Counts the number of elements inside input array within the given span length. +* Formally, what is the size of the largest subset of the array where the largest and smallest element +* differ no more than the span. +*/ +static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span) +{ + unsigned int i; + unsigned int span_start_value; + unsigned int span_start_index; + unsigned int greatest_element_count; + + if (array_size == 0) + return 1; + + if (span == 0) + return array_size > 0 ? 
1 : 0; + + span_start_value = 0; + span_start_index = 0; + greatest_element_count = 0; + + while (span_start_index < array_size) { + for (i = span_start_index; i < array_size; i++) { + if (array[i] - span_start_value <= span) { + if (i - span_start_index + 1 > greatest_element_count) { + greatest_element_count = i - span_start_index + 1; + } + } else + break; + } + + span_start_index++; + + if (span_start_index < array_size) { + span_start_value = array[span_start_index - 1] + 1; + } + } + + return greatest_element_count; +} + +static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes, + enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end) +{ + int i, slice_width; + const char MAX_SCL_VP_OVERLAP = 3; + bool success = false; + + switch (scaling_transform) { + case dml2_scaling_transform_centered: + case dml2_scaling_transform_aspect_ratio: + case dml2_scaling_transform_fullscreen: + slice_width = full_vp_width / num_pipes; + for (i = 0; i < num_pipes; i++) { + pipe_vp_x_start[i] = i * slice_width; + pipe_vp_x_end[i] = (i + 1) * slice_width - 1; + + if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP) + pipe_vp_x_start[i] = 0; + else + pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP; + + if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1) + pipe_vp_x_end[i] = full_vp_width - 1; + else + pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP; + } + break; + case dml2_scaling_transform_explicit: + default: + success = false; + break; + } + + return success; +} + +bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params) +{ + struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; + struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals; + + const int MAX_PIXEL_OVERLAP = 6; + int max_per_pipe_vp_p0 = 0; + int max_per_pipe_vp_p1 = 0; + int temp, p0shift, p1shift; + unsigned int plane_index = 0; + 
unsigned int i; + unsigned int odm_combine_factor; + unsigned int mpc_combine_factor; + unsigned int num_dpps; + unsigned int num_boundaries; + enum dml2_scaling_transform scaling_transform; + const struct dml2_plane_parameters *plane; + const struct dml2_stream_parameters *stream; + + bool p0pass = false; + bool p1pass = false; + bool all_pass = true; + + for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) { + if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable) + continue; + + plane = ¶ms->display_cfg->plane_descriptors[plane_index]; + stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; + + num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; + + if (odm_combine_factor == 1) + num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; + else + mpc_combine_factor = 1; + + if (odm_combine_factor > 1) { + max_per_pipe_vp_p0 = plane->surface.plane0.width; + temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor); + + if (temp < max_per_pipe_vp_p0) + max_per_pipe_vp_p0 = temp; + + max_per_pipe_vp_p1 = plane->surface.plane1.width; + temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor); + + if (temp < max_per_pipe_vp_p1) + max_per_pipe_vp_p1 = temp; + } else { + max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor; + max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor; + } + + max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP; + max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP; + + p0shift = 0; + p1shift = 0; + + // The last element in the unshifted boundary array will always be the first pixel outside the + // plane, which means theres no mcache associated with it, so -1 + num_boundaries = 
params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) { + p0pass = true; + } + num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) { + p1pass = true; + } + + if (!p0pass || !p1pass) { + if (odm_combine_factor > 1) { + num_dpps = odm_combine_factor; + scaling_transform = plane->composition.scaling_transform; + } else { + num_dpps = mpc_combine_factor; + scaling_transform = dml2_scaling_transform_fullscreen; + } + + if (!p0pass) { + if (plane->composition.viewport.stationary) { + calculate_h_split_for_scaling_transform(plane->surface.plane0.width, + stream->timing.h_active, num_dpps, scaling_transform, + &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); + p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + params->mcache_allocations[plane_index].num_mcaches_plane0, + &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps, + params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift); + } + } + if (!p1pass) { + if (plane->composition.viewport.stationary) { + calculate_h_split_for_scaling_transform(plane->surface.plane1.width, + stream->timing.h_active, num_dpps, scaling_transform, + &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); + p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + 
params->mcache_allocations[plane_index].num_mcaches_plane1, + &l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps, + params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift); + } + } + } + + if (p0pass && p1pass) { + for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) { + params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift; + } + for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) { + params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift; + } + } + + params->per_plane_status[plane_index] = p0pass && p1pass; + all_pass &= p0pass && p1pass; + } + + return all_pass; +} + +static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs) +{ + // Initialize all entries to special valid MCache ID and special valid split coordinate + per_plane_pipe_mcache_regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED; + + per_plane_pipe_mcache_regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED; + + per_plane_pipe_mcache_regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED; + + per_plane_pipe_mcache_regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED; +} + +void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params) +{ 
+ int i; + unsigned int j; + int next_unused_cache_id = 0; + + for (i = 0; i < params->num_allocations; i++) { + if (!params->allocations[i].valid) + continue; + + for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { + params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++; + } + for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { + params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++; + } + + // The "psuedo-last" slice is always wrapped around + params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] = + params->allocations[i].global_mcache_ids_plane0[0]; + params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] = + params->allocations[i].global_mcache_ids_plane1[0]; + + // If we need dedicated caches for mall requesting, then we assign them here. + if (params->allocations[i].requires_dedicated_mall_mcache) { + for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { + params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++; + } + for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { + params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++; + } + + // The "psuedo-last" slice is always wrapped around + params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] = + params->allocations[i].global_mcache_ids_mall_plane0[0]; + params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] = + params->allocations[i].global_mcache_ids_mall_plane1[0]; + } + + // If P0 and P1 are sharing caches, then it means the largest mcache IDs for p0 and p1 can be the same + // since mcache IDs are always ascending, then it means the largest mcacheID of p1 should be the + // largest mcacheID of P0 + if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && + 
params->allocations[i].last_slice_sharing.plane0_plane1) { + params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] = + params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; + } + + // If we need dedicated caches handle last slice sharing + if (params->allocations[i].requires_dedicated_mall_mcache) { + if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && + params->allocations[i].last_slice_sharing.plane0_plane1) { + params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = + params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1]; + } + // If mall_comb_mcache_l is set then it means that largest mcache ID for MALL p0 can be same as regular read p0 + if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) { + params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] = + params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; + } + // If mall_comb_mcache_c is set then it means that largest mcache ID for MALL p1 can be same as regular + // read p1 (which can be same as regular read p0 if plane0_plane1 is also set) + if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) { + params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = + params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1]; + } + } + + // If you don't need dedicated mall mcaches, the mall mcache assignments are identical to the normal requesting + if (!params->allocations[i].requires_dedicated_mall_mcache) { + memcpy(params->allocations[i].global_mcache_ids_mall_plane0, 
params->allocations[i].global_mcache_ids_plane0, + sizeof(params->allocations[i].global_mcache_ids_mall_plane0)); + memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1, + sizeof(params->allocations[i].global_mcache_ids_mall_plane1)); + } + } +} + +bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params) +{ + struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; + struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals; + + unsigned int total_mcaches_required; + unsigned int i; + bool result = false; + + if (dml->soc_bbox.num_dcc_mcaches == 0) { + return true; + } + + total_mcaches_required = 0; + l->calc_mcache_params.instance = &dml->core_instance; + for (i = 0; i < params->display_config->num_planes; i++) { + if (!params->display_config->plane_descriptors[i].surface.dcc.enable) { + memset(¶ms->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation)); + continue; + } + + l->calc_mcache_params.plane_descriptor = ¶ms->display_config->plane_descriptors[i]; + l->calc_mcache_params.mcache_allocation = ¶ms->mcache_allocations[i]; + l->calc_mcache_params.plane_index = i; + + if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) { + result = false; + break; + } + + if (params->mcache_allocations[i].valid) { + total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1; + if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1) + total_mcaches_required--; + } + } + dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); + + if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { + result = false; + } else { + result = true; + } + + return result; +} + +static bool dml2_top_soc15_check_mode_supported(struct 
dml2_check_mode_supported_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals; + struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming; + + bool result = false; + bool mcache_success = false; + memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); + + setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (result) { + struct optimization_phase_params mcache_phase = { + .dml = dml, + .display_config = &l->base_display_config_with_meta, + .test_function = dml2_top_optimization_test_function_mcache, + .optimize_function = dml2_top_optimization_optimize_function_mcache, + .optimized_display_config = &l->optimized_display_config_with_meta, + .all_or_nothing = false, + }; + mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &mcache_phase); + } + + /* + * Call DPMM to map all requirements to minimum clock state + */ + if (result) { + l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; + l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_mode_params.programming = dpmm_programming; + l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; + l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; + result = 
dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); + } + + in_out->is_supported = mcache_success; + result = result && in_out->is_supported; + + return result; +} + +static bool dml2_top_soc15_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_build_mode_programming_locals *l = &dml->scratch.build_mode_programming_locals; + + bool result = false; + bool mcache_success = false; + bool uclk_pstate_success = false; + bool vmin_success = false; + bool stutter_success = false; + unsigned int i; + + memset(l, 0, sizeof(struct dml2_build_mode_programming_locals)); + memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming)); + + memcpy(&in_out->programming->display_config, in_out->display_config, sizeof(struct dml2_display_cfg)); + + setup_speculative_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + 
l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = false; + dml->core_instance.populate_informative(&l->informative_params); + + return false; + } + + /* + * Phase 1: Determine minimum clocks to satisfy latency requirements for this mode + */ + memset(&l->min_clock_for_latency_phase, 0, sizeof(struct optimization_phase_params)); + l->min_clock_for_latency_phase.dml = dml; + l->min_clock_for_latency_phase.display_config = &l->base_display_config_with_meta; + l->min_clock_for_latency_phase.init_function = dml2_top_optimization_init_function_min_clk_for_latency; + l->min_clock_for_latency_phase.test_function = dml2_top_optimization_test_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimize_function = dml2_top_optimization_optimize_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->min_clock_for_latency_phase.all_or_nothing = false; + + dml2_top_optimization_perform_optimization_phase_1(&l->optimization_phase_locals, &l->min_clock_for_latency_phase); + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + } + + /* + * Phase 2: Satisfy DCC mcache requirements + */ + memset(&l->mcache_phase, 0, sizeof(struct optimization_phase_params)); + l->mcache_phase.dml = dml; + l->mcache_phase.display_config = &l->base_display_config_with_meta; + l->mcache_phase.test_function = dml2_top_optimization_test_function_mcache; + l->mcache_phase.optimize_function = dml2_top_optimization_optimize_function_mcache; + l->mcache_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->mcache_phase.all_or_nothing = true; + + mcache_success = 
dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->mcache_phase); + + if (!mcache_success) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = false; + + dml->core_instance.populate_informative(&l->informative_params); + + in_out->programming->informative.failed_mcache_validation = true; + return false; + } + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + + /* + * Phase 3: Optimize for Pstate + */ + memset(&l->uclk_pstate_phase, 0, sizeof(struct optimization_phase_params)); + l->uclk_pstate_phase.dml = dml; + l->uclk_pstate_phase.display_config = &l->base_display_config_with_meta; + l->uclk_pstate_phase.init_function = dml2_top_optimization_init_function_uclk_pstate; + l->uclk_pstate_phase.test_function = dml2_top_optimization_test_function_uclk_pstate; + l->uclk_pstate_phase.optimize_function = dml2_top_optimization_optimize_function_uclk_pstate; + l->uclk_pstate_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->uclk_pstate_phase.all_or_nothing = true; + + uclk_pstate_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->uclk_pstate_phase); + + if (uclk_pstate_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage3.success = true; + } + + /* + * Phase 4: Optimize for Vmin + */ + memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); + l->vmin_phase.dml = dml; + l->vmin_phase.display_config = &l->base_display_config_with_meta; + l->vmin_phase.init_function = dml2_top_optimization_init_function_vmin; + l->vmin_phase.test_function = dml2_top_optimization_test_function_vmin; + l->vmin_phase.optimize_function = 
dml2_top_optimization_optimize_function_vmin; + l->vmin_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->vmin_phase.all_or_nothing = false; + + vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); + + if (l->optimized_display_config_with_meta.stage4.performed) { + /* + * when performed is true, optimization has applied to + * optimized_display_config_with_meta and it has passed mode + * support. However it may or may not pass the test function to + * reach actual Vmin. As long as voltage is optimized even if it + * doesn't reach Vmin level, there is still power benefit so in + * this case we will still copy this optimization into base + * display config. + */ + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage4.success = vmin_success; + } + + /* + * Phase 5: Optimize for Stutter + */ + memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params)); + l->stutter_phase.dml = dml; + l->stutter_phase.display_config = &l->base_display_config_with_meta; + l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter; + l->stutter_phase.test_function = dml2_top_optimization_test_function_stutter; + l->stutter_phase.optimize_function = dml2_top_optimization_optimize_function_stutter; + l->stutter_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->stutter_phase.all_or_nothing = true; + + stutter_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->stutter_phase); + + if (stutter_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage5.success = true; + } + + /* + * Populate mcache programming + */ + for (i = 0; i < in_out->display_config->num_planes; 
i++) { + in_out->programming->plane_programming[i].mcache_allocation = l->base_display_config_with_meta.stage2.mcache_allocations[i]; + } + + /* + * Call DPMM to map all requirements to minimum clock state + */ + if (result) { + l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; + l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_mode_params.programming = in_out->programming; + l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; + l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; + result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); + if (!result) + in_out->programming->informative.failed_dpmm = true; + } + + if (result) { + l->mode_programming_params.instance = &dml->core_instance; + l->mode_programming_params.display_cfg = &l->base_display_config_with_meta; + l->mode_programming_params.cfg_support_info = &l->base_display_config_with_meta.mode_support_result.cfg_support_info; + l->mode_programming_params.programming = in_out->programming; + result = dml->core_instance.mode_programming(&l->mode_programming_params); + if (!result) + in_out->programming->informative.failed_mode_programming = true; + } + + if (result) { + l->dppm_map_watermarks_params.core = &dml->core_instance; + l->dppm_map_watermarks_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_watermarks_params.programming = in_out->programming; + result = dml->dpmm_instance.map_watermarks(&l->dppm_map_watermarks_params); + } + + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = result; + + dml->core_instance.populate_informative(&l->informative_params); + + return result; +} + +bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params) +{ + bool success = true; + int config_index, pipe_index; + int first_offset, second_offset; + int 
free_per_plane_reg_index = 0; + + memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *)); + + for (config_index = 0; config_index < params->num_configurations; config_index++) { + for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) { + // Allocate storage for the mcache regs + params->per_plane_pipe_mcache_regs[config_index][pipe_index] = ¶ms->mcache_regs_set[free_per_plane_reg_index++]; + + reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]); + + if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) { + // P0 always enabled + if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0, + params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0, + 0, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start + + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1, + &first_offset, &second_offset)) { + success = false; + break; + } + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset]; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset]; + + if (second_offset >= 0) { + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset]; + 
params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; + } + + // Populate P1 if enabled + if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) { + if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1, + params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1, + 0, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start + + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1, + &first_offset, &second_offset)) { + success = false; + break; + } + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset]; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset]; + + if (second_offset >= 0) { + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset]; + 
params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + } + } + } + } + } + + return success; +} + +static const struct dml2_top_funcs soc15_funcs = { + .check_mode_supported = dml2_top_soc15_check_mode_supported, + .build_mode_programming = dml2_top_soc15_build_mode_programming, + .build_mcache_programming = dml2_top_soc15_build_mcache_programming, +}; + +bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_initialize_instance_locals *l = &dml->scratch.initialize_instance_locals; + struct dml2_core_initialize_in_out core_init_params = { 0 }; + struct dml2_mcg_build_min_clock_table_params_in_out mcg_build_min_clk_params = { 0 }; + struct dml2_pmo_initialize_in_out pmo_init_params = { 0 }; + bool result = false; + + memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); + memset(dml, 0, sizeof(struct dml2_instance)); + + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); + + dml->project_id = in_out->options.project_id; + dml->pmo_options = in_out->options.pmo_options; + + // Initialize All Components + result = dml2_mcg_create(in_out->options.project_id, &dml->mcg_instance); + + if (result) + result = dml2_dpmm_create(in_out->options.project_id, &dml->dpmm_instance); + + if (result) + result = 
dml2_core_create(in_out->options.project_id, &dml->core_instance); + + if (result) { + mcg_build_min_clk_params.soc_bb = &in_out->soc_bb; + mcg_build_min_clk_params.min_clk_table = &dml->min_clk_table; + result = dml->mcg_instance.build_min_clock_table(&mcg_build_min_clk_params); + } + + if (result) { + core_init_params.project_id = in_out->options.project_id; + core_init_params.instance = &dml->core_instance; + core_init_params.minimum_clock_table = &dml->min_clk_table; + core_init_params.explicit_ip_bb = in_out->overrides.explicit_ip_bb; + core_init_params.explicit_ip_bb_size = in_out->overrides.explicit_ip_bb_size; + core_init_params.ip_caps = &in_out->ip_caps; + core_init_params.soc_bb = &in_out->soc_bb; + result = dml->core_instance.initialize(&core_init_params); + + if (core_init_params.explicit_ip_bb && core_init_params.explicit_ip_bb_size > 0) { + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + } + } + + if (result) + result = dml2_pmo_create(in_out->options.project_id, &dml->pmo_instance); + + if (result) { + pmo_init_params.instance = &dml->pmo_instance; + pmo_init_params.soc_bb = &dml->soc_bbox; + pmo_init_params.ip_caps = &dml->ip_caps; + pmo_init_params.mcg_clock_table_size = dml->min_clk_table.dram_bw_table.num_entries; + pmo_init_params.options = &dml->pmo_options; + dml->pmo_instance.initialize(&pmo_init_params); + } + dml->funcs = soc15_funcs; + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h index 7b1f6f7143d0..53bd8602f9ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h @@ -2,22 +2,13 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
-#ifndef __DML_TOP_MCACHE_H__ -#define __DML_TOP_MCACHE_H__ - -#include "dml2_external_lib_deps.h" -#include "dml_top_display_cfg_types.h" -#include "dml_top_types.h" +#ifndef __DML2_TOP_SOC15_H__ +#define __DML2_TOP_SOC15_H__ #include "dml2_internal_shared_types.h" +bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out); bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params); - void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params); - bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params); - -bool dml2_top_mcache_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params); - -bool dml2_top_mcache_unit_test(void); - -#endif +bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params); +#endif /* __DML2_TOP_SOC15_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c deleted file mode 100644 index a342ebfbe4e7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c +++ /dev/null @@ -1,549 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_debug.h" - -#include "dml_top_mcache.h" -#include "lib_float_math.h" - -#include "dml2_internal_shared_types.h" - -/* -* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming. -* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means -* that the horizontal viewport does not span more than 2 cache slices. -* -* It optionally also can apply a constant shift to all the cache boundaries. 
-*/ -static const uint32_t MCACHE_ID_UNASSIGNED = 0xF; -static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF; - -static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift, - int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset) -{ - const int MAX_VP = 0xFFFFFF; - int left_cache_id; - int right_cache_id; - int range_start; - int range_end; - bool success = false; - - if (num_boundaries <= 1) { - if (first_offset && second_offset) { - *first_offset = 0; - *second_offset = -1; - } - success = true; - return success; - } else { - range_start = 0; - for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) { - range_end = mcache_boundaries[left_cache_id] - shift - 1; - - if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end) - break; - - range_start = range_end + 1; - } - - range_end = MAX_VP; - for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) { - if (right_cache_id >= 0) - range_start = mcache_boundaries[right_cache_id] - shift; - else - range_start = 0; - - if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) { - break; - } - range_end = range_start - 1; - } - right_cache_id = (right_cache_id + 1) % num_boundaries; - - if (right_cache_id == left_cache_id) { - if (first_offset && second_offset) { - *first_offset = left_cache_id; - *second_offset = -1; - } - success = true; - } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) { - if (first_offset && second_offset) { - *first_offset = left_cache_id; - *second_offset = right_cache_id; - } - success = true; - } - } - - return success; -} - -/* -* For a given set of pipe start/end x positions, checks to see it can support the input mcache splitting. -* It also attempts to "optimize" by finding a shift if the default 0 shift does not work. 
-*/ -static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries, - int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift) -{ - int max_shift = 0xFFFF; - unsigned int pipe_index; - unsigned int i, slice_width; - bool success = false; - - for (i = 0; i < num_boundaries; i++) { - if (i == 0) - slice_width = mcache_boundaries[i]; - else - slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1]; - - if (max_shift > (int)slice_width) { - max_shift = slice_width; - } - } - - for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) { - success = true; - for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) { - if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift, - pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) { - success = false; - break; - } - } - if (success) - break; - } - - return success; -} - -/* -* Counts the number of elements inside input array within the given span length. -* Formally, what is the size of the largest subset of the array where the largest and smallest element -* differ no more than the span. -*/ -static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span) -{ - unsigned int i; - unsigned int span_start_value; - unsigned int span_start_index; - unsigned int greatest_element_count; - - if (array_size == 0) - return 1; - - if (span == 0) - return array_size > 0 ? 
1 : 0; - - span_start_value = 0; - span_start_index = 0; - greatest_element_count = 0; - - while (span_start_index < array_size) { - for (i = span_start_index; i < array_size; i++) { - if (array[i] - span_start_value <= span) { - if (i - span_start_index + 1 > greatest_element_count) { - greatest_element_count = i - span_start_index + 1; - } - } else - break; - } - - span_start_index++; - - if (span_start_index < array_size) { - span_start_value = array[span_start_index - 1] + 1; - } - } - - return greatest_element_count; -} - -static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes, - enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end) -{ - int i, slice_width; - const char MAX_SCL_VP_OVERLAP = 3; - bool success = false; - - switch (scaling_transform) { - case dml2_scaling_transform_centered: - case dml2_scaling_transform_aspect_ratio: - case dml2_scaling_transform_fullscreen: - slice_width = full_vp_width / num_pipes; - for (i = 0; i < num_pipes; i++) { - pipe_vp_x_start[i] = i * slice_width; - pipe_vp_x_end[i] = (i + 1) * slice_width - 1; - - if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP) - pipe_vp_x_start[i] = 0; - else - pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP; - - if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1) - pipe_vp_x_end[i] = full_vp_width - 1; - else - pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP; - } - break; - case dml2_scaling_transform_explicit: - default: - success = false; - break; - } - - return success; -} - -bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params) -{ - struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; - struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals; - - const int MAX_PIXEL_OVERLAP = 6; - int max_per_pipe_vp_p0 = 0; - int max_per_pipe_vp_p1 = 0; - int temp, p0shift, p1shift; - unsigned int plane_index = 0; - 
unsigned int i; - unsigned int odm_combine_factor; - unsigned int mpc_combine_factor; - unsigned int num_dpps; - unsigned int num_boundaries; - enum dml2_scaling_transform scaling_transform; - const struct dml2_plane_parameters *plane; - const struct dml2_stream_parameters *stream; - - bool p0pass = false; - bool p1pass = false; - bool all_pass = true; - - for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) { - if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable) - continue; - - plane = ¶ms->display_cfg->plane_descriptors[plane_index]; - stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; - - num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; - - if (odm_combine_factor == 1) - num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; - else - mpc_combine_factor = 1; - - if (odm_combine_factor > 1) { - max_per_pipe_vp_p0 = plane->surface.plane0.width; - temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor); - - if (temp < max_per_pipe_vp_p0) - max_per_pipe_vp_p0 = temp; - - max_per_pipe_vp_p1 = plane->surface.plane1.width; - temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor); - - if (temp < max_per_pipe_vp_p1) - max_per_pipe_vp_p1 = temp; - } else { - max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor; - max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor; - } - - max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP; - max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP; - - p0shift = 0; - p1shift = 0; - - // The last element in the unshifted boundary array will always be the first pixel outside the - // plane, which means theres no mcache associated with it, so -1 - num_boundaries = 
params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; - if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) { - p0pass = true; - } - num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; - if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) { - p1pass = true; - } - - if (!p0pass || !p1pass) { - if (odm_combine_factor > 1) { - num_dpps = odm_combine_factor; - scaling_transform = plane->composition.scaling_transform; - } else { - num_dpps = mpc_combine_factor; - scaling_transform = dml2_scaling_transform_fullscreen; - } - - if (!p0pass) { - if (plane->composition.viewport.stationary) { - calculate_h_split_for_scaling_transform(plane->surface.plane0.width, - stream->timing.h_active, num_dpps, scaling_transform, - &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); - p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - params->mcache_allocations[plane_index].num_mcaches_plane0, - &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps, - params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift); - } - } - if (!p1pass) { - if (plane->composition.viewport.stationary) { - calculate_h_split_for_scaling_transform(plane->surface.plane1.width, - stream->timing.h_active, num_dpps, scaling_transform, - &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); - p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - 
params->mcache_allocations[plane_index].num_mcaches_plane1, - &l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps, - params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift); - } - } - } - - if (p0pass && p1pass) { - for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) { - params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift; - } - for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) { - params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift; - } - } - - params->per_plane_status[plane_index] = p0pass && p1pass; - all_pass &= p0pass && p1pass; - } - - return all_pass; -} - -static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs) -{ - // Initialize all entries to special valid MCache ID and special valid split coordinate - per_plane_pipe_mcache_regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED; - - per_plane_pipe_mcache_regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED; - - per_plane_pipe_mcache_regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED; - - per_plane_pipe_mcache_regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED; -} - -bool dml2_top_mcache_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params) -{ - bool 
success = true; - int config_index, pipe_index; - int first_offset, second_offset; - int free_per_plane_reg_index = 0; - - memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *)); - - for (config_index = 0; config_index < params->num_configurations; config_index++) { - for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) { - // Allocate storage for the mcache regs - params->per_plane_pipe_mcache_regs[config_index][pipe_index] = ¶ms->mcache_regs_set[free_per_plane_reg_index++]; - - reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]); - - if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) { - // P0 always enabled - if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0, - params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0, - 0, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start + - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1, - &first_offset, &second_offset)) { - success = false; - break; - } - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset]; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset]; - - if (second_offset >= 0) { - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second = - 
params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; - } - - // Populate P1 if enabled - if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) { - if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1, - params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1, - 0, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start + - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1, - &first_offset, &second_offset)) { - success = false; - break; - } - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset]; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset]; - - if (second_offset >= 0) { - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second = - 
params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; - } - } - } - } - } - - return success; -} - -void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params) -{ - int i; - unsigned int j; - int next_unused_cache_id = 0; - - for (i = 0; i < params->num_allocations; i++) { - if (!params->allocations[i].valid) - continue; - - for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { - params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++; - } - for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { - params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++; - } - - // The "psuedo-last" slice is always wrapped around - params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] = - params->allocations[i].global_mcache_ids_plane0[0]; - params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] = - params->allocations[i].global_mcache_ids_plane1[0]; - - // If we need dedicated caches for mall requesting, then we assign them here. 
- if (params->allocations[i].requires_dedicated_mall_mcache) { - for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { - params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++; - } - for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { - params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++; - } - - // The "psuedo-last" slice is always wrapped around - params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] = - params->allocations[i].global_mcache_ids_mall_plane0[0]; - params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] = - params->allocations[i].global_mcache_ids_mall_plane1[0]; - } - - // If P0 and P1 are sharing caches, then it means the largest mcache IDs for p0 and p1 can be the same - // since mcache IDs are always ascending, then it means the largest mcacheID of p1 should be the - // largest mcacheID of P0 - if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && - params->allocations[i].last_slice_sharing.plane0_plane1) { - params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] = - params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; - } - - // If we need dedicated caches handle last slice sharing - if (params->allocations[i].requires_dedicated_mall_mcache) { - if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && - params->allocations[i].last_slice_sharing.plane0_plane1) { - params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = - params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1]; - } - // If mall_comb_mcache_l is set then it means that largest mcache ID for MALL p0 can be same as regular read p0 - if 
(params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) { - params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] = - params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; - } - // If mall_comb_mcache_c is set then it means that largest mcache ID for MALL p1 can be same as regular - // read p1 (which can be same as regular read p0 if plane0_plane1 is also set) - if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) { - params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = - params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1]; - } - } - - // If you don't need dedicated mall mcaches, the mall mcache assignments are identical to the normal requesting - if (!params->allocations[i].requires_dedicated_mall_mcache) { - memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0, - sizeof(params->allocations[i].global_mcache_ids_mall_plane0)); - memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1, - sizeof(params->allocations[i].global_mcache_ids_mall_plane1)); - } - } -} - -bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params) -{ - struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; - struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals; - - unsigned int total_mcaches_required; - unsigned int i; - bool result = false; - - if (dml->soc_bbox.num_dcc_mcaches == 0) { - return true; - } - - total_mcaches_required = 0; - l->calc_mcache_params.instance = &dml->core_instance; - for (i = 0; i < params->display_config->num_planes; i++) { - if 
(!params->display_config->plane_descriptors[i].surface.dcc.enable) { - memset(¶ms->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation)); - continue; - } - - l->calc_mcache_params.plane_descriptor = ¶ms->display_config->plane_descriptors[i]; - l->calc_mcache_params.mcache_allocation = ¶ms->mcache_allocations[i]; - l->calc_mcache_params.plane_index = i; - - if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) { - result = false; - break; - } - - if (params->mcache_allocations[i].valid) { - total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1; - if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1) - total_mcaches_required--; - } - } - dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); - - if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { - result = false; - } else { - result = true; - } - - return result; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c index e9b8e10695ae..f95c7ff56f15 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c @@ -4,6 +4,11 @@ #include "dml2_debug.h" +int dml2_log_internal(const char *format, ...) +{ + return 0; +} + int dml2_printf(const char *format, ...) 
{ #ifdef _DEBUG diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index d51a1b6c62f2..a27792b56f7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -8,9 +8,53 @@ #ifdef _DEBUG #define DML2_ASSERT(condition) dml2_assert(condition) #else -#define DML2_ASSERT(condition) +#define DML2_ASSERT(condition) ((void)0) +#endif +/* + * DML_LOG_FATAL - fatal errors for unrecoverable DML states until a restart. + * DML_LOG_ERROR - unexpected but recoverable failures inside DML + * DML_LOG_WARN - unexpected inputs or events to DML + * DML_LOG_INFO - high level tracing of DML interfaces + * DML_LOG_DEBUG - detailed tracing of DML internal components + * DML_LOG_VERBOSE - detailed tracing of DML calculation procedure + */ +#if !defined(DML_LOG_LEVEL) +#if defined(_DEBUG) && defined(_DEBUG_PRINTS) +/* for backward compatibility with old macros */ +#define DML_LOG_LEVEL 5 +#else +#define DML_LOG_LEVEL 0 +#endif +#endif + +#define DML_LOG_FATAL(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= 1 +#define DML_LOG_ERROR(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_ERROR(fmt, ...) ((void)0) +#endif +#if DML_LOG_LEVEL >= 2 +#define DML_LOG_WARN(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_WARN(fmt, ...) ((void)0) +#endif +#if DML_LOG_LEVEL >= 3 +#define DML_LOG_INFO(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_INFO(fmt, ...) ((void)0) +#endif +#if DML_LOG_LEVEL >= 4 +#define DML_LOG_DEBUG(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_DEBUG(fmt, ...) ((void)0) +#endif +#if DML_LOG_LEVEL >= 5 +#define DML_LOG_VERBOSE(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_VERBOSE(fmt, ...) 
((void)0) #endif +int dml2_log_internal(const char *format, ...); int dml2_printf(const char *format, ...); void dml2_assert(int condition); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index aeac9f159fa5..7fb6026bcb49 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -8,7 +8,6 @@ #include "dml2_external_lib_deps.h" #include "dml_top_types.h" #include "dml2_core_shared_types.h" - /* * DML2 MCG Types and Interfaces */ @@ -63,7 +62,6 @@ struct dml2_mcg_build_min_clock_table_params_in_out { */ struct dml2_mcg_min_clock_table *min_clk_table; }; - struct dml2_mcg_instance { bool (*build_min_clock_table)(struct dml2_mcg_build_min_clock_table_params_in_out *in_out); bool (*unit_test)(void); @@ -81,7 +79,6 @@ struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out { struct dml2_soc_bb *soc_bb; struct dml2_mcg_min_clock_table *min_clk_table; const struct display_configuation_with_meta *display_cfg; - struct { bool perform_pseudo_map; struct dml2_core_internal_soc_bb *soc_bb; @@ -309,7 +306,7 @@ struct dml2_optimization_stage3_state { // The pstate support mode for each plane // The number of valid elements == display_cfg.num_planes // The indexing of pstate_switch_modes matches plane_descriptors[] - enum dml2_uclk_pstate_support_method pstate_switch_modes[DML2_MAX_PLANES]; + enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES]; // Meta-data for implicit SVP generation, indexed by stream index struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; @@ -356,6 +353,10 @@ struct display_configuation_with_meta { struct dml2_optimization_stage5_state stage5; }; +struct dml2_pmo_pstate_strategy { + enum dml2_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; + bool allow_state_increase; +}; struct 
dml2_core_mode_support_in_out { /* * Inputs @@ -365,7 +366,6 @@ struct dml2_core_mode_support_in_out { struct dml2_mcg_min_clock_table *min_clk_table; int min_clk_index; - /* * Outputs */ @@ -395,7 +395,6 @@ struct dml2_core_mode_programming_in_out { struct dml2_core_instance *instance; const struct display_configuation_with_meta *display_cfg; const struct core_display_cfg_support_info *cfg_support_info; - /* * Outputs (also Input the clk freq are also from programming struct) */ @@ -445,6 +444,7 @@ struct dml2_core_internal_state_intermediates { struct dml2_core_mode_support_locals { struct dml2_core_calcs_mode_support_ex mode_support_ex_params; struct dml2_display_cfg svp_expanded_display_cfg; + struct dml2_calculate_mcache_allocation_in_out calc_mcache_allocation_params; }; struct dml2_core_mode_programming_locals { @@ -600,34 +600,11 @@ struct dml2_pmo_optimize_for_stutter_in_out { struct display_configuation_with_meta *optimized_display_config; }; -enum dml2_pmo_pstate_method { - dml2_pmo_pstate_strategy_na = 0, - /* hw exclusive modes */ - dml2_pmo_pstate_strategy_vactive = 1, - dml2_pmo_pstate_strategy_vblank = 2, - dml2_pmo_pstate_strategy_reserved_hw = 5, - /* fw assisted exclusive modes */ - dml2_pmo_pstate_strategy_fw_svp = 6, - dml2_pmo_pstate_strategy_reserved_fw = 10, - /* fw assisted modes requiring drr modulation */ - dml2_pmo_pstate_strategy_fw_vactive_drr = 11, - dml2_pmo_pstate_strategy_fw_vblank_drr = 12, - dml2_pmo_pstate_strategy_fw_svp_drr = 13, - dml2_pmo_pstate_strategy_reserved_fw_drr_clamped = 20, - dml2_pmo_pstate_strategy_fw_drr = 21, - dml2_pmo_pstate_strategy_reserved_fw_drr_var = 22, -}; - -struct dml2_pmo_pstate_strategy { - enum dml2_pmo_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; - bool allow_state_increase; -}; - -#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw - dml2_pmo_pstate_strategy_na + 1)) - 1) << dml2_pmo_pstate_strategy_na) -#define PMO_DRR_STRATEGY_MASK (((1 << 
(dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) -#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_clamped - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) -#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_drr) -#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_svp + 1)) - 1) << dml2_pmo_pstate_strategy_fw_svp) +#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw - dml2_pstate_method_na + 1)) - 1) << dml2_pstate_method_na) +#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) +#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_clamped - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) +#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_drr + 1)) - 1) << dml2_pstate_method_fw_drr) +#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_svp + 1)) - 1) << dml2_pstate_method_fw_svp) #define PMO_DCN4_MAX_DISPLAYS 4 #define PMO_DCN4_MAX_NUM_VARIANTS 2 @@ -645,6 +622,8 @@ struct dml2_pmo_scratch { int stream_mask; } pmo_dcn3; struct { + struct dml2_pmo_pstate_strategy expanded_override_strategy_list[2 * 2 * 2 * 2]; + unsigned int num_expanded_override_strategies; struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; int num_pstate_candidates; int cur_pstate_candidate; @@ -706,7 +685,6 @@ struct dml2_pmo_instance { int mpc_combine_limit; int odm_combine_limit; int mcg_clock_table_size; - 
union { struct { struct { @@ -963,7 +941,13 @@ struct dml2_top_mcache_validate_admissability_locals { struct dml2_top_display_cfg_support_info { const struct dml2_display_cfg *display_config; struct core_display_cfg_support_info core_info; - enum dml2_pstate_support_method per_plane_pstate_method[DML2_MAX_PLANES]; +}; + +struct dml2_top_funcs { + bool (*check_mode_supported)(struct dml2_check_mode_supported_in_out *in_out); + bool (*build_mode_programming)(struct dml2_build_mode_programming_in_out *in_out); + bool (*build_mcache_programming)(struct dml2_build_mcache_programming_in_out *in_out); + bool (*unit_test)(void); }; struct dml2_instance { @@ -978,8 +962,8 @@ struct dml2_instance { struct dml2_ip_capabilities ip_caps; struct dml2_mcg_min_clock_table min_clk_table; - struct dml2_pmo_options pmo_options; + struct dml2_top_funcs funcs; struct { struct dml2_initialize_instance_locals initialize_instance_locals; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c index 3d29169dd6bb..6b3b8803e0ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c @@ -813,7 +813,7 @@ static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struc { int i, old_plane_count; struct dc_stream_status *stream_status = NULL; - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; + struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; for (i = 0; i < context->stream_count; i++) if (context->streams[i] == stream) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index bde4250853b1..b416320873e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -553,13 +553,53 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct 
dc *in_dc, } } - dml2_policy_build_synthetic_soc_states(s, p); - if (dml2->v20.dml_core_ctx.project == dml_project_dcn35) { - // Override last out_state with data from last in_state - // This will ensure that out_state contains max fclk - memcpy(&p->out_states->state_array[p->out_states->num_states - 1], - &p->in_states->state_array[p->in_states->num_states - 1], - sizeof(struct soc_state_bounding_box_st)); + if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 || + dml2->v20.dml_core_ctx.project == dml_project_dcn351) { + int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0, + max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0, max_socclk_mhz = 0; + + for (i = 0; i < p->in_states->num_states; i++) { + if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = (int)p->in_states->state_array[i].dcfclk_mhz; + if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) + max_fclk_mhz = (int)p->in_states->state_array[i].fabricclk_mhz; + if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) + max_socclk_mhz = (int)p->in_states->state_array[i].socclk_mhz; + if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) + max_uclk_mhz = (int)p->in_states->state_array[i].dram_speed_mts; + if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = (int)p->in_states->state_array[i].dispclk_mhz; + if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = (int)p->in_states->state_array[i].dppclk_mhz; + if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; + if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; + } + + for (i = 0; i < p->in_states->num_states; i++) { + /* Independent states - including base (unlisted) parameters from state 0. 
*/ + p->out_states->state_array[i] = p->in_states->state_array[0]; + + p->out_states->state_array[i].dispclk_mhz = max_dispclk_mhz; + p->out_states->state_array[i].dppclk_mhz = max_dppclk_mhz; + p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; + p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; + + p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0; + p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; + p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; + + /* Dependent states. */ + p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; + p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz; + p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz; + p->out_states->state_array[i].dcfclk_mhz = p->in_states->state_array[i].dcfclk_mhz; + } + + p->out_states->num_states = p->in_states->num_states; + } else { + dml2_policy_build_synthetic_soc_states(s, p); } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 9190c1328d5b..68b882d28195 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -531,14 +531,21 @@ static bool optimize_pstate_with_svp_and_drr(struct dml2_context *dml2, struct d static bool call_dml_mode_support_and_programming(struct dc_state *context) { unsigned int result = 0; - unsigned int min_state; + unsigned int min_state = 0; int min_state_for_g6_temp_read = 0; + + + if (!context) + return false; + struct dml2_context *dml2 = context->bw_ctx.dml2; struct dml2_wrapper_scratch *s = &dml2->v20.scratch; - min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context); + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context); - 
ASSERT(min_state_for_g6_temp_read >= 0); + ASSERT(min_state_for_g6_temp_read >= 0); + } if (!dml2->config.use_native_pstate_optimization) { result = optimize_pstate_with_svp_and_drr(dml2, context); @@ -549,14 +556,20 @@ static bool call_dml_mode_support_and_programming(struct dc_state *context) /* Upon trying to sett certain frequencies in FRL, min_state_for_g6_temp_read is reported as -1. This leads to an invalid value of min_state causing crashes later on. * Use the default logic for min_state only when min_state_for_g6_temp_read is a valid value. In other cases, use the value calculated by the DML directly. */ - if (min_state_for_g6_temp_read >= 0) - min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx; - else - min_state = s->mode_support_params.out_lowest_state_idx; - - if (result) - result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true); + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + if (min_state_for_g6_temp_read >= 0) + min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? 
min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx; + else + min_state = s->mode_support_params.out_lowest_state_idx; + } + if (result) { + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true); + } else { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); + } + } return result; } @@ -685,6 +698,8 @@ static bool dml2_validate_only(struct dc_state *context) build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); map_dc_state_into_dml_display_cfg(dml2, context, &dml2->v20.scratch.cur_display_config); + if (!dml2->config.skip_hw_state_mapping) + dml2_apply_det_buffer_allocation_policy(dml2, &dml2->v20.scratch.cur_display_config); result = pack_and_call_dml_mode_support_ex(dml2, &dml2->v20.scratch.cur_display_config, @@ -732,11 +747,10 @@ static inline struct dml2_context *dml2_allocate_memory(void) static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { - // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) { - dml21_reinit(in_dc, dml2, config); + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) { + dml21_reinit(in_dc, dml2, config); return; - } + } // Store config options (*dml2)->config = *config; @@ -771,10 +785,8 @@ static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_op bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { - // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. 
Remove DCN_VERSION_3_2 after N-1 validation phase is complete. - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) { + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) return dml21_create(in_dc, dml2, config); - } // Allocate Mode Lib Ctx *dml2 = dml2_allocate_memory(); @@ -842,8 +854,7 @@ void dml2_reinit(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { - // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) { + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) { dml21_reinit(in_dc, dml2, config); return; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c index 377ef6d01ae5..00d22e542469 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c @@ -427,18 +427,6 @@ void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *disp_dlg_regs, dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); - // hack for FPGA - /* NOTE: We dont have getenv defined in driver and it does not make any sense in the driver */ - /*char* fpga_env = getenv("FPGA_FPDIV"); - if(fpga_env !=NULL) - { - if(disp_dlg_regs->vratio_prefetch >= (dml_uint_t)dml_pow(2, 22)) - { - disp_dlg_regs->vratio_prefetch = (dml_uint_t)dml_pow(2, 22)-1; - dml_print("FPGA msg: vratio_prefetch exceed the max value, the register field is [21:0]\n"); - } - }*/ - 
disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_group_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_get_refcyc_per_vm_group_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_req_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10)); diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c index e1da48b05d00..75fb77bca83b 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c @@ -194,6 +194,9 @@ void dpp_reset(struct dpp *dpp_base) dpp->filter_h = NULL; dpp->filter_v = NULL; + memset(&dpp_base->pos, 0, sizeof(dpp_base->pos)); + memset(&dpp_base->att, 0, sizeof(dpp_base->att)); + memset(&dpp->scl_data, 0, sizeof(dpp->scl_data)); memset(&dpp->pwl_data, 0, sizeof(dpp->pwl_data)); } @@ -480,10 +483,11 @@ void dpp1_set_cursor_position( if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ - REG_UPDATE(CURSOR0_CONTROL, - CUR0_ENABLE, cur_en); + if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) { + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); - dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + } } void dpp1_cnv_set_optional_cursor_attributes( diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 3b6ca7974e18..1236e0f9a256 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -154,9 +154,11 @@ void dpp401_set_cursor_position( struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); uint32_t cur_en = pos->enable ? 
1 : 0; - REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); + if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) { + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); - dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + } } void dpp401_set_optional_cursor_attributes( diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index d9aaebfa3a0a..11535922b5ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -30,6 +30,9 @@ #include "rc_calc.h" #include "fixed31_32.h" +#define DC_LOGGER \ + dsc->ctx->logger + /* This module's internal functions */ /* default DSC policy target bitrate limit is 16bpp */ @@ -480,6 +483,48 @@ bool dc_dsc_compute_bandwidth_range( return is_dsc_possible; } +void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc, + const struct dc_crtc_timing *timing) +{ + struct dsc_enc_caps dsc_enc_caps; + + get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); + + DC_LOG_DSC("dsc encoder caps:"); + DC_LOG_DSC("\tdsc_version 0x%x", dsc_enc_caps.dsc_version); + DC_LOG_DSC("\tslice_caps 0x%x", dsc_enc_caps.slice_caps.raw); + DC_LOG_DSC("\tlb_bit_depth %d", dsc_enc_caps.lb_bit_depth); + DC_LOG_DSC("\tis_block_pred_supported %d", dsc_enc_caps.is_block_pred_supported); + DC_LOG_DSC("\tcolor_formats 0x%x", dsc_enc_caps.color_formats.raw); + DC_LOG_DSC("\tcolor_depth 0x%x", dsc_enc_caps.color_depth.raw); + DC_LOG_DSC("\tmax_total_throughput_mps %d", dsc_enc_caps.max_total_throughput_mps); + DC_LOG_DSC("\tmax_slice_width %d", dsc_enc_caps.max_slice_width); + DC_LOG_DSC("\tbpp_increment_div %d", dsc_enc_caps.bpp_increment_div); +} + +void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc, + const struct dsc_dec_dpcd_caps *dsc_sink_caps) +{ + DC_LOG_DSC("dsc decoder caps:"); + DC_LOG_DSC("\tis_dsc_supported %d", dsc_sink_caps->is_dsc_supported); + 
DC_LOG_DSC("\tdsc_version 0x%x", dsc_sink_caps->dsc_version); + DC_LOG_DSC("\trc_buffer_size %d", dsc_sink_caps->rc_buffer_size); + DC_LOG_DSC("\tslice_caps1 0x%x", dsc_sink_caps->slice_caps1.raw); + DC_LOG_DSC("\tslice_caps2 0x%x", dsc_sink_caps->slice_caps2.raw); + DC_LOG_DSC("\tlb_bit_depth %d", dsc_sink_caps->lb_bit_depth); + DC_LOG_DSC("\tis_block_pred_supported %d", dsc_sink_caps->is_block_pred_supported); + DC_LOG_DSC("\tedp_max_bits_per_pixel %d", dsc_sink_caps->edp_max_bits_per_pixel); + DC_LOG_DSC("\tcolor_formats 0x%x", dsc_sink_caps->color_formats.raw); + DC_LOG_DSC("\tthroughput_mode_0_mps %d", dsc_sink_caps->throughput_mode_0_mps); + DC_LOG_DSC("\tthroughput_mode_1_mps %d", dsc_sink_caps->throughput_mode_1_mps); + DC_LOG_DSC("\tmax_slice_width %d", dsc_sink_caps->max_slice_width); + DC_LOG_DSC("\tbpp_increment_div %d", dsc_sink_caps->bpp_increment_div); + DC_LOG_DSC("\tbranch_overall_throughput_0_mps %d", dsc_sink_caps->branch_overall_throughput_0_mps); + DC_LOG_DSC("\tbranch_overall_throughput_1_mps %d", dsc_sink_caps->branch_overall_throughput_1_mps); + DC_LOG_DSC("\tbranch_max_line_width %d", dsc_sink_caps->branch_max_line_width); + DC_LOG_DSC("\tis_dp %d", dsc_sink_caps->is_dp); +} + static void get_dsc_enc_caps( const struct display_stream_compressor *dsc, struct dsc_enc_caps *dsc_enc_caps, diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c index fae98cf52020..bc058f682438 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c @@ -270,16 +270,3 @@ void dcn30_dwbc_construct(struct dcn30_dwbc *dwbc30, dwbc30->dwbc_shift = dwbc_shift; dwbc30->dwbc_mask = dwbc_mask; } - -void dwb3_set_host_read_rate_control(struct dwbc *dwbc, bool host_read_delay) -{ - struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc); - - /* - * Set maximum delay of host read access to DWBSCL LUT or OGAM LUT if there are no - * idle cycles in HW 
pipeline (in number of clock cycles times 4) - */ - REG_UPDATE(DWB_HOST_READ_CONTROL, DWB_HOST_READ_RATE_CONTROL, host_read_delay); - - DC_LOG_DWB("%s dwb3_rate_control at inst = %d", __func__, dwbc->inst); -} diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h index 0f3f7c5fbaec..7f053f49ec6a 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h @@ -914,7 +914,6 @@ bool dwb3_ogam_set_input_transfer_func( struct dwbc *dwbc, const struct dc_transfer_func *in_transfer_func_dwb_ogam); -void dwb3_set_host_read_rate_control(struct dwbc *dwbc, bool host_read_delay); #endif diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c index 22ac2b7e49ae..9b026600b90e 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c @@ -140,7 +140,7 @@ void hubp1_vready_workaround(struct hubp *hubp, void hubp1_program_tiling( struct hubp *hubp, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); @@ -518,6 +518,20 @@ bool hubp1_program_surface_flip_and_addr( return true; } +void hubp1_clear_tiling(struct hubp *hubp) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0); + REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR); + + REG_UPDATE_4(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, 0, + PRIMARY_SURFACE_DCC_IND_64B_BLK, 0, + SECONDARY_SURFACE_DCC_EN, 0, + SECONDARY_SURFACE_DCC_IND_64B_BLK, 0); +} + void hubp1_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size independent_64b_blks) { @@ -532,10 +546,16 @@ void hubp1_dcc_control(struct hubp *hubp, bool enable, SECONDARY_SURFACE_DCC_IND_64B_BLK, dcc_ind_64b_blk); } 
+void hubp_reset(struct hubp *hubp) +{ + memset(&hubp->pos, 0, sizeof(hubp->pos)); + memset(&hubp->att, 0, sizeof(hubp->att)); +} + void hubp1_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -1337,8 +1357,9 @@ static void hubp1_wait_pipe_read_start(struct hubp *hubp) void hubp1_init(struct hubp *hubp) { - //do nothing + hubp_reset(hubp); } + static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_program_surface_flip_and_addr = hubp1_program_surface_flip_and_addr, @@ -1351,6 +1372,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_set_vm_context0_settings = hubp1_set_vm_context0_settings, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_hubp_blank_en = hubp1_set_hubp_blank_en, .set_cursor_attributes = hubp1_cursor_set_attributes, @@ -1363,6 +1385,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_disable_control = hubp1_disable_control, .hubp_get_underflow_status = hubp1_get_underflow_status, .hubp_init = hubp1_init, + .hubp_clear_tiling = hubp1_clear_tiling, .dmdata_set_attributes = NULL, .dmdata_load = NULL, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h index 69119b2fdce2..c7765e6f09e6 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h @@ -706,7 +706,7 @@ struct dcn10_hubp { void hubp1_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -739,13 +739,15 @@ void hubp1_program_rotation( void 
hubp1_program_tiling( struct hubp *hubp, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format); void hubp1_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size independent_64b_blks); +void hubp_reset(struct hubp *hubp); + bool hubp1_program_surface_flip_and_addr( struct hubp *hubp, const struct dc_plane_address *address, @@ -794,4 +796,6 @@ void hubp1_soft_reset(struct hubp *hubp, bool reset); void hubp1_set_flip_int(struct hubp *hubp); +void hubp1_clear_tiling(struct hubp *hubp); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c index 0637e4c552d8..91259b896e03 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c @@ -310,7 +310,7 @@ void hubp2_setup_interdependent( */ static void hubp2_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { REG_UPDATE_3(DCSURF_ADDR_CONFIG, @@ -406,6 +406,20 @@ void hubp2_program_rotation( H_MIRROR_EN, mirror); } +void hubp2_clear_tiling(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0); + REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR); + + REG_UPDATE_4(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, 0, + PRIMARY_SURFACE_DCC_IND_64B_BLK, 0, + SECONDARY_SURFACE_DCC_EN, 0, + SECONDARY_SURFACE_DCC_IND_64B_BLK, 0); +} + void hubp2_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size independent_64b_blks) { @@ -536,7 +550,7 @@ void hubp2_program_pixel_format( void hubp2_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct 
dc_plane_dcc_param *dcc, @@ -1044,11 +1058,13 @@ void hubp2_cursor_set_position( if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ - if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) - hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { + if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) + hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); - REG_UPDATE(CURSOR_CONTROL, + REG_UPDATE(CURSOR_CONTROL, CURSOR_ENABLE, cur_en); + } REG_SET_2(CURSOR_POSITION, 0, CURSOR_X_POSITION, pos->x, @@ -1660,6 +1676,7 @@ static struct hubp_funcs dcn20_hubp_funcs = { .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp2_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, @@ -1676,6 +1693,7 @@ static struct hubp_funcs dcn20_hubp_funcs = { .hubp_in_blank = hubp1_in_blank, .hubp_soft_reset = hubp1_soft_reset, .hubp_set_flip_int = hubp1_set_flip_int, + .hubp_clear_tiling = hubp2_clear_tiling, }; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h index 18e194507e36..6968087a3605 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h @@ -382,7 +382,7 @@ void hubp2_program_pixel_format( void hubp2_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -409,6 +409,8 @@ void hubp2_read_state_common(struct hubp *hubp); void hubp2_read_state(struct hubp *hubp); +void hubp2_clear_tiling(struct hubp *hubp); + #endif /* __DC_MEM_INPUT_DCN20_H__ */ diff --git 
a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c index cd2bfcc51276..ec88ee424a7f 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c @@ -42,7 +42,7 @@ static void hubp201_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -121,6 +121,7 @@ static struct hubp_funcs dcn201_hubp_funcs = { .set_cursor_position = hubp1_cursor_set_position, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .hubp_clk_cntl = hubp1_clk_cntl, .hubp_vtg_sel = hubp1_vtg_sel, @@ -131,6 +132,7 @@ static struct hubp_funcs dcn201_hubp_funcs = { .hubp_clear_underflow = hubp1_clear_underflow, .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, .hubp_init = hubp1_init, + .hubp_clear_tiling = hubp1_clear_tiling, }; bool dcn201_hubp_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c index e13d69a22c1c..e2740482e1cf 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c @@ -811,6 +811,8 @@ static void hubp21_init(struct hubp *hubp) struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1; REG_WRITE(HUBPREQ_DEBUG, 1 << 26); + + hubp_reset(hubp); } static struct hubp_funcs dcn21_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, @@ -823,6 +825,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp21_set_vm_system_aperture_settings, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, + 
.hubp_reset = hubp_reset, .mem_program_viewport = hubp21_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp1_cursor_set_position, @@ -837,6 +840,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .hubp_init = hubp21_init, .validate_dml_output = hubp21_validate_dml_output, .hubp_set_flip_int = hubp1_set_flip_int, + .hubp_clear_tiling = hubp1_clear_tiling, }; bool hubp21_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c index 60a64d290352..be0ac613675a 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c @@ -318,7 +318,7 @@ bool hubp3_program_surface_flip_and_addr( void hubp3_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { REG_UPDATE_4(DCSURF_ADDR_CONFIG, @@ -334,6 +334,22 @@ void hubp3_program_tiling( } +void hubp3_clear_tiling(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0); + REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR); + + REG_UPDATE_6(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, 0, + PRIMARY_SURFACE_DCC_IND_BLK, 0, + PRIMARY_SURFACE_DCC_IND_BLK_C, 0, + SECONDARY_SURFACE_DCC_EN, 0, + SECONDARY_SURFACE_DCC_IND_BLK, 0, + SECONDARY_SURFACE_DCC_IND_BLK_C, 0); +} + void hubp3_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size blk_size) { @@ -395,7 +411,7 @@ void hubp3_dmdata_set_attributes( void hubp3_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -483,6 +499,8 @@ void hubp3_init(struct hubp *hubp) struct dcn20_hubp *hubp2 = 
TO_DCN20_HUBP(hubp); //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1; REG_WRITE(HUBPREQ_DEBUG, 1 << 26); + + hubp_reset(hubp); } static struct hubp_funcs dcn30_hubp_funcs = { @@ -497,6 +515,7 @@ static struct hubp_funcs dcn30_hubp_funcs = { .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, @@ -512,6 +531,7 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_in_blank = hubp1_in_blank, .hubp_soft_reset = hubp1_soft_reset, .hubp_set_flip_int = hubp1_set_flip_int, + .hubp_clear_tiling = hubp3_clear_tiling, }; bool hubp3_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h index b010531a7fe8..b7d7adf0b58c 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h @@ -264,7 +264,7 @@ bool hubp3_program_surface_flip_and_addr( void hubp3_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -280,7 +280,7 @@ void hubp3_setup( void hubp3_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format); void hubp3_dcc_control(struct hubp *hubp, bool enable, @@ -297,6 +297,8 @@ void hubp3_read_state(struct hubp *hubp); void hubp3_init(struct hubp *hubp); +void hubp3_clear_tiling(struct hubp *hubp); + #endif /* __DC_HUBP_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c index 8394e8c06919..c2900c79a2d3 100644 --- 
a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c @@ -79,6 +79,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, @@ -96,6 +97,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .hubp_set_flip_int = hubp1_set_flip_int, .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank, + .hubp_clear_tiling = hubp3_clear_tiling, }; bool hubp31_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index ca5b4b28a664..edd37898d550 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ -181,6 +181,7 @@ static struct hubp_funcs dcn32_hubp_funcs = { .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, @@ -201,7 +202,8 @@ static struct hubp_funcs dcn32_hubp_funcs = { .hubp_update_force_cursor_pstate_disallow = hubp32_update_force_cursor_pstate_disallow, .phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable, .hubp_update_mall_sel = hubp32_update_mall_sel, - .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering + .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, + .hubp_clear_tiling = hubp3_clear_tiling, }; bool hubp32_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index 
d1f05b82b3dd..5661d7a80d54 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -172,7 +172,7 @@ void hubp35_program_pixel_format( void hubp35_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -199,6 +199,7 @@ static struct hubp_funcs dcn35_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, @@ -216,6 +217,7 @@ static struct hubp_funcs dcn35_hubp_funcs = { .hubp_set_flip_int = hubp1_set_flip_int, .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank_value, + .hubp_clear_tiling = hubp3_clear_tiling, }; bool hubp35_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h index 586b43aa5834..d913f80b3130 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h @@ -65,7 +65,7 @@ void hubp35_program_pixel_format( void hubp35_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index b1ebf5053b4f..5ed195377a6c 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ 
b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -40,7 +40,7 @@ #define FN(reg_name, field_name) \ hubp2->hubp_shift->field_name, hubp2->hubp_mask->field_name -static void hubp401_program_3dlut_fl_addr(struct hubp *hubp, +void hubp401_program_3dlut_fl_addr(struct hubp *hubp, const struct dc_plane_address address) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -49,14 +49,14 @@ static void hubp401_program_3dlut_fl_addr(struct hubp *hubp, REG_WRITE(HUBP_3DLUT_ADDRESS_LOW, address.lut3d.addr.low_part); } -static void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group) +void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(HUBP_3DLUT_DLG_PARAM, REFCYC_PER_3DLUT_GROUP, refcyc_per_3dlut_group); } -static void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable) +void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -72,28 +72,28 @@ int hubp401_get_3dlut_fl_done(struct hubp *hubp) return ret; } -static void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode) +void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_ADDRESSING_MODE, addr_mode); } -static void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width) +void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_WIDTH, width); } -static void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled) +void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); 
REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, protection_enabled ? 1 : 0); } -static void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, +void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r) @@ -106,21 +106,21 @@ static void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, HUBP_3DLUT_CROSSBAR_SELECT_CR_R, bit_slice_cr_r); } -static void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale) +void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE_2(_3DLUT_FL_BIAS_SCALE, HUBP0_3DLUT_FL_BIAS, bias, HUBP0_3DLUT_FL_SCALE, scale); } -static void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode) +void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_MODE, mode); } -static void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format) +void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -141,34 +141,48 @@ void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor void hubp401_init(struct hubp *hubp) { - //For now nothing to do, HUBPREQ_DEBUG_DB register is removed on DCN4x. 
+ hubp_reset(hubp); } void hubp401_vready_at_or_After_vsync(struct hubp *hubp, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest) + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing) { - uint32_t value = 0; + unsigned int vstartup_lines = pipe_global_sync->dcn4x.vstartup_lines; + unsigned int vupdate_offset_pixels = pipe_global_sync->dcn4x.vupdate_offset_pixels; + unsigned int vupdate_width_pixels = pipe_global_sync->dcn4x.vupdate_vupdate_width_pixels; + unsigned int vready_offset_pixels = pipe_global_sync->dcn4x.vready_offset_pixels; + unsigned int htotal = timing->h_total; + unsigned int vblank_start = 0; + unsigned int vblank_end = 0; + unsigned int pixel_width = 0; + uint32_t reg_value = 0; + bool is_vready_at_or_after_vsync = false; struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + /* * if (VSTARTUP_START - (VREADY_OFFSET+VUPDATE_WIDTH+VUPDATE_OFFSET)/htotal) <= OTG_V_BLANK_END * Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 1 * else * Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 0 */ - if (pipe_dest->htotal != 0) { - if ((pipe_dest->vstartup_start - (pipe_dest->vready_offset+pipe_dest->vupdate_width - + pipe_dest->vupdate_offset) / pipe_dest->htotal) <= pipe_dest->vblank_end) { - value = 1; - } else - value = 0; + if (htotal != 0) { + vblank_start = timing->v_total - timing->v_front_porch; + vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom; + pixel_width = vready_offset_pixels + vupdate_width_pixels + vupdate_offset_pixels; + + is_vready_at_or_after_vsync = (vstartup_lines - pixel_width / htotal) <= vblank_end; + + if (is_vready_at_or_after_vsync) + reg_value = 1; } - REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, value); + REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, reg_value); } void hubp401_program_requestor( struct hubp *hubp, - struct _vcs_dpi_display_rq_regs_st *rq_regs) + struct dml2_display_rq_regs *rq_regs) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ 
-196,8 +210,8 @@ void hubp401_program_requestor( void hubp401_program_deadline( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr) + struct dml2_display_dlg_regs *dlg_attr, + struct dml2_display_ttu_regs *ttu_attr) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -294,66 +308,64 @@ void hubp401_program_deadline( void hubp401_setup( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr, - struct _vcs_dpi_display_rq_regs_st *rq_regs, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest) + struct dml2_dchub_per_pipe_register_set *pipe_regs, + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing) { /* otg is locked when this func is called. Register are double buffered. * disable the requestors is not needed */ - hubp401_vready_at_or_After_vsync(hubp, pipe_dest); - hubp401_program_requestor(hubp, rq_regs); - hubp401_program_deadline(hubp, dlg_attr, ttu_attr); + hubp401_vready_at_or_After_vsync(hubp, pipe_global_sync, timing); + hubp401_program_requestor(hubp, &pipe_regs->rq_regs); + hubp401_program_deadline(hubp, &pipe_regs->dlg_regs, &pipe_regs->ttu_regs); } void hubp401_setup_interdependent( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr) + struct dml2_dchub_per_pipe_register_set *pipe_regs) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_SET_2(PREFETCH_SETTINGS, 0, - DST_Y_PREFETCH, dlg_attr->dst_y_prefetch, - VRATIO_PREFETCH, dlg_attr->vratio_prefetch); + DST_Y_PREFETCH, pipe_regs->dlg_regs.dst_y_prefetch, + VRATIO_PREFETCH, pipe_regs->dlg_regs.vratio_prefetch); REG_SET(PREFETCH_SETTINGS_C, 0, - VRATIO_PREFETCH_C, dlg_attr->vratio_prefetch_c); + VRATIO_PREFETCH_C, pipe_regs->dlg_regs.vratio_prefetch_c); REG_SET_2(VBLANK_PARAMETERS_0, 0, - DST_Y_PER_VM_VBLANK, dlg_attr->dst_y_per_vm_vblank, - DST_Y_PER_ROW_VBLANK, 
dlg_attr->dst_y_per_row_vblank); + DST_Y_PER_VM_VBLANK, pipe_regs->dlg_regs.dst_y_per_vm_vblank, + DST_Y_PER_ROW_VBLANK, pipe_regs->dlg_regs.dst_y_per_row_vblank); REG_SET_2(FLIP_PARAMETERS_0, 0, - DST_Y_PER_VM_FLIP, dlg_attr->dst_y_per_vm_flip, - DST_Y_PER_ROW_FLIP, dlg_attr->dst_y_per_row_flip); + DST_Y_PER_VM_FLIP, pipe_regs->dlg_regs.dst_y_per_vm_flip, + DST_Y_PER_ROW_FLIP, pipe_regs->dlg_regs.dst_y_per_row_flip); REG_SET(VBLANK_PARAMETERS_3, 0, - REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l); + REFCYC_PER_META_CHUNK_VBLANK_L, pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_l); REG_SET(VBLANK_PARAMETERS_4, 0, - REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c); + REFCYC_PER_META_CHUNK_VBLANK_C, pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_c); REG_SET(FLIP_PARAMETERS_2, 0, - REFCYC_PER_META_CHUNK_FLIP_L, dlg_attr->refcyc_per_meta_chunk_flip_l); + REFCYC_PER_META_CHUNK_FLIP_L, pipe_regs->dlg_regs.refcyc_per_meta_chunk_flip_l); REG_SET_2(PER_LINE_DELIVERY_PRE, 0, - REFCYC_PER_LINE_DELIVERY_PRE_L, dlg_attr->refcyc_per_line_delivery_pre_l, - REFCYC_PER_LINE_DELIVERY_PRE_C, dlg_attr->refcyc_per_line_delivery_pre_c); + REFCYC_PER_LINE_DELIVERY_PRE_L, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_l, + REFCYC_PER_LINE_DELIVERY_PRE_C, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_c); REG_SET(DCN_SURF0_TTU_CNTL1, 0, REFCYC_PER_REQ_DELIVERY_PRE, - ttu_attr->refcyc_per_req_delivery_pre_l); + pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_l); REG_SET(DCN_SURF1_TTU_CNTL1, 0, REFCYC_PER_REQ_DELIVERY_PRE, - ttu_attr->refcyc_per_req_delivery_pre_c); + pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_c); REG_SET(DCN_CUR0_TTU_CNTL1, 0, - REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0); + REFCYC_PER_REQ_DELIVERY_PRE, pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_cur0); REG_SET_2(DCN_GLOBAL_TTU_CNTL, 0, - MIN_TTU_VBLANK, ttu_attr->min_ttu_vblank, - QoS_LEVEL_FLIP, ttu_attr->qos_level_flip); + 
MIN_TTU_VBLANK, pipe_regs->ttu_regs.min_ttu_vblank, + QoS_LEVEL_FLIP, pipe_regs->ttu_regs.qos_level_flip); } @@ -508,6 +520,18 @@ bool hubp401_program_surface_flip_and_addr( return true; } +void hubp401_clear_tiling(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0); + REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR); + + REG_UPDATE_2(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, 0, + SECONDARY_SURFACE_DCC_EN, 0); +} + void hubp401_dcc_control(struct hubp *hubp, struct dc_plane_dcc_param *dcc) { @@ -520,7 +544,7 @@ void hubp401_dcc_control(struct hubp *hubp, void hubp401_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { /* DCSURF_ADDR_CONFIG still shows up in reg spec, but does not need to be programmed for DCN4x @@ -568,7 +592,7 @@ void hubp401_program_size( void hubp401_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -718,11 +742,13 @@ void hubp401_cursor_set_position( dc_fixpt_from_int(dst_x_offset), param->h_scale_ratio)); - if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) - hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { + if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) + hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); - REG_UPDATE(CURSOR_CONTROL, - CURSOR_ENABLE, cur_en); + REG_UPDATE(CURSOR_CONTROL, + CURSOR_ENABLE, cur_en); + } REG_SET_2(CURSOR_POSITION, 0, CURSOR_X_POSITION, x_pos, @@ -969,11 +995,12 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_program_surface_flip_and_addr = hubp401_program_surface_flip_and_addr, .hubp_program_surface_config = 
hubp401_program_surface_config, .hubp_is_flip_pending = hubp2_is_flip_pending, - .hubp_setup = hubp401_setup, - .hubp_setup_interdependent = hubp401_setup_interdependent, + .hubp_setup2 = hubp401_setup, + .hubp_setup_interdependent2 = hubp401_setup_interdependent, .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, + .hubp_reset = hubp_reset, .mem_program_viewport = hubp401_set_viewport, .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position = hubp401_cursor_set_position, @@ -1004,7 +1031,8 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_program_3dlut_fl_width = hubp401_program_3dlut_fl_width, .hubp_program_3dlut_fl_tmz_protected = hubp401_program_3dlut_fl_tmz_protected, .hubp_program_3dlut_fl_crossbar = hubp401_program_3dlut_fl_crossbar, - .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done + .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done, + .hubp_clear_tiling = hubp2_clear_tiling, }; bool hubp401_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h index e52fdb5b0cd0..6e1d4c90ddd4 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h @@ -256,29 +256,15 @@ void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); -void hubp401_vready_at_or_After_vsync(struct hubp *hubp, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); - -void hubp401_program_requestor( - struct hubp *hubp, - struct _vcs_dpi_display_rq_regs_st *rq_regs); - -void hubp401_program_deadline( - struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr); - void hubp401_setup( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr, - struct 
_vcs_dpi_display_rq_regs_st *rq_regs, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); + struct dml2_dchub_per_pipe_register_set *pipe_regs, + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing); void hubp401_setup_interdependent( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr); + struct dml2_dchub_per_pipe_register_set *pipe_regs); bool hubp401_program_surface_flip_and_addr( struct hubp *hubp, @@ -290,7 +276,7 @@ void hubp401_dcc_control(struct hubp *hubp, void hubp401_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format); void hubp401_program_size( @@ -302,7 +288,7 @@ void hubp401_program_size( void hubp401_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -340,4 +326,42 @@ int hubp401_get_3dlut_fl_done(struct hubp *hubp); void hubp401_set_unbounded_requesting(struct hubp *hubp, bool enable); +void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale); + +void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, + enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g, + enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, + enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r); + +void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled); + +void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width); + +void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode); + +void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable); + +void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int 
refcyc_per_3dlut_group); + +void hubp401_program_3dlut_fl_addr(struct hubp *hubp, const struct dc_plane_address address); + +void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format); + +void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode); + +void hubp401_clear_tiling(struct hubp *hubp); + +void hubp401_vready_at_or_After_vsync(struct hubp *hubp, + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing); + +void hubp401_program_requestor( + struct hubp *hubp, + struct dml2_display_rq_regs *rq_regs); + +void hubp401_program_deadline( + struct hubp *hubp, + struct dml2_display_dlg_regs *dlg_attr, + struct dml2_display_ttu_regs *ttu_attr); + #endif /* __DC_HUBP_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 681bb92c6069..44e405e9bc97 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1286,6 +1286,7 @@ void dcn10_plane_atomic_power_down(struct dc *dc, if (hws->funcs.hubp_pg_control) hws->funcs.hubp_pg_control(hws, hubp->inst, false); + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); REG_SET(DC_IP_REQUEST_CNTL, 0, @@ -1447,6 +1448,7 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) /* Disable on the current state so the new one isn't cleared. 
*/ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index b029ec1b26d3..a5e18ab72394 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1288,7 +1288,7 @@ static void dcn20_power_on_plane_resources( } } -static void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, +void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) { //if (dc->debug.sanity_checks) { @@ -1467,7 +1467,7 @@ void dcn20_pipe_control_lock( } } -static void dcn20_detect_pipe_changes(struct dc_state *old_state, +void dcn20_detect_pipe_changes(struct dc_state *old_state, struct dc_state *new_state, struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe) @@ -1655,7 +1655,7 @@ static void dcn20_detect_pipe_changes(struct dc_state *old_state, } } -static void dcn20_update_dchubp_dpp( +void dcn20_update_dchubp_dpp( struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) @@ -1678,25 +1678,41 @@ static void dcn20_update_dchubp_dpp( * VTG is within DCHUBBUB which is commond block share by each pipe HUBP. * VTG is 1:1 mapping with OTG. 
Each pipe HUBP will select which VTG */ + if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) { hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst); - hubp->funcs->hubp_setup( - hubp, - &pipe_ctx->dlg_regs, - &pipe_ctx->ttu_regs, - &pipe_ctx->rq_regs, - &pipe_ctx->pipe_dlg_param); + if (hubp->funcs->hubp_setup2) { + hubp->funcs->hubp_setup2( + hubp, + &pipe_ctx->hubp_regs, + &pipe_ctx->global_sync, + &pipe_ctx->stream->timing); + } else { + hubp->funcs->hubp_setup( + hubp, + &pipe_ctx->dlg_regs, + &pipe_ctx->ttu_regs, + &pipe_ctx->rq_regs, + &pipe_ctx->pipe_dlg_param); + } } if (pipe_ctx->update_flags.bits.unbounded_req && hubp->funcs->set_unbounded_requesting) hubp->funcs->set_unbounded_requesting(hubp, pipe_ctx->unbounded_req); - if (pipe_ctx->update_flags.bits.hubp_interdependent) - hubp->funcs->hubp_setup_interdependent( - hubp, - &pipe_ctx->dlg_regs, - &pipe_ctx->ttu_regs); + if (pipe_ctx->update_flags.bits.hubp_interdependent) { + if (hubp->funcs->hubp_setup_interdependent2) { + hubp->funcs->hubp_setup_interdependent2( + hubp, + &pipe_ctx->hubp_regs); + } else { + hubp->funcs->hubp_setup_interdependent( + hubp, + &pipe_ctx->dlg_regs, + &pipe_ctx->ttu_regs); + } + } if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.plane_changed || @@ -1756,10 +1772,9 @@ static void dcn20_update_dchubp_dpp( &pipe_ctx->plane_res.scl_data.viewport_c); viewport_changed = true; } - if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) - hubp->funcs->hubp_program_mcache_id_and_split_coordinate( - hubp, - &pipe_ctx->mcache_regs); + + if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) + hubp->funcs->hubp_program_mcache_id_and_split_coordinate(hubp, &pipe_ctx->mcache_regs); /* Any updates are handled in dc interface, just need to apply existing for plane enable */ if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || @@ -1838,7 +1853,7 @@ static void dcn20_update_dchubp_dpp( 
hubp->funcs->phantom_hubp_post_enable(hubp); } -static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) +static int dcn20_calculate_vready_offset_for_group(struct pipe_ctx *pipe) { struct pipe_ctx *other_pipe; int vready_offset = pipe->pipe_dlg_param.vready_offset; @@ -1864,6 +1879,30 @@ static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) return vready_offset; } +static void dcn20_program_tg( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dce_hwseq *hws) +{ + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, + dcn20_calculate_vready_offset_for_group(pipe_ctx), + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); + + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + + pipe_ctx->stream_res.tg->funcs->set_vtg_params( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); + + if (hws->funcs.setup_vupdate_interrupt) + hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); +} + static void dcn20_program_pipe( struct dc *dc, struct pipe_ctx *pipe_ctx, @@ -1874,33 +1913,17 @@ static void dcn20_program_pipe( /* Only need to unblank on top pipe */ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) { if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.odm || - pipe_ctx->stream->update_flags.bits.abm_level) + pipe_ctx->update_flags.bits.odm || + pipe_ctx->stream->update_flags.bits.abm_level) hws->funcs.blank_pixel_data(dc, pipe_ctx, - !pipe_ctx->plane_state || - !pipe_ctx->plane_state->visible); + !pipe_ctx->plane_state || + !pipe_ctx->plane_state->visible); } /* Only update TG on top pipe */ if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe - && !pipe_ctx->prev_odm_pipe) { - 
pipe_ctx->stream_res.tg->funcs->program_global_sync( - pipe_ctx->stream_res.tg, - calculate_vready_offset_for_group(pipe_ctx), - pipe_ctx->pipe_dlg_param.vstartup_start, - pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width, - pipe_ctx->pipe_dlg_param.pstate_keepout); - - if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) - pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); - - pipe_ctx->stream_res.tg->funcs->set_vtg_params( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); - - if (hws->funcs.setup_vupdate_interrupt) - hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); - } + && !pipe_ctx->prev_odm_pipe) + dcn20_program_tg(dc, pipe_ctx, context, hws); if (pipe_ctx->update_flags.bits.odm) hws->funcs.update_odm(dc, context, pipe_ctx); @@ -1931,22 +1954,22 @@ static void dcn20_program_pipe( dcn20_update_dchubp_dpp(dc, pipe_ctx, context); if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || - pipe_ctx->plane_state->update_flags.bits.hdr_mult)) + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); if (hws->funcs.populate_mcm_luts) { if (pipe_ctx->plane_state) { hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, - pipe_ctx->plane_state->lut_bank_a); + pipe_ctx->plane_state->lut_bank_a); pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; } } if (pipe_ctx->plane_state && - (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || - pipe_ctx->plane_state->update_flags.bits.gamma_change || - pipe_ctx->plane_state->update_flags.bits.lut_3d || - pipe_ctx->update_flags.bits.enable)) + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change || + pipe_ctx->plane_state->update_flags.bits.lut_3d || + pipe_ctx->update_flags.bits.enable)) hws->funcs.set_input_transfer_func(dc, pipe_ctx, 
pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish @@ -1954,10 +1977,10 @@ static void dcn20_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf || + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -1966,7 +1989,7 @@ static void dcn20_program_pipe( * causes a different pipe to be chosen to odm combine with. */ if (pipe_ctx->update_flags.bits.enable - || pipe_ctx->update_flags.bits.opp_changed) { + || pipe_ctx->update_flags.bits.opp_changed) { pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( pipe_ctx->stream_res.opp, @@ -1996,14 +2019,14 @@ static void dcn20_program_pipe( memset(&params, 0, sizeof(params)); odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params); dc->hwss.set_disp_pattern_generator(dc, - pipe_ctx, - pipe_ctx->stream_res.test_pattern_params.test_pattern, - pipe_ctx->stream_res.test_pattern_params.color_space, - pipe_ctx->stream_res.test_pattern_params.color_depth, - NULL, - pipe_ctx->stream_res.test_pattern_params.width, - pipe_ctx->stream_res.test_pattern_params.height, - pipe_ctx->stream_res.test_pattern_params.offset); + pipe_ctx, + pipe_ctx->stream_res.test_pattern_params.test_pattern, + pipe_ctx->stream_res.test_pattern_params.color_space, + pipe_ctx->stream_res.test_pattern_params.color_depth, + NULL, + pipe_ctx->stream_res.test_pattern_params.width, + pipe_ctx->stream_res.test_pattern_params.height, + pipe_ctx->stream_res.test_pattern_params.offset); } } @@ -2012,11 +2035,12 @@ void dcn20_program_front_end_for_ctx( struct
dc_state *context) { int i; - struct dce_hwseq *hws = dc->hwseq; - DC_LOGGER_INIT(dc->ctx->logger); unsigned int prev_hubp_count = 0; unsigned int hubp_count = 0; - struct pipe_ctx *pipe; + struct dce_hwseq *hws = dc->hwseq; + struct pipe_ctx *pipe = NULL; + + DC_LOGGER_INIT(dc->ctx->logger); if (resource_is_pipe_topology_changed(dc->current_state, context)) resource_log_pipe_topology_update(dc, context); @@ -2029,7 +2053,7 @@ void dcn20_program_front_end_for_ctx( ASSERT(!pipe->plane_state->triplebuffer_flips); /*turn off triple buffer for full update*/ dc->hwss.program_triplebuffer( - dc, pipe, pipe->plane_state->triplebuffer_flips); + dc, pipe, pipe->plane_state->triplebuffer_flips); } } } @@ -2044,30 +2068,31 @@ void dcn20_program_front_end_for_ctx( if (prev_hubp_count == 0 && hubp_count > 0) { if (dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( - dc->res_pool->hubbub, true, false); + dc->res_pool->hubbub, true, false); udelay(500); } /* Set pipe update flags and lock pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i], - &context->res_ctx.pipe_ctx[i]); + &context->res_ctx.pipe_ctx[i]); /* When disabling phantom pipes, turn on phantom OTG first (so we can get double * buffer updates properly) */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream; + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream && - dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; if (tg->funcs->enable_crtc) { - if (dc->hwseq->funcs.blank_pixel_data) { + if (dc->hwseq->funcs.blank_pixel_data) 
dc->hwseq->funcs.blank_pixel_data(dc, pipe, true); - } + tg->funcs->enable_crtc(tg); } } @@ -2075,15 +2100,15 @@ void dcn20_program_front_end_for_ctx( /* OTG blank before disabling all front ends */ for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable - && !context->res_ctx.pipe_ctx[i].top_pipe - && !context->res_ctx.pipe_ctx[i].prev_odm_pipe - && context->res_ctx.pipe_ctx[i].stream) + && !context->res_ctx.pipe_ctx[i].top_pipe + && !context->res_ctx.pipe_ctx[i].prev_odm_pipe + && context->res_ctx.pipe_ctx[i].stream) hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); /* Disconnect mpcc */ for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable - || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { + || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { struct hubbub *hubbub = dc->res_pool->hubbub; /* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom @@ -2093,13 +2118,18 @@ void dcn20_program_front_end_for_ctx( * DET allocation. 
*/ if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable || - (context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM))) { + (context->res_ctx.pipe_ctx[i].plane_state && + dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) + == SUBVP_PHANTOM))) { if (hubbub->funcs->program_det_size) - hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); + hubbub->funcs->program_det_size(hubbub, + dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); if (dc->res_pool->hubbub->funcs->program_det_segments) - dc->res_pool->hubbub->funcs->program_det_segments(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); + dc->res_pool->hubbub->funcs->program_det_segments( + hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); } - hws->funcs.plane_atomic_disconnect(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); + hws->funcs.plane_atomic_disconnect(dc, dc->current_state, + &dc->current_state->res_ctx.pipe_ctx[i]); DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); } @@ -2107,9 +2137,9 @@ void dcn20_program_front_end_for_ctx( for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; if (resource_is_pipe_type(pipe, OTG_MASTER) && - !resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->update_flags.bits.odm && - hws->funcs.update_odm) + !resource_is_pipe_type(pipe, DPP_PIPE) && + pipe->update_flags.bits.odm && + hws->funcs.update_odm) hws->funcs.update_odm(dc, context, pipe); } @@ -2127,25 +2157,28 @@ void dcn20_program_front_end_for_ctx( else { /* Don't program phantom pipes in the regular front end programming sequence. * There is an MPO transition case where a pipe being used by a video plane is - * transitioned directly to be a phantom pipe when closing the MPO video. 
However - * the phantom pipe will program a new HUBP_VTG_SEL (update takes place right away), - * but the MPO still exists until the double buffered update of the main pipe so we - * will get a frame of underflow if the phantom pipe is programmed here. + * transitioned directly to be a phantom pipe when closing the MPO video. + * However the phantom pipe will program a new HUBP_VTG_SEL (update takes place + * right away) but the MPO still exists until the double buffered update of the + * main pipe so we will get a frame of underflow if the phantom pipe is + * programmed here. */ - if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) + if (pipe->stream && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) dcn20_program_pipe(dc, pipe, context); } pipe = pipe->bottom_pipe; } } + /* Program secondary blending tree and writeback pipes */ pipe = &context->res_ctx.pipe_ctx[i]; if (!pipe->top_pipe && !pipe->prev_odm_pipe - && pipe->stream && pipe->stream->num_wb_info > 0 - && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) - || pipe->stream->update_flags.raw) - && hws->funcs.program_all_writeback_pipes_in_tree) + && pipe->stream && pipe->stream->num_wb_info > 0 + && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) + || pipe->stream->update_flags.raw) + && hws->funcs.program_all_writeback_pipes_in_tree) hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context); /* Avoid underflow by check of pipe line read when adding 2nd plane. */ @@ -2164,7 +2197,7 @@ void dcn20_program_front_end_for_ctx( * buffered pending status clear and reset opp head pipe's none double buffered * registers to their initial state. 
*/ -static void post_unlock_reset_opp(struct dc *dc, +void dcn20_post_unlock_reset_opp(struct dc *dc, struct pipe_ctx *opp_head) { struct display_stream_compressor *dsc = opp_head->stream_res.dsc; @@ -2201,16 +2234,17 @@ void dcn20_post_unlock_program_front_end( struct dc *dc, struct dc_state *context) { - int i; - const unsigned int TIMEOUT_FOR_PIPE_ENABLE_US = 100000; + // Timeout for pipe enable + unsigned int timeout_us = 100000; unsigned int polling_interval_us = 1; struct dce_hwseq *hwseq = dc->hwseq; + int i; for (i = 0; i < dc->res_pool->pipe_count; i++) if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) && - !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD)) - post_unlock_reset_opp(dc, - &dc->current_state->res_ctx.pipe_ctx[i]); + !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD)) + dcn20_post_unlock_reset_opp(dc, + &dc->current_state->res_ctx.pipe_ctx[i]); for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) @@ -2226,11 +2260,12 @@ void dcn20_post_unlock_program_front_end( struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // Don't check flip pending on phantom pipes if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable && - dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { struct hubp *hubp = pipe->plane_res.hubp; int j = 0; - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us - && hubp->funcs->hubp_is_flip_pending(hubp); j++) + + for (j = 0; j < timeout_us / polling_interval_us + && hubp->funcs->hubp_is_flip_pending(hubp); j++) udelay(polling_interval_us); } } @@ -2244,15 +2279,14 @@ void dcn20_post_unlock_program_front_end( * before we've transitioned to 2:1 or 4:1 */ if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) && - resource_get_odm_slice_count(old_pipe) < 
resource_get_odm_slice_count(pipe) && - dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { + resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { int j = 0; struct timing_generator *tg = pipe->stream_res.tg; - if (tg->funcs->get_optc_double_buffer_pending) { - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us - && tg->funcs->get_optc_double_buffer_pending(tg); j++) + for (j = 0; j < timeout_us / polling_interval_us + && tg->funcs->get_optc_double_buffer_pending(tg); j++) udelay(polling_interval_us); } } @@ -2260,7 +2294,7 @@ void dcn20_post_unlock_program_front_end( if (dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( - dc->res_pool->hubbub, false, false); + dc->res_pool->hubbub, false, false); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -2291,11 +2325,11 @@ void dcn20_post_unlock_program_front_end( return; /* P-State support transitions: - * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe - * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) - * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe - * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe - * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes + * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe + * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) + * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe + * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe + * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes */ if (hwseq->funcs.update_force_pstate) 
dc->hwseq->funcs.update_force_pstate(dc, context); @@ -2310,12 +2344,11 @@ void dcn20_post_unlock_program_front_end( if (hwseq->wa.DEGVIDCN21) dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub); - /* WA for stutter underflow during MPO transitions when adding 2nd plane */ if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) { if (dc->current_state->stream_status[0].plane_count == 1 && - context->stream_status[0].plane_count > 1) { + context->stream_status[0].plane_count > 1) { struct timing_generator *tg = dc->res_pool->timing_generators[0]; @@ -2463,7 +2496,7 @@ bool dcn20_update_bandwidth( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, - calculate_vready_offset_for_group(pipe_ctx), + dcn20_calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h index 5c874f7b0683..9d1ad3b29ca5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h @@ -154,6 +154,21 @@ void dcn20_setup_gsl_group_as_lock( const struct dc *dc, struct pipe_ctx *pipe_ctx, bool enable); - +void dcn20_detect_pipe_changes( + struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe); +void dcn20_enable_plane( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); +void dcn20_update_dchubp_dpp( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); +void dcn20_post_unlock_reset_opp( + struct dc *dc, + struct pipe_ctx *opp_head); #endif /* __DC_HWSS_DCN20_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 0e8d32e3dbae..c32764aef884 100644 --- 
a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -86,7 +86,6 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c index 780ce4c064aa..dcb27cdbce73 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c @@ -86,7 +86,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 5f8f45b48720..fb2ffb637931 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -89,7 +89,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, @@ -98,7 +97,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { 
.set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dcn31_set_backlight_level, + .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, .enable_lvds_link_output = dce110_enable_lvds_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 9b88eb72086d..be26c925fdfa 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -162,6 +162,8 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx int opp_inst[MAX_PIPES] = {0}; int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false); int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true); + struct mpc *mpc = dc->res_pool->mpc; + int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -174,6 +176,16 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); + if (mpc->funcs->set_out_rate_control) { + for (i = 0; i < opp_cnt; ++i) { + mpc->funcs->set_out_rate_control( + mpc, opp_inst[i], + false, + 0, + NULL); + } + } + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index 6bdfbf22ce87..21ef03a76229 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -91,7 +91,6 @@ static const struct hw_sequencer_funcs 
dcn314_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, @@ -100,7 +99,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dcn31_set_backlight_level, + .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, .enable_lvds_link_output = dce110_enable_lvds_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index fa11f075d1f9..ee4de9ddfef4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -985,6 +985,7 @@ void dcn32_init_hw(struct dc *dc) dc->caps.dmub_caps.subvp_psr = dc->ctx->dmub_srv->dmub->feature_caps.subvp_psr_support; dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; + dc->caps.dmub_caps.aux_backlight_support = dc->ctx->dmub_srv->dmub->feature_caps.abm_aux_backlight_support; /* for DCN401 testing only */ dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; @@ -1398,12 +1399,12 @@ void dcn32_disable_link_output(struct dc_link *link, link_hwss->disable_link_output(link, link_res, signal); link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; - - if (signal == SIGNAL_TYPE_EDP && - link->dc->hwss.edp_power_control && - 
!link->skip_implict_edp_power_control) - link->dc->hwss.edp_power_control(link, false); - else if (dmcu != NULL && dmcu->funcs->unlock_phy) + /* + * Add the logic to extract BOTH power up and power down sequences + * from enable/disable link output and only call edp panel control + * in enable_link_dp and disable_link_dp once. + */ + if (dmcu != NULL && dmcu->funcs->unlock_phy) dmcu->funcs->unlock_phy(dmcu); dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 5ecee7e320da..e4d149eff10f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -87,7 +87,6 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e599cdc465bf..623cde76debf 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -426,6 +426,8 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * int opp_inst[MAX_PIPES] = {0}; int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false); int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true); + struct mpc *mpc = dc->res_pool->mpc; + int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -438,6 +440,16 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * 
pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); + if (mpc->funcs->set_out_rate_control) { + for (i = 0; i < opp_cnt; ++i) { + mpc->funcs->set_out_rate_control( + mpc, opp_inst[i], + false, + 0, + NULL); + } + } + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, @@ -788,6 +800,7 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) /* Disable on the current state so the new one isn't cleared. */ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; @@ -944,6 +957,7 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) /*to do, need to support both case*/ hubp->power_gated = true; + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); pipe_ctx->stream = NULL; @@ -1020,8 +1034,13 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp) update_state->pg_pipe_res_update[PG_MPCC][pipe_ctx->plane_res.mpcc_inst] = false; - if (pipe_ctx->stream_res.dsc) + if (pipe_ctx->stream_res.dsc) { update_state->pg_pipe_res_update[PG_DSC][pipe_ctx->stream_res.dsc->inst] = false; + if (dc->caps.sequential_ono) { + update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false; + update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false; + } + } if (pipe_ctx->stream_res.opp) update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false; @@ -1579,3 +1598,37 @@ bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx) return false; } + +/* + * Set powerup to true for every pipe to match pre-OS configuration. 
+ */ +static void dcn35_calc_blocks_to_ungate_for_hw_release(struct dc *dc, struct pg_block_update *update_state) +{ + int i = 0, j = 0; + + memset(update_state, 0, sizeof(struct pg_block_update)); + + for (i = 0; i < dc->res_pool->pipe_count; i++) + for (j = 0; j < PG_HW_PIPE_RESOURCES_NUM_ELEMENT; j++) + update_state->pg_pipe_res_update[j][i] = true; + + update_state->pg_res_update[PG_HPO] = true; + update_state->pg_res_update[PG_DWB] = true; +} + +/* + * The purpose is to power up all gatings to restore optimization to pre-OS env. + * Re-use hwss func and existing PG&RCG flags to decide powerup sequence. + */ +void dcn35_hardware_release(struct dc *dc) +{ + struct pg_block_update pg_update_state; + + dcn35_calc_blocks_to_ungate_for_hw_release(dc, &pg_update_state); + + if (dc->hwss.root_clock_control) + dc->hwss.root_clock_control(dc, &pg_update_state, true); + /*power up required HW block*/ + if (dc->hwss.hw_block_power_up) + dc->hwss.hw_block_power_up(dc, &pg_update_state); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h index e27b3609020f..0b1d6f608edd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h @@ -99,4 +99,6 @@ void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx, bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); +void dcn35_hardware_release(struct dc *dc); + #endif /* __DC_HWSS_DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index fd67779c27a9..c7acaf97974c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -92,7 +92,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = 
dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, @@ -123,6 +122,11 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .hardware_release = dcn35_hardware_release, + .detect_pipe_changes = dcn20_detect_pipe_changes, + .enable_plane = dcn20_enable_plane, + .update_dchubp_dpp = dcn20_update_dchubp_dpp, + .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, }; static const struct hwseq_private_funcs dcn35_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 3c275a1eff58..4f73e7f551ac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -91,7 +91,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 307782592789..555a9f590cd7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -3,6 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. 
#include "dm_services.h" +#include "basics/dc_common.h" #include "dm_helpers.h" #include "core_types.h" #include "resource.h" @@ -126,91 +127,6 @@ void dcn401_program_gamut_remap(struct pipe_ctx *pipe_ctx) mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust); } -struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, uint8_t region) -{ - struct dce_hwseq *hws = dc->hwseq; - struct ips_ono_region_state state = {0, 0}; - - switch (region) { - case 0: - /* dccg, dio, dcio */ - REG_GET_2(DOMAIN22_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 1: - /* dchubbub, dchvm, dchubbubmem */ - REG_GET_2(DOMAIN23_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 2: - /* mpc, opp, optc, dwb */ - REG_GET_2(DOMAIN24_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 3: - /* hpo */ - REG_GET_2(DOMAIN25_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 4: - /* dchubp0, dpp0 */ - REG_GET_2(DOMAIN0_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 5: - /* dsc0 */ - REG_GET_2(DOMAIN16_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 6: - /* dchubp1, dpp1 */ - REG_GET_2(DOMAIN1_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 7: - /* dsc1 */ - REG_GET_2(DOMAIN17_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 8: - /* dchubp2, dpp2 */ - REG_GET_2(DOMAIN2_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, 
&state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 9: - /* dsc2 */ - REG_GET_2(DOMAIN18_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 10: - /* dchubp3, dpp3 */ - REG_GET_2(DOMAIN3_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 11: - /* dsc3 */ - REG_GET_2(DOMAIN19_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - default: - break; - } - - return state; -} - void dcn401_init_hw(struct dc *dc) { struct abm **abms = dc->res_pool->multiple_abms; @@ -435,7 +351,8 @@ void dcn401_init_hw(struct dc *dc) dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0; dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; - dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2; + dc->debug.fams2_config.bits.enable &= + dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver; // sw & fw fams versions must match for support if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) || res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) { /* update bounding box if FAMS2 disabled, or if dchub clk has changed */ @@ -821,7 +738,7 @@ enum dc_status dcn401_enable_stream_timing( int opp_inst[MAX_PIPES] = {0}; struct pipe_ctx *opp_heads[MAX_PIPES] = {0}; struct dc_crtc_timing patched_crtc_timing = stream->timing; - bool manual_mode; + bool manual_mode = false; unsigned int tmds_div = PIXEL_RATE_DIV_NA; unsigned int unused_div = PIXEL_RATE_DIV_NA; int odm_slice_width; @@ -880,15 +797,15 @@ enum dc_status dcn401_enable_stream_timing( 
patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->hblank_borrow; pipe_ctx->stream_res.tg->funcs->program_timing( - pipe_ctx->stream_res.tg, - &patched_crtc_timing, - pipe_ctx->pipe_dlg_param.vready_offset, - pipe_ctx->pipe_dlg_param.vstartup_start, - pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width, - pipe_ctx->pipe_dlg_param.pstate_keepout, - pipe_ctx->stream->signal, - true); + pipe_ctx->stream_res.tg, + &patched_crtc_timing, + (unsigned int)pipe_ctx->global_sync.dcn4x.vready_offset_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines, + pipe_ctx->stream->signal, + true); for (i = 0; i < opp_cnt; i++) { opp_heads[i]->stream_res.opp->funcs->opp_pipe_clock_control( @@ -2012,3 +1929,730 @@ void dcn401_reset_hw_ctx_wrap( } } } + +static unsigned int dcn401_calculate_vready_offset_for_group(struct pipe_ctx *pipe) +{ + struct pipe_ctx *other_pipe; + unsigned int vready_offset = pipe->global_sync.dcn4x.vready_offset_pixels; + + /* Always use the largest vready_offset of all connected pipes */ + for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; + } + for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; + } + for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) + vready_offset = 
other_pipe->global_sync.dcn4x.vready_offset_pixels; + } + for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; + } + + return vready_offset; +} + +static void dcn401_program_tg( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dce_hwseq *hws) +{ + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, + dcn401_calculate_vready_offset_for_group(pipe_ctx), + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines); + + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + + pipe_ctx->stream_res.tg->funcs->set_vtg_params( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); + + if (hws->funcs.setup_vupdate_interrupt) + hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); +} + +static void dcn401_program_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context) +{ + struct dce_hwseq *hws = dc->hwseq; + + /* Only need to unblank on top pipe */ + if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) { + if (pipe_ctx->update_flags.bits.enable || + pipe_ctx->update_flags.bits.odm || + pipe_ctx->stream->update_flags.bits.abm_level) + hws->funcs.blank_pixel_data(dc, pipe_ctx, + !pipe_ctx->plane_state || + !pipe_ctx->plane_state->visible); + } + + /* Only update TG on top pipe */ + if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe + && !pipe_ctx->prev_odm_pipe) + dcn401_program_tg(dc, pipe_ctx, context, hws); + + if (pipe_ctx->update_flags.bits.odm) + 
hws->funcs.update_odm(dc, context, pipe_ctx); + + if (pipe_ctx->update_flags.bits.enable) { + if (hws->funcs.enable_plane) + hws->funcs.enable_plane(dc, pipe_ctx, context); + else + dc->hwss.enable_plane(dc, pipe_ctx, context); + + if (dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes) + dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes(dc->res_pool->hubbub); + } + + if (pipe_ctx->update_flags.bits.det_size) { + if (dc->res_pool->hubbub->funcs->program_det_size) + dc->res_pool->hubbub->funcs->program_det_size( + dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->det_buffer_size_kb); + if (dc->res_pool->hubbub->funcs->program_det_segments) + dc->res_pool->hubbub->funcs->program_det_segments( + dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); + } + + if (pipe_ctx->update_flags.raw || + (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || + pipe_ctx->stream->update_flags.raw) + dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context); + + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) + hws->funcs.set_hdr_multiplier(pipe_ctx); + + if (hws->funcs.populate_mcm_luts) { + if (pipe_ctx->plane_state) { + hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, + pipe_ctx->plane_state->lut_bank_a); + pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + } + } + + if (pipe_ctx->plane_state && + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change || + pipe_ctx->plane_state->update_flags.bits.lut_3d || + pipe_ctx->update_flags.bits.enable)) + hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); + + /* dcn10_translate_regamma_to_hw_format takes 750us to finish + * only do gamma programming for powering on, internal memcmp to avoid + * updating on slave planes + */ + if (pipe_ctx->update_flags.bits.enable 
|| + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf || + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); + + /* If the pipe has been enabled or has a different opp, we + * should reprogram the fmt. This deals with cases where + * interation between mpc and odm combine on different streams + * causes a different pipe to be chosen to odm combine with. + */ + if (pipe_ctx->update_flags.bits.enable + || pipe_ctx->update_flags.bits.opp_changed) { + + pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( + pipe_ctx->stream_res.opp, + COLOR_SPACE_YCBCR601, + pipe_ctx->stream->timing.display_color_depth, + pipe_ctx->stream->signal); + + pipe_ctx->stream_res.opp->funcs->opp_program_fmt( + pipe_ctx->stream_res.opp, + &pipe_ctx->stream->bit_depth_params, + &pipe_ctx->stream->clamping); + } + + /* Set ABM pipe after other pipe configurations done */ + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) { + if (pipe_ctx->stream_res.abm) { + dc->hwss.set_pipe(pipe_ctx); + pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm, + pipe_ctx->stream->abm_level); + } + } + + if (pipe_ctx->update_flags.bits.test_pattern_changed) { + struct output_pixel_processor *odm_opp = pipe_ctx->stream_res.opp; + struct bit_depth_reduction_params params; + + memset(&params, 0, sizeof(params)); + odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params); + dc->hwss.set_disp_pattern_generator(dc, + pipe_ctx, + pipe_ctx->stream_res.test_pattern_params.test_pattern, + pipe_ctx->stream_res.test_pattern_params.color_space, + pipe_ctx->stream_res.test_pattern_params.color_depth, + NULL, + pipe_ctx->stream_res.test_pattern_params.width, + pipe_ctx->stream_res.test_pattern_params.height, + pipe_ctx->stream_res.test_pattern_params.offset); + } +} + +void dcn401_program_front_end_for_ctx( + struct dc *dc, + struct dc_state 
*context) +{ + int i; + unsigned int prev_hubp_count = 0; + unsigned int hubp_count = 0; + struct dce_hwseq *hws = dc->hwseq; + struct pipe_ctx *pipe = NULL; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (resource_is_pipe_topology_changed(dc->current_state, context)) + resource_log_pipe_topology_update(dc, context); + + if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->top_pipe && !pipe->prev_odm_pipe && pipe->plane_state) { + if (pipe->plane_state->triplebuffer_flips) + BREAK_TO_DEBUGGER(); + + /*turn off triple buffer for full update*/ + dc->hwss.program_triplebuffer( + dc, pipe, pipe->plane_state->triplebuffer_flips); + } + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (dc->current_state->res_ctx.pipe_ctx[i].plane_state) + prev_hubp_count++; + if (context->res_ctx.pipe_ctx[i].plane_state) + hubp_count++; + } + + if (prev_hubp_count == 0 && hubp_count > 0) { + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, true, false); + udelay(500); + } + + /* Set pipe update flags and lock pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) + dc->hwss.detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i], + &context->res_ctx.pipe_ctx[i]); + + /* When disabling phantom pipes, turn on phantom OTG first (so we can get double + * buffer updates properly) + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream; + + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream && + dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { + struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; + + if 
(tg->funcs->enable_crtc) { + if (dc->hwseq->funcs.blank_pixel_data) + dc->hwseq->funcs.blank_pixel_data(dc, pipe, true); + + tg->funcs->enable_crtc(tg); + } + } + } + /* OTG blank before disabling all front ends */ + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable + && !context->res_ctx.pipe_ctx[i].top_pipe + && !context->res_ctx.pipe_ctx[i].prev_odm_pipe + && context->res_ctx.pipe_ctx[i].stream) + hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); + + + /* Disconnect mpcc */ + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable + || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom + * then we want to do the programming here (effectively it's being disabled). If we do + * the programming later the DET won't be updated until the OTG for the phantom pipe is + * turned on (i.e. in an MCLK switch) which can come in too late and cause issues with + * DET allocation. 
+ */ + if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable || + (context->res_ctx.pipe_ctx[i].plane_state && + dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == + SUBVP_PHANTOM))) { + if (hubbub->funcs->program_det_size) + hubbub->funcs->program_det_size(hubbub, + dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); + if (dc->res_pool->hubbub->funcs->program_det_segments) + dc->res_pool->hubbub->funcs->program_det_segments( + hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); + } + hws->funcs.plane_atomic_disconnect(dc, dc->current_state, + &dc->current_state->res_ctx.pipe_ctx[i]); + DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); + } + + /* update ODM for blanked OTG master pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + if (resource_is_pipe_type(pipe, OTG_MASTER) && + !resource_is_pipe_type(pipe, DPP_PIPE) && + pipe->update_flags.bits.odm && + hws->funcs.update_odm) + hws->funcs.update_odm(dc, context, pipe); + } + + /* + * Program all updated pipes, order matters for mpcc setup. Start with + * top pipe and program all pipes that follow in order + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state && !pipe->top_pipe) { + while (pipe) { + if (hws->funcs.program_pipe) + hws->funcs.program_pipe(dc, pipe, context); + else { + /* Don't program phantom pipes in the regular front end programming sequence. + * There is an MPO transition case where a pipe being used by a video plane is + * transitioned directly to be a phantom pipe when closing the MPO video. + * However the phantom pipe will program a new HUBP_VTG_SEL (update takes place + * right away) but the MPO still exists until the double buffered update of the + * main pipe so we will get a frame of underflow if the phantom pipe is + * programmed here. 
+ */ + if (pipe->stream && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) + dcn401_program_pipe(dc, pipe, context); + } + + pipe = pipe->bottom_pipe; + } + } + + /* Program secondary blending tree and writeback pipes */ + pipe = &context->res_ctx.pipe_ctx[i]; + if (!pipe->top_pipe && !pipe->prev_odm_pipe + && pipe->stream && pipe->stream->num_wb_info > 0 + && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) + || pipe->stream->update_flags.raw) + && hws->funcs.program_all_writeback_pipes_in_tree) + hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context); + + /* Avoid underflow by check of pipe line read when adding 2nd plane. */ + if (hws->wa.wait_hubpret_read_start_during_mpo_transition && + !pipe->top_pipe && + pipe->stream && + pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start && + dc->current_state->stream_status[0].plane_count == 1 && + context->stream_status[0].plane_count > 1) { + pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp); + } + } +} + +void dcn401_post_unlock_program_front_end( + struct dc *dc, + struct dc_state *context) +{ + // Timeout for pipe enable + unsigned int timeout_us = 100000; + unsigned int polling_interval_us = 1; + struct dce_hwseq *hwseq = dc->hwseq; + int i; + + DC_LOGGER_INIT(dc->ctx->logger); + + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) && + !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD)) + dc->hwss.post_unlock_reset_opp(dc, + &dc->current_state->res_ctx.pipe_ctx[i]); + + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) + dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); + + /* + * If we are enabling a pipe, we need to wait for pending clear as this is a critical + * part of the enable operation otherwise, DM may request an immediate 
flip which + * will cause HW to perform an "immediate enable" (as opposed to "vsync enable") which + * is unsupported on DCN. + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + // Don't check flip pending on phantom pipes + if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { + struct hubp *hubp = pipe->plane_res.hubp; + int j = 0; + + for (j = 0; j < timeout_us / polling_interval_us + && hubp->funcs->hubp_is_flip_pending(hubp); j++) + udelay(polling_interval_us); + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + /* When going from a smaller ODM slice count to larger, we must ensure double + * buffer update completes before we return to ensure we don't reduce DISPCLK + * before we've transitioned to 2:1 or 4:1 + */ + if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) && + resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { + int j = 0; + struct timing_generator *tg = pipe->stream_res.tg; + + if (tg->funcs->get_optc_double_buffer_pending) { + for (j = 0; j < timeout_us / polling_interval_us + && tg->funcs->get_optc_double_buffer_pending(tg); j++) + udelay(polling_interval_us); + } + } + } + + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, false, false); + + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state && !pipe->top_pipe) { + /* Program phantom pipe here to prevent a frame of underflow in the MPO transition + * case (if a pipe being used for a video plane 
transitions to a phantom pipe, it + * can underflow due to HUBP_VTG_SEL programming if done in the regular front end + * programming sequence). + */ + while (pipe) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + /* When turning on the phantom pipe we want to run through the + * entire enable sequence, so apply all the "enable" flags. + */ + if (dc->hwss.apply_update_flags_for_phantom) + dc->hwss.apply_update_flags_for_phantom(pipe); + if (dc->hwss.update_phantom_vp_position) + dc->hwss.update_phantom_vp_position(dc, context, pipe); + dcn401_program_pipe(dc, pipe, context); + } + pipe = pipe->bottom_pipe; + } + } + } + + if (!hwseq) + return; + + /* P-State support transitions: + * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe + * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) + * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe + * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe + * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes + */ + if (hwseq->funcs.update_force_pstate) + dc->hwseq->funcs.update_force_pstate(dc, context); + + /* Only program the MALL registers after all the main and phantom pipes + * are done programming. 
+ */ + if (hwseq->funcs.program_mall_pipe_config) + hwseq->funcs.program_mall_pipe_config(dc, context); + + /* WA to apply WM setting*/ + if (hwseq->wa.DEGVIDCN21) + dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub); + + + /* WA for stutter underflow during MPO transitions when adding 2nd plane */ + if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) { + + if (dc->current_state->stream_status[0].plane_count == 1 && + context->stream_status[0].plane_count > 1) { + + struct timing_generator *tg = dc->res_pool->timing_generators[0]; + + dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, false); + + hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied = true; + hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied_on_frame = + tg->funcs->get_frame_count(tg); + } + } +} + +bool dcn401_update_bandwidth( + struct dc *dc, + struct dc_state *context) +{ + int i; + struct dce_hwseq *hws = dc->hwseq; + + /* recalculate DML parameters */ + if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) + return false; + + /* apply updated bandwidth parameters */ + dc->hwss.prepare_bandwidth(dc, context); + + /* update hubp configs for all pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->plane_state == NULL) + continue; + + if (pipe_ctx->top_pipe == NULL) { + bool blank = !is_pipe_tree_visible(pipe_ctx); + + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, + dcn401_calculate_vready_offset_for_group(pipe_ctx), + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines); + + pipe_ctx->stream_res.tg->funcs->set_vtg_params( + 
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false); + + if (pipe_ctx->prev_odm_pipe == NULL) + hws->funcs.blank_pixel_data(dc, pipe_ctx, blank); + + if (hws->funcs.setup_vupdate_interrupt) + hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); + } + + if (pipe_ctx->plane_res.hubp->funcs->hubp_setup2) + pipe_ctx->plane_res.hubp->funcs->hubp_setup2( + pipe_ctx->plane_res.hubp, + &pipe_ctx->hubp_regs, + &pipe_ctx->global_sync, + &pipe_ctx->stream->timing); + } + + return true; +} + +void dcn401_detect_pipe_changes(struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe) +{ + bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM; + bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM; + + unsigned int old_pipe_vready_offset_pixels = old_pipe->global_sync.dcn4x.vready_offset_pixels; + unsigned int new_pipe_vready_offset_pixels = new_pipe->global_sync.dcn4x.vready_offset_pixels; + unsigned int old_pipe_vstartup_lines = old_pipe->global_sync.dcn4x.vstartup_lines; + unsigned int new_pipe_vstartup_lines = new_pipe->global_sync.dcn4x.vstartup_lines; + unsigned int old_pipe_vupdate_offset_pixels = old_pipe->global_sync.dcn4x.vupdate_offset_pixels; + unsigned int new_pipe_vupdate_offset_pixels = new_pipe->global_sync.dcn4x.vupdate_offset_pixels; + unsigned int old_pipe_vupdate_width_pixels = old_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels; + unsigned int new_pipe_vupdate_width_pixels = new_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels; + + new_pipe->update_flags.raw = 0; + + /* If non-phantom pipe is being transitioned to a phantom pipe, + * set disable and return immediately. This is because the pipe + * that was previously in use must be fully disabled before we + * can "enable" it as a phantom pipe (since the OTG will certainly + * be different). 
The post_unlock sequence will set the correct + * update flags to enable the phantom pipe. + */ + if (old_pipe->plane_state && !old_is_phantom && + new_pipe->plane_state && new_is_phantom) { + new_pipe->update_flags.bits.disable = 1; + return; + } + + if (resource_is_pipe_type(new_pipe, OTG_MASTER) && + resource_is_odm_topology_changed(new_pipe, old_pipe)) + /* Detect odm changes */ + new_pipe->update_flags.bits.odm = 1; + + /* Exit on unchanged, unused pipe */ + if (!old_pipe->plane_state && !new_pipe->plane_state) + return; + /* Detect pipe enable/disable */ + if (!old_pipe->plane_state && new_pipe->plane_state) { + new_pipe->update_flags.bits.enable = 1; + new_pipe->update_flags.bits.mpcc = 1; + new_pipe->update_flags.bits.dppclk = 1; + new_pipe->update_flags.bits.hubp_interdependent = 1; + new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; + new_pipe->update_flags.bits.unbounded_req = 1; + new_pipe->update_flags.bits.gamut_remap = 1; + new_pipe->update_flags.bits.scaler = 1; + new_pipe->update_flags.bits.viewport = 1; + new_pipe->update_flags.bits.det_size = 1; + if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE && + new_pipe->stream_res.test_pattern_params.width != 0 && + new_pipe->stream_res.test_pattern_params.height != 0) + new_pipe->update_flags.bits.test_pattern_changed = 1; + if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) { + new_pipe->update_flags.bits.odm = 1; + new_pipe->update_flags.bits.global_sync = 1; + } + return; + } + + /* For SubVP we need to unconditionally enable because any phantom pipes are + * always removed then newly added for every full updates whenever SubVP is in use. + * The remove-add sequence of the phantom pipe always results in the pipe + * being blanked in enable_stream_timing (DPG). 
+ */ + if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM) + new_pipe->update_flags.bits.enable = 1; + + /* Phantom pipes are effectively disabled, if the pipe was previously phantom + * we have to enable + */ + if (old_pipe->plane_state && old_is_phantom && + new_pipe->plane_state && !new_is_phantom) + new_pipe->update_flags.bits.enable = 1; + + if (old_pipe->plane_state && !new_pipe->plane_state) { + new_pipe->update_flags.bits.disable = 1; + return; + } + + /* Detect plane change */ + if (old_pipe->plane_state != new_pipe->plane_state) + new_pipe->update_flags.bits.plane_changed = true; + + /* Detect top pipe only changes */ + if (resource_is_pipe_type(new_pipe, OTG_MASTER)) { + /* Detect global sync changes */ + if ((old_pipe_vready_offset_pixels != new_pipe_vready_offset_pixels) + || (old_pipe_vstartup_lines != new_pipe_vstartup_lines) + || (old_pipe_vupdate_offset_pixels != new_pipe_vupdate_offset_pixels) + || (old_pipe_vupdate_width_pixels != new_pipe_vupdate_width_pixels)) + new_pipe->update_flags.bits.global_sync = 1; + } + + if (old_pipe->det_buffer_size_kb != new_pipe->det_buffer_size_kb) + new_pipe->update_flags.bits.det_size = 1; + + /* + * Detect opp / tg change, only set on change, not on enable + * Assume mpcc inst = pipe index, if not this code needs to be updated + * since mpcc is what is affected by these. In fact all of our sequence + * makes this assumption at the moment with how hubp reset is matched to + * same index mpcc reset. 
+ */ + if (old_pipe->stream_res.opp != new_pipe->stream_res.opp) + new_pipe->update_flags.bits.opp_changed = 1; + if (old_pipe->stream_res.tg != new_pipe->stream_res.tg) + new_pipe->update_flags.bits.tg_changed = 1; + + /* + * Detect mpcc blending changes, only dpp inst and opp matter here, + * mpccs getting removed/inserted update connected ones during their own + * programming + */ + if (old_pipe->plane_res.dpp != new_pipe->plane_res.dpp + || old_pipe->stream_res.opp != new_pipe->stream_res.opp) + new_pipe->update_flags.bits.mpcc = 1; + + /* Detect dppclk change */ + if (old_pipe->plane_res.bw.dppclk_khz != new_pipe->plane_res.bw.dppclk_khz) + new_pipe->update_flags.bits.dppclk = 1; + + /* Check for scl update */ + if (memcmp(&old_pipe->plane_res.scl_data, &new_pipe->plane_res.scl_data, sizeof(struct scaler_data))) + new_pipe->update_flags.bits.scaler = 1; + /* Check for vp update */ + if (memcmp(&old_pipe->plane_res.scl_data.viewport, &new_pipe->plane_res.scl_data.viewport, sizeof(struct rect)) + || memcmp(&old_pipe->plane_res.scl_data.viewport_c, + &new_pipe->plane_res.scl_data.viewport_c, sizeof(struct rect))) + new_pipe->update_flags.bits.viewport = 1; + + /* Detect dlg/ttu/rq updates */ + { + struct dml2_display_dlg_regs old_dlg_regs = old_pipe->hubp_regs.dlg_regs; + struct dml2_display_ttu_regs old_ttu_regs = old_pipe->hubp_regs.ttu_regs; + struct dml2_display_rq_regs old_rq_regs = old_pipe->hubp_regs.rq_regs; + struct dml2_display_dlg_regs *new_dlg_regs = &new_pipe->hubp_regs.dlg_regs; + struct dml2_display_ttu_regs *new_ttu_regs = &new_pipe->hubp_regs.ttu_regs; + struct dml2_display_rq_regs *new_rq_regs = &new_pipe->hubp_regs.rq_regs; + + /* Detect pipe interdependent updates */ + if ((old_dlg_regs.dst_y_prefetch != new_dlg_regs->dst_y_prefetch) + || (old_dlg_regs.vratio_prefetch != new_dlg_regs->vratio_prefetch) + || (old_dlg_regs.vratio_prefetch_c != new_dlg_regs->vratio_prefetch_c) + || (old_dlg_regs.dst_y_per_vm_vblank != 
new_dlg_regs->dst_y_per_vm_vblank) + || (old_dlg_regs.dst_y_per_row_vblank != new_dlg_regs->dst_y_per_row_vblank) + || (old_dlg_regs.dst_y_per_vm_flip != new_dlg_regs->dst_y_per_vm_flip) + || (old_dlg_regs.dst_y_per_row_flip != new_dlg_regs->dst_y_per_row_flip) + || (old_dlg_regs.refcyc_per_meta_chunk_vblank_l != new_dlg_regs->refcyc_per_meta_chunk_vblank_l) + || (old_dlg_regs.refcyc_per_meta_chunk_vblank_c != new_dlg_regs->refcyc_per_meta_chunk_vblank_c) + || (old_dlg_regs.refcyc_per_meta_chunk_flip_l != new_dlg_regs->refcyc_per_meta_chunk_flip_l) + || (old_dlg_regs.refcyc_per_line_delivery_pre_l != new_dlg_regs->refcyc_per_line_delivery_pre_l) + || (old_dlg_regs.refcyc_per_line_delivery_pre_c != new_dlg_regs->refcyc_per_line_delivery_pre_c) + || (old_ttu_regs.refcyc_per_req_delivery_pre_l != new_ttu_regs->refcyc_per_req_delivery_pre_l) + || (old_ttu_regs.refcyc_per_req_delivery_pre_c != new_ttu_regs->refcyc_per_req_delivery_pre_c) + || (old_ttu_regs.refcyc_per_req_delivery_pre_cur0 != + new_ttu_regs->refcyc_per_req_delivery_pre_cur0) + || (old_ttu_regs.min_ttu_vblank != new_ttu_regs->min_ttu_vblank) + || (old_ttu_regs.qos_level_flip != new_ttu_regs->qos_level_flip)) { + old_dlg_regs.dst_y_prefetch = new_dlg_regs->dst_y_prefetch; + old_dlg_regs.vratio_prefetch = new_dlg_regs->vratio_prefetch; + old_dlg_regs.vratio_prefetch_c = new_dlg_regs->vratio_prefetch_c; + old_dlg_regs.dst_y_per_vm_vblank = new_dlg_regs->dst_y_per_vm_vblank; + old_dlg_regs.dst_y_per_row_vblank = new_dlg_regs->dst_y_per_row_vblank; + old_dlg_regs.dst_y_per_vm_flip = new_dlg_regs->dst_y_per_vm_flip; + old_dlg_regs.dst_y_per_row_flip = new_dlg_regs->dst_y_per_row_flip; + old_dlg_regs.refcyc_per_meta_chunk_vblank_l = new_dlg_regs->refcyc_per_meta_chunk_vblank_l; + old_dlg_regs.refcyc_per_meta_chunk_vblank_c = new_dlg_regs->refcyc_per_meta_chunk_vblank_c; + old_dlg_regs.refcyc_per_meta_chunk_flip_l = new_dlg_regs->refcyc_per_meta_chunk_flip_l; + old_dlg_regs.refcyc_per_line_delivery_pre_l = 
new_dlg_regs->refcyc_per_line_delivery_pre_l; + old_dlg_regs.refcyc_per_line_delivery_pre_c = new_dlg_regs->refcyc_per_line_delivery_pre_c; + old_ttu_regs.refcyc_per_req_delivery_pre_l = new_ttu_regs->refcyc_per_req_delivery_pre_l; + old_ttu_regs.refcyc_per_req_delivery_pre_c = new_ttu_regs->refcyc_per_req_delivery_pre_c; + old_ttu_regs.refcyc_per_req_delivery_pre_cur0 = new_ttu_regs->refcyc_per_req_delivery_pre_cur0; + old_ttu_regs.min_ttu_vblank = new_ttu_regs->min_ttu_vblank; + old_ttu_regs.qos_level_flip = new_ttu_regs->qos_level_flip; + new_pipe->update_flags.bits.hubp_interdependent = 1; + } + /* Detect any other updates to ttu/rq/dlg */ + if (memcmp(&old_dlg_regs, new_dlg_regs, sizeof(old_dlg_regs)) || + memcmp(&old_ttu_regs, new_ttu_regs, sizeof(old_ttu_regs)) || + memcmp(&old_rq_regs, new_rq_regs, sizeof(old_rq_regs))) + new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; + } + + if (old_pipe->unbounded_req != new_pipe->unbounded_req) + new_pipe->update_flags.bits.unbounded_req = 1; + + if (memcmp(&old_pipe->stream_res.test_pattern_params, + &new_pipe->stream_res.test_pattern_params, sizeof(struct test_pattern_params))) { + new_pipe->update_flags.bits.test_pattern_changed = 1; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 28a513dfc005..17cea748789e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -63,8 +63,6 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx); bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable); -struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, - uint8_t region); void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc, const struct pipe_ctx *top_pipe_to_program); @@ -96,5 +94,12 @@ void dcn401_reset_hw_ctx_wrap( struct dc *dc, struct dc_state *context); void dcn401_perform_3dlut_wa_unlock(struct pipe_ctx 
*pipe_ctx); - +void dcn401_program_front_end_for_ctx(struct dc *dc, struct dc_state *context); +void dcn401_post_unlock_program_front_end(struct dc *dc, struct dc_state *context); +bool dcn401_update_bandwidth(struct dc *dc, struct dc_state *context); +void dcn401_detect_pipe_changes( + struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 23e4f208152e..44cb376f97c1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -17,9 +17,9 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .init_hw = dcn401_init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .program_front_end_for_ctx = dcn401_program_front_end_for_ctx, .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .post_unlock_program_front_end = dcn401_post_unlock_program_front_end, .update_plane_addr = dcn20_update_plane_addr, .update_dchub = dcn10_update_dchub, .update_pending_status = dcn10_update_pending_status, @@ -42,7 +42,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .cursor_lock = dcn10_cursor_lock, .prepare_bandwidth = dcn401_prepare_bandwidth, .optimize_bandwidth = dcn401_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, + .update_bandwidth = dcn401_update_bandwidth, .set_drr = dcn10_set_drr, .get_position = dcn10_get_position, .set_static_screen_control = dcn31_set_static_screen_control, @@ -66,7 +66,6 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = 
dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, @@ -100,6 +99,10 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, .program_outstanding_updates = dcn401_program_outstanding_updates, .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, + .detect_pipe_changes = dcn401_detect_pipe_changes, + .enable_plane = dcn20_enable_plane, + .update_dchubp_dpp = dcn20_update_dchubp_dpp, + .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, }; static const struct hwseq_private_funcs dcn401_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 66fdc5805d0a..a7d66cfd93c9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -194,7 +194,6 @@ enum block_sequence_func { DMUB_SUBVP_SAVE_SURF_ADDR, HUBP_WAIT_FOR_DCC_META_PROP, DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST, - }; struct block_sequence { @@ -331,10 +330,6 @@ struct hw_sequencer_funcs { void (*disable_writeback)(struct dc *dc, unsigned int dwb_pipe_inst); - bool (*mmhubbub_warmup)(struct dc *dc, - unsigned int num_dwb, - struct dc_writeback_info *wb_info); - /* Clock Related */ enum dc_status (*set_clock)(struct dc *dc, enum dc_clock_type clock_type, @@ -462,6 +457,18 @@ struct hw_sequencer_funcs { struct dc_state *context); void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable); void (*wait_for_all_pending_updates)(const struct pipe_ctx *pipe_ctx); + void (*detect_pipe_changes)(struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe); + void (*enable_plane)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); + void 
(*update_dchubp_dpp)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); + void (*post_unlock_reset_opp)(struct dc *dc, + struct pipe_ctx *opp_head); }; void color_space_to_black_color( @@ -489,11 +496,12 @@ void get_hdr_visual_confirm_color( void get_mpctree_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color); - +void get_vabc_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color); void get_subvp_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color); - void get_fams2_visual_confirm_color( struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 2edd5b38ce4f..d558efc6e12f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -45,9 +45,6 @@ #define MAX_SVP_PHANTOM_STREAMS 2 #define MAX_SVP_PHANTOM_PLANES 2 -void enable_surface_flip_reporting(struct dc_plane_state *plane_state, - uint32_t controller_id); - #include "grph_object_id.h" #include "link_encoder.h" #include "stream_encoder.h" @@ -220,6 +217,7 @@ struct resource_funcs { */ int (*get_power_profile)(const struct dc_state *context); unsigned int (*get_det_buffer_size)(const struct dc_state *context); + unsigned int (*get_vstartup_for_pipe)(struct pipe_ctx *pipe_ctx); }; struct audio_support{ @@ -468,6 +466,7 @@ struct pipe_ctx { unsigned int surface_size_in_mall_bytes; struct dml2_dchub_per_pipe_register_set hubp_regs; struct dml2_hubp_pipe_mcache_regs mcache_regs; + union dml2_global_sync_programming global_sync; struct dwbc *dwbc; struct mcif_wb *mcif_wb; @@ -542,7 +541,8 @@ struct dcn_bw_output { bool legacy_svp_drr_stream_index_valid; struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; struct dmub_cmd_fams2_global_config fams2_global_config; - struct dmub_fams2_stream_static_state fams2_stream_params[DML2_MAX_PLANES]; + union 
dmub_cmd_fams2_config fams2_stream_base_params[DML2_MAX_PLANES]; + union dmub_cmd_fams2_config fams2_stream_sub_params[DML2_MAX_PLANES]; struct dml2_display_arb_regs arb_regs; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index 55529c5f471c..d19a595c2be4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -624,10 +624,6 @@ bool dcn_validate_bandwidth( struct dc_state *context, bool fast_validate); -unsigned int dcn_find_dcfclk_suits_all( - const struct dc *dc, - struct dc_clocks *clocks); - void dcn_get_soc_clks( struct dc *dc, int *min_fclk_khz, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 2d06067ff36d..c14d64687a3d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -306,6 +306,9 @@ struct clk_mgr_funcs { */ void (*set_hard_min_memclk)(struct clk_mgr *clk_mgr, bool current_mode); + int (*get_hard_min_memclk)(struct clk_mgr *clk_mgr); + int (*get_hard_min_fclk)(struct clk_mgr *clk_mgr); + /* Send message to PMFW to set hard max memclk frequency to highest DPM */ void (*set_hard_max_memclk)(struct clk_mgr *clk_mgr); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index c2dd061892f4..7a1ca1e98059 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -166,6 +166,41 @@ enum dentist_divider_range { CLK_SR_DCN32(CLK1_CLK4_CURRENT_CNT), \ CLK_SR_DCN32(CLK4_CLK0_CURRENT_CNT) +#define CLK_REG_LIST_DCN35() \ + CLK_SR_DCN35(CLK1_CLK_PLL_REQ), \ + CLK_SR_DCN35(CLK1_CLK0_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK1_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK2_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK3_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK4_DFS_CNTL), \ + 
CLK_SR_DCN35(CLK1_CLK5_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK0_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK1_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK2_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK3_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK4_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK5_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK0_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK1_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK2_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK3_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK4_BYPASS_CNTL),\ + CLK_SR_DCN35(CLK1_CLK5_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK0_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK1_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK2_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK3_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK4_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK5_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK0_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK1_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK2_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK3_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \ + CLK_SR_DCN35(CLK5_spll_field_8), \ + SR(DENTIST_DISPCLK_CNTL), \ + #define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \ CLK_COMMON_MASK_SH_LIST_DCN20_BASE(mask_sh),\ CLK_SF(CLK1_CLK_PLL_REQ, FbMult_int, mask_sh),\ @@ -236,6 +271,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_DFS_CNTL; uint32_t CLK1_CLK3_DFS_CNTL; uint32_t CLK1_CLK4_DFS_CNTL; + uint32_t CLK1_CLK5_DFS_CNTL; uint32_t CLK2_CLK2_DFS_CNTL; uint32_t CLK1_CLK0_CURRENT_CNT; @@ -243,11 +279,34 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_CURRENT_CNT; uint32_t CLK1_CLK3_CURRENT_CNT; uint32_t CLK1_CLK4_CURRENT_CNT; + uint32_t CLK1_CLK5_CURRENT_CNT; uint32_t CLK0_CLK0_DFS_CNTL; uint32_t CLK0_CLK1_DFS_CNTL; uint32_t CLK0_CLK3_DFS_CNTL; uint32_t CLK0_CLK4_DFS_CNTL; + uint32_t CLK1_CLK0_BYPASS_CNTL; + uint32_t CLK1_CLK1_BYPASS_CNTL; + uint32_t CLK1_CLK2_BYPASS_CNTL; + uint32_t CLK1_CLK3_BYPASS_CNTL; + uint32_t CLK1_CLK4_BYPASS_CNTL; + uint32_t CLK1_CLK5_BYPASS_CNTL; + + uint32_t CLK1_CLK0_DS_CNTL; + uint32_t CLK1_CLK1_DS_CNTL; + uint32_t 
CLK1_CLK2_DS_CNTL; + uint32_t CLK1_CLK3_DS_CNTL; + uint32_t CLK1_CLK4_DS_CNTL; + uint32_t CLK1_CLK5_DS_CNTL; + + uint32_t CLK1_CLK0_ALLOW_DS; + uint32_t CLK1_CLK1_ALLOW_DS; + uint32_t CLK1_CLK2_ALLOW_DS; + uint32_t CLK1_CLK3_ALLOW_DS; + uint32_t CLK1_CLK4_ALLOW_DS; + uint32_t CLK1_CLK5_ALLOW_DS; + uint32_t CLK5_spll_field_8; + }; struct clk_mgr_shift { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 16580d624278..b610beb075d5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -42,6 +42,7 @@ #include "cursor_reg_cache.h" #include "dml2/dml21/inc/dml_top_dchub_registers.h" +#include "dml2/dml21/inc/dml_top_types.h" #define OPP_ID_INVALID 0xf #define MAX_TTU 0xffffff @@ -144,14 +145,26 @@ struct hubp_funcs { struct _vcs_dpi_display_rq_regs_st *rq_regs, struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); + void (*hubp_setup2)( + struct hubp *hubp, + struct dml2_dchub_per_pipe_register_set *pipe_regs, + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing); + void (*hubp_setup_interdependent)( struct hubp *hubp, struct _vcs_dpi_display_dlg_regs_st *dlg_regs, struct _vcs_dpi_display_ttu_regs_st *ttu_regs); + void (*hubp_setup_interdependent2)( + struct hubp *hubp, + struct dml2_dchub_per_pipe_register_set *pipe_regs); + void (*dcc_control)(struct hubp *hubp, bool enable, enum hubp_ind_block_size blk_size); + void (*hubp_reset)(struct hubp *hubp); + void (*mem_program_viewport)( struct hubp *hubp, const struct rect *viewport, @@ -165,7 +178,7 @@ struct hubp_funcs { void (*hubp_program_pte_vm)( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum dc_rotation_angle rotation); void (*hubp_set_vm_system_aperture_settings)( @@ -179,7 +192,7 @@ struct hubp_funcs { void (*hubp_program_surface_config)( struct hubp *hubp, enum 
surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -275,6 +288,7 @@ struct hubp_funcs { enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r); int (*hubp_get_3dlut_fl_done)(struct hubp *hubp); + void (*hubp_clear_tiling)(struct hubp *hubp); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index af9183f5d69b..08c16ba52a51 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -168,6 +168,14 @@ struct link_encoder_funcs { struct link_encoder *enc, enum encoder_type_select sel, uint32_t hpo_inst); + void (*enable_dpia_output)(struct link_encoder *enc, + const struct dc_link_settings *link_settings, + uint8_t dpia_id, + uint8_t digmode, + uint8_t fec_rdy); + void (*disable_dpia_output)(struct link_encoder *link_enc, + uint8_t dpia_id, + uint8_t digmode); }; /* diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index a8b44f398ce6..42fbc70f7056 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -150,7 +150,7 @@ struct mem_input_funcs { void (*mem_input_program_pte_vm)( struct mem_input *mem_input, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum dc_rotation_angle rotation); void (*mem_input_set_vm_system_aperture_settings)( @@ -164,7 +164,7 @@ struct mem_input_funcs { void (*mem_input_program_surface_config)( struct mem_input *mem_input, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct 
dc_plane_dcc_param *dcc, @@ -187,6 +187,8 @@ struct mem_input_funcs { const struct dc_cursor_position *pos, const struct dc_cursor_mi_param *param); + void (*mem_input_clear_tiling)( + struct mem_input *mem_input); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h index 03cbcbb36f1c..6fdc9809280c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -210,7 +210,7 @@ void optc1_enable_crtc_reset(struct timing_generator *optc, bool optc1_configure_crc(struct timing_generator *optc, const struct crc_params *params); -bool optc1_get_crc(struct timing_generator *optc, +bool optc1_get_crc(struct timing_generator *optc, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index b74e18cc1e66..9885cb3c310f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -141,6 +141,9 @@ struct crc_params { bool continuous_mode; bool enable; + + uint8_t crc_eng_inst; + bool reset; }; /** @@ -291,7 +294,7 @@ struct timing_generator_funcs { * @get_crc: Get CRCs for the given timing generator. Return false if * CRCs are not enabled (via configure_crc). 
*/ - bool (*get_crc)(struct timing_generator *tg, + bool (*get_crc)(struct timing_generator *tg, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); void (*program_manual_trigger)(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index f04292086c08..fd1f9d3db039 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -148,6 +148,10 @@ struct link_service { const struct dc_stream_state *stream, const unsigned int num_streams); + uint32_t (*dp_required_hblank_size_bytes)( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params); + /*************************** DPMS *************************************/ void (*set_dpms_on)(struct dc_state *state, struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c index 4fb9cd6708d5..1d61d475d36f 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c @@ -30,8 +30,8 @@ #include "../dce110/irq_service_dce110.h" #include "irq_service_dcn201.h" -#include "dcn/dcn_2_0_3_offset.h" -#include "dcn/dcn_2_0_3_sh_mask.h" +#include "dcn/dcn_2_0_1_offset.h" +#include "dcn/dcn_2_0_1_sh_mask.h" #include "cyan_skillfish_ip_offset.h" #include "soc15_hw_ip.h" diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index ff8fe1a94965..96febabf464a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -251,7 +251,7 @@ static void dp_test_send_phy_test_pattern(struct dc_link *link) link_training_settings.lttpr_mode = dp_decide_lttpr_mode(link, &link->cur_link_settings); - if ((link->chip_caps & 
EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && link_training_settings.lttpr_mode == LTTPR_MODE_TRANSPARENT) dp_fixed_vs_pe_read_lane_adjust( link, @@ -646,7 +646,7 @@ bool dp_set_test_pattern( if (IS_DP_PHY_PATTERN(test_pattern)) { /* Set DPCD Lane Settings before running test pattern */ if (p_link_settings != NULL) { - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && p_link_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT) { dp_fixed_vs_pe_set_retimer_lane_settings( link, diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index 3e47a6735912..06faa461067b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -164,7 +164,9 @@ void disable_dio_link_output(struct dc_link *link, { struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); - link_enc->funcs->disable_output(link_enc, signal); + if (link_enc != NULL) + link_enc->funcs->disable_output(link_enc, signal); + link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); } diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c index 348ea4cb832d..a6d1d7641ab4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c @@ -187,7 +187,7 @@ static const struct link_hwss dio_fixed_vs_pe_retimer_link_hwss = { bool requires_fixed_vs_pe_retimer_dio_link_hwss(const struct dc_link *link) { - return (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN); + return 
((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN); } const struct link_hwss *get_dio_fixed_vs_pe_retimer_link_hwss(void) diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c index 6499807af72a..36adf95744fe 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c @@ -77,17 +77,74 @@ static void set_dio_dpia_lane_settings(struct dc_link *link, { } +static void enable_dpia_link_output(struct dc_link *link, + const struct link_resource *link_res, + enum signal_type signal, + enum clock_source_id clock_source, + const struct dc_link_settings *link_settings) +{ + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + + if (link_enc != NULL) { + if (link->dc->config.enable_dpia_pre_training && link_enc->funcs->enable_dpia_output) { + uint8_t fec_rdy = link->dc->link_srv->dp_should_enable_fec(link); + uint8_t digmode = dc_is_dp_sst_signal(signal) ? DIG_SST_MODE : DIG_MST_MODE; + + link_enc->funcs->enable_dpia_output( + link_enc, + link_settings, + link->ddc_hw_inst, + digmode, + fec_rdy); + } else { + if (dc_is_dp_sst_signal(signal)) + link_enc->funcs->enable_dp_output( + link_enc, + link_settings, + clock_source); + else + link_enc->funcs->enable_dp_mst_output( + link_enc, + link_settings, + clock_source); + } + + } + + link->dc->link_srv->dp_trace_source_sequence(link, + DPCD_SOURCE_SEQ_AFTER_ENABLE_LINK_PHY); +} + +static void disable_dpia_link_output(struct dc_link *link, + const struct link_resource *link_res, + enum signal_type signal) +{ + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + + if (link_enc != NULL) { + if (link->dc->config.enable_dpia_pre_training && link_enc->funcs->disable_dpia_output) { + uint8_t digmode = dc_is_dp_sst_signal(signal) ? 
DIG_SST_MODE : DIG_MST_MODE; + + link_enc->funcs->disable_dpia_output(link_enc, link->ddc_hw_inst, digmode); + } else + link_enc->funcs->disable_output(link_enc, signal); + } + + link->dc->link_srv->dp_trace_source_sequence(link, + DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); +} + static const struct link_hwss dpia_link_hwss = { .setup_stream_encoder = setup_dio_stream_encoder, .reset_stream_encoder = reset_dio_stream_encoder, .setup_stream_attribute = setup_dio_stream_attribute, - .disable_link_output = disable_dio_link_output, + .disable_link_output = disable_dpia_link_output, .setup_audio_output = setup_dio_audio_output, .enable_audio_packet = enable_dio_audio_packet, .disable_audio_packet = disable_dio_audio_packet, .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, - .enable_dp_link_output = enable_dio_dp_link_output, + .enable_dp_link_output = enable_dpia_link_output, .set_dp_link_test_pattern = set_dio_dpia_link_test_pattern, .set_dp_lane_settings = set_dio_dpia_lane_settings, .update_stream_allocation_table = update_dpia_stream_allocation_table, diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h index ad16ec5d9bb7..259e0f4775e1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h @@ -27,6 +27,9 @@ #include "link_hwss.h" +#define DIG_SST_MODE 0 +#define DIG_MST_MODE 5 + const struct link_hwss *get_dpia_link_hwss(void); bool can_use_dpia_link_hwss(const struct dc_link *link, const struct link_resource *link_res); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index e026c728042a..550e1a098fa2 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -829,7 +829,8 @@ static bool should_verify_link_capability_destructively(struct dc_link *link, if 
(link->dc->debug.skip_detection_link_training || dc_is_embedded_signal(link->local_sink->sink_signal) || - link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + !link->dc->config.enable_dpia_pre_training)) { destrictive = false; } else if (link_dp_get_encoding_format(&max_link_cap) == DP_8b_10b_ENCODING) { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 5d66bfc7fe6e..ec7de9c01fab 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -772,6 +772,20 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable) return result; } +static bool dp_set_hblank_reduction_on_rx(struct pipe_ctx *pipe_ctx) +{ + struct dc *dc = pipe_ctx->stream->ctx->dc; + struct dc_stream_state *stream = pipe_ctx->stream; + bool result = false; + + if (dc_is_virtual_signal(stream->signal)) + result = true; + else + result = dm_helpers_dp_write_hblank_reduction(dc->ctx, stream); + return result; +} + + /* The stream with these settings can be sent (unblanked) only after DSC was enabled on RX first, * i.e. 
after dp_enable_dsc_on_rx() had been called */ @@ -1953,11 +1967,15 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) stream->phy_pix_clk = stream->timing.pix_clk_100hz / 10; if (stream->phy_pix_clk > 340000) is_over_340mhz = true; + if (dc_is_tmds_signal(stream->signal) && stream->phy_pix_clk > 6000000UL) { + ASSERT(false); + return; + } if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) { unsigned short masked_chip_caps = pipe_ctx->stream->link->chip_caps & - EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; - if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { + AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; + if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { /* DP159, Retimer settings */ eng_id = pipe_ctx->stream_res.stream_enc->id; @@ -1968,7 +1986,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) write_i2c_default_retimer_setting(pipe_ctx, is_vga_mode, is_over_340mhz); } - } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { + } else if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { /* PI3EQX1204, Redriver settings */ write_i2c_redriver_setting(pipe_ctx, is_over_340mhz); } @@ -2024,7 +2042,7 @@ static enum dc_status enable_link_dp(struct dc_state *state, int lt_attempts = LINK_TRAINING_ATTEMPTS; // Increase retry count if attempting DP1.x on FIXED_VS link - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) lt_attempts = 10; @@ -2039,7 +2057,8 @@ static enum dc_status enable_link_dp(struct dc_state *state, /* Train with fallback when enabling DPIA link. Conventional links are * trained with fallback during sink detection. 
*/ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + !link->dc->config.enable_dpia_pre_training) do_fallback = true; /* @@ -2375,13 +2394,13 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id; unsigned short masked_chip_caps = link->chip_caps & - EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; + AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; //Need to inform that sink is going to use legacy HDMI mode. write_scdc_data( link->ddc, 165000,//vbios only handles 165Mhz. false); - if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { + if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { /* DP159, Retimer settings */ if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings)) write_i2c_retimer_setting(pipe_ctx, @@ -2389,7 +2408,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) else write_i2c_default_retimer_setting(pipe_ctx, false, false); - } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { + } else if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { /* PI3EQX1204, Redriver settings */ write_i2c_redriver_setting(pipe_ctx, false); } @@ -2529,6 +2548,15 @@ void link_set_dpms_on( if (pipe_ctx->stream->dpms_off) return; + /* For Dp tunneling link, a pending HPD means that we have a race condition between processing + * current link and processing the pending HPD. If we enable the link now, we may end up with a + * link that is not actually connected to a sink. So we skip enabling the link in this case. + */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->is_hpd_pending) { + DC_LOG_DEBUG("%s, Link%d HPD is pending, not enable it.\n", __func__, link->link_index); + return; + } + /* Have to setup DSC before DIG FE and BE are connected (which happens before the * link training). 
This is to make sure the bandwidth sent to DIG BE won't be * bigger than what the link and/or DIG BE can handle. VBID[6]/CompressedStream_flag @@ -2594,6 +2622,9 @@ void link_set_dpms_on( } } + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + dp_set_hblank_reduction_on_rx(pipe_ctx); + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) allocate_usb4_bandwidth(pipe_ctx->stream); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 5e1b5ab9fbc6..a7877d57a00f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -101,6 +101,7 @@ static void construct_link_service_validation(struct link_service *link_srv) link_srv->validate_mode_timing = link_validate_mode_timing; link_srv->dp_link_bandwidth_kbps = dp_link_bandwidth_kbps; link_srv->validate_dpia_bandwidth = link_validate_dpia_bandwidth; + link_srv->dp_required_hblank_size_bytes = dp_required_hblank_size_bytes; } /* link dpms owns the programming sequence of stream's dpms state associated @@ -698,7 +699,7 @@ static bool construct_phy(struct dc_link *link, link->chip_caps); } - if (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) { + if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) { link->bios_forced_drive_settings.VOLTAGE_SWING = (bios->integrated_info->ext_disp_conn_info.fixdpvoltageswing & 0x3); link->bios_forced_drive_settings.PRE_EMPHASIS = diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 60f15a9ba7a5..29606fda029d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -409,3 +409,182 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias); } + +struct 
dp_audio_layout_config { + uint8_t layouts_per_sample_denom; + uint8_t symbols_per_layout; + uint8_t max_layouts_per_audio_sdp; +}; + +static void get_audio_layout_config( + uint32_t channel_count, + enum dp_link_encoding encoding, + struct dp_audio_layout_config *output) +{ + memset(output, 0, sizeof(struct dp_audio_layout_config)); + + /* Assuming L-PCM audio. Current implementation uses max 1 layout per SDP, + * with each layout being the same size (8ch layout). + */ + if (encoding == DP_8b_10b_ENCODING) { + if (channel_count == 2) { + output->layouts_per_sample_denom = 4; + output->symbols_per_layout = 40; + output->max_layouts_per_audio_sdp = 1; + } else if (channel_count == 8 || channel_count == 6) { + output->layouts_per_sample_denom = 1; + output->symbols_per_layout = 40; + output->max_layouts_per_audio_sdp = 1; + } + } else if (encoding == DP_128b_132b_ENCODING) { + if (channel_count == 2) { + output->layouts_per_sample_denom = 4; + output->symbols_per_layout = 10; + output->max_layouts_per_audio_sdp = 1; + } else if (channel_count == 8 || channel_count == 6) { + output->layouts_per_sample_denom = 1; + output->symbols_per_layout = 10; + output->max_layouts_per_audio_sdp = 1; + } + } +} + +static uint32_t get_av_stream_map_lane_count( + enum dp_link_encoding encoding, + enum dc_lane_count lane_count, + bool is_mst) +{ + uint32_t av_stream_map_lane_count = 0; + + if (encoding == DP_8b_10b_ENCODING) { + if (!is_mst) + av_stream_map_lane_count = lane_count; + else + av_stream_map_lane_count = 4; + } else if (encoding == DP_128b_132b_ENCODING) { + av_stream_map_lane_count = 4; + } + + ASSERT(av_stream_map_lane_count != 0); + + return av_stream_map_lane_count; +} + +static uint32_t get_audio_sdp_overhead( + enum dp_link_encoding encoding, + enum dc_lane_count lane_count, + bool is_mst) +{ + uint32_t audio_sdp_overhead = 0; + + if (encoding == DP_8b_10b_ENCODING) { + if (is_mst) + audio_sdp_overhead = 16; /* 4 * 2 + 8 */ + else + audio_sdp_overhead = lane_count * 
2 + 8; + } else if (encoding == DP_128b_132b_ENCODING) { + audio_sdp_overhead = 10; /* 4 x 2.5 */ + } + + ASSERT(audio_sdp_overhead != 0); + + return audio_sdp_overhead; +} + +/* Current calculation only applicable for 8b/10b MST and 128b/132b SST/MST. + */ +static uint32_t calculate_overhead_hblank_bw_in_symbols( + uint32_t max_slice_h) +{ + uint32_t overhead_hblank_bw = 0; /* in stream symbols */ + + overhead_hblank_bw += max_slice_h * 4; /* EOC overhead */ + overhead_hblank_bw += 12; /* Main link overhead (VBID, BS/BE) */ + + return overhead_hblank_bw; +} + +uint32_t dp_required_hblank_size_bytes( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params) +{ + /* Main logic from dce_audio is duplicated here, with the main + * difference being: + * - Pre-determined lane count of 4 + * - Assumed 16 dsc slices for worst case + * - Assumed SDP split disabled for worst case + * TODO: Unify logic from dce_audio to prevent duplicated logic. + */ + + const struct dc_crtc_timing *timing = audio_params->crtc_timing; + const uint32_t channel_count = audio_params->channel_count; + const uint32_t sample_rate_hz = audio_params->sample_rate_hz; + const enum dp_link_encoding link_encoding = audio_params->link_encoding; + + // 8b/10b MST and 128b/132b are always 4 logical lanes. 
+ const uint32_t lane_count = 4; + const bool is_mst = (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT); + // Maximum slice count is with ODM 4:1, 4 slices per DSC + const uint32_t max_slices_h = 16; + + const uint32_t av_stream_map_lane_count = get_av_stream_map_lane_count( + link_encoding, lane_count, is_mst); + const uint32_t audio_sdp_overhead = get_audio_sdp_overhead( + link_encoding, lane_count, is_mst); + struct dp_audio_layout_config layout_config; + + if (link_encoding == DP_8b_10b_ENCODING && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) + return 0; + + get_audio_layout_config( + channel_count, link_encoding, &layout_config); + + /* DP spec recommends between 1.05 to 1.1 safety margin to prevent sample under-run */ + struct fixed31_32 audio_sdp_margin = dc_fixpt_from_fraction(110, 100); + struct fixed31_32 horizontal_line_freq_khz = dc_fixpt_from_fraction( + timing->pix_clk_100hz, (long long)timing->h_total * 10); + struct fixed31_32 samples_per_line; + struct fixed31_32 layouts_per_line; + struct fixed31_32 symbols_per_sdp_max_layout; + struct fixed31_32 remainder; + uint32_t num_sdp_with_max_layouts; + uint32_t required_symbols_per_hblank; + uint32_t required_bytes_per_hblank = 0; + + samples_per_line = dc_fixpt_from_fraction(sample_rate_hz, 1000); + samples_per_line = dc_fixpt_div(samples_per_line, horizontal_line_freq_khz); + layouts_per_line = dc_fixpt_div_int(samples_per_line, layout_config.layouts_per_sample_denom); + // HBlank expansion usage assumes SDP split disabled to allow for worst case. 
+ layouts_per_line = dc_fixpt_from_int(dc_fixpt_ceil(layouts_per_line)); + + num_sdp_with_max_layouts = dc_fixpt_floor( + dc_fixpt_div_int(layouts_per_line, layout_config.max_layouts_per_audio_sdp)); + symbols_per_sdp_max_layout = dc_fixpt_from_int( + layout_config.max_layouts_per_audio_sdp * layout_config.symbols_per_layout); + symbols_per_sdp_max_layout = dc_fixpt_add_int(symbols_per_sdp_max_layout, audio_sdp_overhead); + symbols_per_sdp_max_layout = dc_fixpt_mul(symbols_per_sdp_max_layout, audio_sdp_margin); + required_symbols_per_hblank = num_sdp_with_max_layouts; + required_symbols_per_hblank *= ((dc_fixpt_ceil(symbols_per_sdp_max_layout) + av_stream_map_lane_count) / + av_stream_map_lane_count) * av_stream_map_lane_count; + + if (num_sdp_with_max_layouts != dc_fixpt_ceil( + dc_fixpt_div_int(layouts_per_line, layout_config.max_layouts_per_audio_sdp))) { + remainder = dc_fixpt_sub_int(layouts_per_line, + num_sdp_with_max_layouts * layout_config.max_layouts_per_audio_sdp); + remainder = dc_fixpt_mul_int(remainder, layout_config.symbols_per_layout); + remainder = dc_fixpt_add_int(remainder, audio_sdp_overhead); + remainder = dc_fixpt_mul(remainder, audio_sdp_margin); + required_symbols_per_hblank += ((dc_fixpt_ceil(remainder) + av_stream_map_lane_count) / + av_stream_map_lane_count) * av_stream_map_lane_count; + } + + required_symbols_per_hblank += calculate_overhead_hblank_bw_in_symbols(max_slices_h); + + if (link_encoding == DP_8b_10b_ENCODING) + required_bytes_per_hblank = required_symbols_per_hblank; // 8 bits per 8b/10b symbol + else if (link_encoding == DP_128b_132b_ENCODING) + required_bytes_per_hblank = required_symbols_per_hblank * 4; // 32 bits per 128b/132b symbol + + return required_bytes_per_hblank; +} + diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h index 595fb05946e9..bf398c49c3e8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h +++ 
b/drivers/gpu/drm/amd/display/dc/link/link_validation.h @@ -37,4 +37,9 @@ uint32_t dp_link_bandwidth_kbps( const struct dc_link *link, const struct dc_link_settings *link_settings); + +uint32_t dp_required_hblank_size_bytes( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params); + #endif /* __LINK_VALIDATION_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c index d6d5bbf2108c..267180e7bc48 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c @@ -505,7 +505,7 @@ bool try_to_configure_aux_timeout(struct ddc_service *ddc, bool result = false; struct ddc *ddc_pin = ddc->ddc_pin; - if ((ddc->link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((ddc->link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && !ddc->link->dc->debug.disable_fixed_vs_aux_timeout_wa && ddc->ctx->dce_version == DCN_VERSION_3_1) { /* Fixed VS workaround for AUX timeout */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 9dabaf682171..44c3023a7731 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1554,7 +1554,7 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) /* If this chip cap is set, at least one retimer must exist in the chain * Override count to 1 if we receive a known bad count (0 or an invalid value) */ - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { /* If you see this message consistently, either 
the host platform has FIXED_VS flag * incorrectly configured or the sink device is returning an invalid count. @@ -1632,13 +1632,6 @@ static bool retrieve_link_cap(struct dc_link *link) sizeof(link->dpcd_caps.lttpr_caps.phy_repeater_cnt)); } - /* Read DP tunneling information. */ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - status = dpcd_get_tunneling_device_data(link); - if (status != DC_OK) - dm_error("%s: Read tunneling device data failed.\n", __func__); - } - dpcd_set_source_specific_data(link); /* Sink may need to configure internals based on vendor, so allow some * time before proceeding with possibly vendor specific transactions @@ -1711,7 +1704,7 @@ static bool retrieve_link_cap(struct dc_link *link) link->dpcd_caps.dprx_feature.raw = dpcd_dprx_data; if (status != DC_OK) - dm_error("%s: Read DPRX caps data failed.\n", __func__); + dm_error("%s: Read DPRX feature list failed.\n", __func__); /* AdaptiveSyncCapability */ dpcd_dprx_data = 0; @@ -1726,15 +1719,13 @@ static bool retrieve_link_cap(struct dc_link *link) link->dpcd_caps.adaptive_sync_caps.dp_adap_sync_caps.raw = dpcd_dprx_data; if (status != DC_OK) - dm_error("%s: Read DPRX caps data failed. Addr:%#x\n", + dm_error("%s: Read DPRX feature list_1 failed. Addr:%#x\n", __func__, DP_DPRX_FEATURE_ENUMERATION_LIST_CONT_1); } - else { link->dpcd_caps.dprx_feature.raw = 0; } - /* Error condition checking... * It is impossible for Sink to report Max Lane Count = 0. 
* It is possible for Sink to report Max Link Rate = 0, if it is @@ -1788,6 +1779,11 @@ static bool retrieve_link_cap(struct dc_link *link) link->test_pattern_enabled = false; link->compliance_test_state.raw = 0; + link->dpcd_caps.receive_port0_cap.raw[0] = + dpcd_data[DP_RECEIVE_PORT_0_CAP_0 - DP_DPCD_REV]; + link->dpcd_caps.receive_port0_cap.raw[1] = + dpcd_data[DP_RECEIVE_PORT_0_BUFFER_SIZE - DP_DPCD_REV]; + /* read sink count */ core_link_read_dpcd(link, DP_SINK_COUNT, @@ -1918,6 +1914,7 @@ static bool retrieve_link_cap(struct dc_link *link) if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { DC_LOG_DP2("128b/132b encoding is supported at link %d", link->link_index); + /* Read 128b/132b supported link rates */ core_link_read_dpcd(link, DP_128B132B_SUPPORTED_LINK_RATES, &link->dpcd_caps.dp_128b_132b_supported_link_rates.raw, @@ -1965,6 +1962,13 @@ static bool retrieve_link_cap(struct dc_link *link) link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw, sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw)); + /* Read DP tunneling information. */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + status = dpcd_get_tunneling_device_data(link); + if (status != DC_OK) + dm_error("%s: Read DP tunneling device data failed.\n", __func__); + } + retrieve_cable_id(link); dpcd_write_cable_id_to_dprx(link); @@ -2308,6 +2312,14 @@ bool dp_verify_link_cap_with_retries( } else { link->verified_link_cap = last_verified_link_cap; } + + /* For Dp tunneling link, a pending HPD means that we have a race condition between processing + * current link and processing the pending HPD. Since the training has failed, we should just break + * the loop so that we have a chance to process the pending HPD.
+ */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->is_hpd_pending) + break; + fsleep(10 * 1000); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 48abeaa88678..a08403c022ea 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -226,6 +226,8 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) replay_configuration.bits.STATE_TRANSITION_ERROR_STATUS) { bool allow_active; + link->replay_settings.config.replay_error_status.raw |= replay_error_status.raw; + if (link->replay_settings.config.force_disable_desync_error_check) return; @@ -237,6 +239,9 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) &replay_configuration.raw, sizeof(replay_configuration.raw)); + /* Update desync error counter */ + link->replay_settings.replay_desync_error_fail_count++; + /* Acknowledge and clear error bits */ dm_helpers_dp_write_dpcd( link->ctx, @@ -408,7 +413,8 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link, if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) { // Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + !link->dc->config.enable_dpia_pre_training) link->skip_fallback_on_link_loss = true; device_service_clear.bits.AUTOMATED_TEST = 1; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c index bafa52a0165a..2c73ac87cd66 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c @@ -104,7 +104,7 @@ void dp_set_hw_lane_settings( // Don't return here if using FIXED_VS link HWSS and encoding is 128b/132b if 
((link_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) && !is_immediate_downstream(link, offset) && - (!(link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) || + (!((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) || link_dp_get_encoding_format(&link_settings->link_settings) == DP_8b_10b_ENCODING)) return; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 754c895e1bfb..88d4288cde0f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -739,7 +739,7 @@ void override_training_settings( if (overrides->ffe_preset != NULL) lt_settings->ffe_preset = overrides->ffe_preset; /* Override HW lane settings with BIOS forced values if present */ - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && lt_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT) { lt_settings->voltage_swing = &link->bios_forced_drive_settings.VOLTAGE_SWING; lt_settings->pre_emphasis = &link->bios_forced_drive_settings.PRE_EMPHASIS; @@ -1574,7 +1574,7 @@ enum link_training_result dp_perform_link_training( * Per DP specs starting from here, DPTX device shall not issue * Non-LT AUX transactions inside training mode. 
*/ - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING) + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING) status = dp_perform_fixed_vs_pe_training_sequence(link, link_res, &lt_settings); else if (encoding == DP_8b_10b_ENCODING) status = dp_perform_8b_10b_link_training(link, link_res, &lt_settings); diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c index fe26fde12eeb..85298b8a1b5e 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c @@ -110,6 +110,23 @@ void mpc3_disable_dwb_mux( MPC_DWB0_MUX, 0xf); } +void mpc3_set_out_rate_control( + struct mpc *mpc, + int opp_id, + bool enable, + bool rate_2x_mode, + struct mpc_dwb_flow_control *flow_control) +{ + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + + /* Always disable mpc out rate and flow control. + * MPC flow rate control is not needed for DCN30 and above.
+ */ + REG_UPDATE_2(MUX[opp_id], + MPC_OUT_RATE_CONTROL_DISABLE, 1, + MPC_OUT_RATE_CONTROL, 0); +} + enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id) { /*Contrary to DCN2 and DCN1 wherein a single status register field holds this info; @@ -1519,6 +1536,7 @@ static const struct mpc_funcs dcn30_mpc_funcs = { .set_dwb_mux = mpc3_set_dwb_mux, .disable_dwb_mux = mpc3_disable_dwb_mux, .is_dwb_idle = mpc3_is_dwb_idle, + .set_out_rate_control = mpc3_set_out_rate_control, .set_gamut_remap = mpc3_set_gamut_remap, .program_shaper = mpc3_program_shaper, .acquire_rmu = mpcc3_acquire_rmu, diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h index ce93003dae01..103f29900a2c 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h @@ -1085,6 +1085,13 @@ bool mpc3_is_dwb_idle( struct mpc *mpc, int dwb_id); +void mpc3_set_out_rate_control( + struct mpc *mpc, + int opp_id, + bool enable, + bool rate_2x_mode, + struct mpc_dwb_flow_control *flow_control); + void mpc3_power_on_ogam_lut( struct mpc *mpc, int mpcc_id, bool power_on); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index 097d06023e64..19d5ebc6763c 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -302,7 +302,6 @@ void optc1_program_timing( /* Enable stereo - only when we need to pack 3D frame. 
Other types * of stereo handled in explicit call */ - if (optc->funcs->is_two_pixels_per_container(&patched_crtc_timing) || optc1->opp_count == 2) h_div = H_TIMING_DIV_BY2; @@ -1471,37 +1470,71 @@ bool optc1_configure_crc(struct timing_generator *optc, if (!optc1_is_tg_enabled(optc)) return false; - REG_WRITE(OTG_CRC_CNTL, 0); + if (!params->enable || params->reset) + REG_WRITE(OTG_CRC_CNTL, 0); if (!params->enable) return true; /* Program frame boundaries */ - /* Window A x axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, - OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, - OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); - - /* Window A y axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, - OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, - OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); - - /* Window B x axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, - OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, - OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); - - /* Window B y axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, - OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, - OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); - - /* Set crc mode and selection, and enable. Only using CRC0*/ - REG_UPDATE_3(OTG_CRC_CNTL, - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, - OTG_CRC0_SELECT, params->selection, - OTG_CRC_EN, 1); + switch (params->crc_eng_inst) { + case 0: + /* Window A x axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, + OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, + OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, + OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, + OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. 
*/ + REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, + OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, + OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, + OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, + OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode and selection, and enable.*/ + REG_UPDATE_3(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC0_SELECT, params->selection, + OTG_CRC_EN, 1); + break; + case 1: + /* Window A x axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL, + OTG_CRC1_WINDOWA_X_START, params->windowa_x_start, + OTG_CRC1_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL, + OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start, + OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL, + OTG_CRC1_WINDOWB_X_START, params->windowb_x_start, + OTG_CRC1_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL, + OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start, + OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode and selection, and enable.*/ + REG_UPDATE_3(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC1_SELECT, params->selection, + OTG_CRC_EN, 1); + break; + default: + return false; + } return true; } @@ -1510,6 +1543,7 @@ bool optc1_configure_crc(struct timing_generator *optc, * optc1_get_crc - Capture CRC result per component * * @optc: timing_generator instance. + * @idx: index of crc engine to get CRC from * @r_cr: 16-bit primary CRC signature for red data. * @g_y: 16-bit primary CRC signature for green data. * @b_cb: 16-bit primary CRC signature for blue data. 
@@ -1521,7 +1555,7 @@ bool optc1_configure_crc(struct timing_generator *optc, * If CRC is disabled, return false; otherwise, return true, and the CRC * results in the parameters. */ -bool optc1_get_crc(struct timing_generator *optc, +bool optc1_get_crc(struct timing_generator *optc, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { uint32_t field = 0; @@ -1533,14 +1567,30 @@ bool optc1_get_crc(struct timing_generator *optc, if (!field) return false; - /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */ - REG_GET_2(OTG_CRC0_DATA_RG, - CRC0_R_CR, r_cr, - CRC0_G_Y, g_y); + switch (idx) { + case 0: + /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */ + REG_GET_2(OTG_CRC0_DATA_RG, + CRC0_R_CR, r_cr, + CRC0_G_Y, g_y); - /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ - REG_GET(OTG_CRC0_DATA_B, - CRC0_B_CB, b_cb); + /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ + REG_GET(OTG_CRC0_DATA_B, + CRC0_B_CB, b_cb); + break; + case 1: + /* OTG_CRC1_DATA_RG has the CRC16 results for the red and green component */ + REG_GET_2(OTG_CRC1_DATA_RG, + CRC1_R_CR, r_cr, + CRC1_G_Y, g_y); + + /* OTG_CRC1_DATA_B has the CRC16 results for the blue component */ + REG_GET(OTG_CRC1_DATA_B, + CRC1_B_CB, b_cb); + break; + default: + return false; + } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h index 40757f20d73f..159172178d51 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h @@ -86,6 +86,12 @@ SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ + SRI(OTG_CRC1_DATA_RG, OTG, inst),\ + SRI(OTG_CRC1_DATA_B, OTG, inst),\ + SRI(OTG_CRC1_WINDOWA_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC1_WINDOWA_Y_CONTROL, OTG, inst),\ + SRI(OTG_CRC1_WINDOWB_X_CONTROL, 
OTG, inst),\ + SRI(OTG_CRC1_WINDOWB_Y_CONTROL, OTG, inst),\ SR(GSL_SOURCE_SELECT),\ SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst) @@ -315,6 +321,7 @@ struct dcn_optc_registers { SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC1_SELECT, mask_sh),\ SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ @@ -327,6 +334,17 @@ struct dcn_optc_registers { SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC1_DATA_RG, CRC1_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC1_DATA_RG, CRC1_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC1_DATA_B, CRC1_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_END, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_END, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_END, mask_sh),\ SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ @@ -482,6 +500,7 @@ struct dcn_optc_registers { type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN;\ type OTG_CRC_CONT_EN;\ type OTG_CRC0_SELECT;\ + type OTG_CRC1_SELECT;\ type OTG_CRC_EN;\ type CRC0_R_CR;\ type CRC0_G_Y;\ diff --git 
a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index dfa9364fe5a6..d21e82b927d0 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -183,34 +183,87 @@ static bool optc35_configure_crc(struct timing_generator *optc, { struct optc *optc1 = DCN10TG_FROM_TG(optc); + /* Cannot configure crc on a CRTC that is disabled */ if (!optc1_is_tg_enabled(optc)) return false; - REG_WRITE(OTG_CRC_CNTL, 0); + + if (!params->enable || params->reset) + REG_WRITE(OTG_CRC_CNTL, 0); + if (!params->enable) return true; - REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, - OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, - OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); - REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, - OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, - OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); - REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, - OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, - OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); - REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, - OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, - OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); - if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) { - REG_UPDATE_4(OTG_CRC_CNTL, - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, - OTG_CRC0_SELECT, params->selection, - OTG_CRC_EN, 1, - OTG_CRC_WINDOW_DB_EN, 1); - } else - REG_UPDATE_3(OTG_CRC_CNTL, - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, - OTG_CRC0_SELECT, params->selection, - OTG_CRC_EN, 1); + + /* Program frame boundaries */ + switch (params->crc_eng_inst) { + case 0: + /* Window A x axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, + OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, + OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. 
*/ + REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, + OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, + OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, + OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, + OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, + OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, + OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); + + if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) + REG_UPDATE_4(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC0_SELECT, params->selection, + OTG_CRC_EN, 1, + OTG_CRC_WINDOW_DB_EN, 1); + else + REG_UPDATE_3(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC0_SELECT, params->selection, + OTG_CRC_EN, 1); + break; + case 1: + /* Window A x axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL, + OTG_CRC1_WINDOWA_X_START, params->windowa_x_start, + OTG_CRC1_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL, + OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start, + OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL, + OTG_CRC1_WINDOWB_X_START, params->windowb_x_start, + OTG_CRC1_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL, + OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start, + OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end); + + if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) + REG_UPDATE_4(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 
1 : 0, + OTG_CRC1_SELECT, params->selection, + OTG_CRC_EN, 1, + OTG_CRC_WINDOW_DB_EN, 1); + else + REG_UPDATE_3(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC1_SELECT, params->selection, + OTG_CRC_EN, 1); + break; + default: + return false; + } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index 783ca9acc762..338a0cad23a5 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -315,7 +315,7 @@ void optc401_set_drr( struct drr_params amended_params = { 0 }; bool program_manual_trigger = false; - if (dc->caps.dmub_caps.fams_ver >= 2 && dc->debug.fams2_config.bits.enable) { + if (dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver && dc->debug.fams2_config.bits.enable) { if (params != NULL && params->vertical_total_max > 0 && params->vertical_total_min > 0) { @@ -380,7 +380,7 @@ void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, i { struct dc *dc = optc->ctx->dc; - if (dc->caps.dmub_caps.fams_ver >= 2 && dc->debug.fams2_config.bits.enable) { + if (dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver && dc->debug.fams2_config.bits.enable) { /* FAMS2 */ dc_dmub_srv_fams2_drr_update(dc, optc->inst, vtotal_min, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index 770a380cc03d..e92f14d50adb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -1258,6 +1258,11 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( return NULL; } +unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx) +{ + return pipe_ctx->pipe_dlg_param.vstartup_start; +} + static const struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = 
dcn10_get_dcc_compression_cap }; @@ -1272,7 +1277,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = { .validate_global = dcn10_validate_global, .add_stream_to_ctx = dcn10_add_stream_to_ctx, .patch_unknown_plane_state = dcn10_patch_unknown_plane_state, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h index bf8e33cd8147..7bc1be53e800 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h @@ -51,6 +51,7 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( const struct resource_pool *pool, struct dc_stream_state *stream); +unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx); #endif /* __DC_RESOURCE_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 7a5b9aa5292c..5c6dc710e96c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -1509,60 +1509,9 @@ bool dcn20_split_stream_for_odm( next_odm_pipe->prev_odm_pipe = prev_odm_pipe; if (prev_odm_pipe->plane_state) { - struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data; - struct output_pixel_processor *opp = next_odm_pipe->stream_res.opp; - int new_width; - - /* HACTIVE halved for odm combine */ - sd->h_active /= 2; - /* Calculate new vp and recout for left pipe */ - /* Need at least 16 pixels width per side */ - if (sd->recout.x + 16 >= sd->h_active) - return false; - new_width = 
sd->h_active - sd->recout.x; - sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz, sd->recout.width - new_width)); - sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz_c, sd->recout.width - new_width)); - sd->recout.width = new_width; - - /* Calculate new vp and recout for right pipe */ - sd = &next_odm_pipe->plane_res.scl_data; - /* HACTIVE halved for odm combine */ - sd->h_active /= 2; - /* Need at least 16 pixels width per side */ - if (new_width <= 16) - return false; - new_width = sd->recout.width + sd->recout.x - sd->h_active; - sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz, sd->recout.width - new_width)); - sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz_c, sd->recout.width - new_width)); - sd->recout.width = new_width; - sd->viewport.x += dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz, sd->h_active - sd->recout.x)); - sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz_c, sd->h_active - sd->recout.x)); - sd->recout.x = 0; - - /* - * When odm is used in YcbCr422 or 420 colour space, a split screen - * will be seen with the previous calculations since the extra left - * edge pixel is accounted for in fmt but not in viewport. - * - * Below are calculations which fix the split by fixing the calculations - * if there is an extra left edge pixel. 
- */ - if (opp && opp->funcs->opp_get_left_edge_extra_pixel_count - && opp->funcs->opp_get_left_edge_extra_pixel_count( - opp, next_odm_pipe->stream->timing.pixel_encoding, - resource_is_pipe_type(next_odm_pipe, OTG_MASTER)) == 1) { - sd->h_active += 1; - sd->recout.width += 1; - sd->viewport.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); - sd->viewport_c.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); - sd->viewport_c.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); - sd->viewport.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + if (!resource_build_scaling_params(prev_odm_pipe) || + !resource_build_scaling_params(next_odm_pipe)) { + return false; } } @@ -2280,7 +2229,8 @@ static const struct resource_funcs dcn20_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .set_mcif_arb_params = dcn20_set_mcif_arb_params, .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index d3d67d366523..43fa2cb117f3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -59,8 +59,8 @@ #include "cyan_skillfish_ip_offset.h" -#include "dcn/dcn_2_0_3_offset.h" -#include "dcn/dcn_2_0_3_sh_mask.h" +#include "dcn/dcn_2_0_1_offset.h" +#include "dcn/dcn_2_0_1_sh_mask.h" #include "dpcs/dpcs_2_0_3_offset.h" #include "dpcs/dpcs_2_0_3_sh_mask.h" @@ -1079,7 +1079,8 @@ static struct resource_funcs dcn201_res_pool_funcs = { .populate_dml_writeback_from_context 
= dcn201_populate_dml_writeback_from_context, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .set_mcif_arb_params = dcn20_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn201_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 021ba8ac5c8c..2615c36d5ffe 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -1378,6 +1378,7 @@ static const struct resource_funcs dcn21_res_pool_funcs = { .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, .update_bw_bounding_box = dcn21_update_bw_bounding_box, .get_panel_config_defaults = dcn21_get_panel_config_defaults, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn21_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index bfd0eccbed28..13202ce30d66 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -2250,6 +2250,7 @@ static const struct resource_funcs dcn30_res_pool_funcs = { .update_bw_bounding_box = dcn30_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn30_get_panel_config_defaults, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; #define CTX ctx diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index a9816affd312..121a86a59833 100644 --- 
a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -671,9 +671,9 @@ static const struct dc_plane_cap plane_cap = { /* 6:1 downscaling ratio: 1000/6 = 166.666 */ .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 + .argb8888 = 358, + .nv12 = 358, + .fp16 = 358 }, 64, 64 @@ -693,7 +693,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ + .max_downscale_src_width = 4096,/*upto true 4k*/ .scl_reset_length10 = true, .sanity_checks = false, .underflow_assert_delay_us = 0xFFFFFFFF, @@ -1400,7 +1400,8 @@ static struct resource_funcs dcn301_res_pool_funcs = { .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn301_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn301_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 7baefc910a3d..012c5fd52cb1 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -1151,6 +1151,7 @@ static struct resource_funcs dcn302_res_pool_funcs = { .update_bw_bounding_box = dcn302_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn302_get_panel_config_defaults, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static struct dc_cap_funcs cap_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index 8a57d46ad15f..a8d0b4686f9a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -1096,6 +1096,7 @@ static struct resource_funcs dcn303_res_pool_funcs = { .update_bw_bounding_box = dcn303_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn303_get_panel_config_defaults, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static struct dc_cap_funcs cap_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 54ec3d8e920c..911bd60d4fbc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1849,6 +1849,7 @@ static struct resource_funcs dcn31_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn31_get_panel_config_defaults, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static struct clock_source *dcn30_clock_source_create( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 2794473f2aff..e3ba105034f8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1778,6 +1778,7 @@ static struct resource_funcs dcn314_res_pool_funcs = { .get_panel_config_defaults = dcn314_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static struct clock_source *dcn30_clock_source_create( diff --git 
a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 4ee33eb3381d..14acef036b5a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1846,6 +1846,7 @@ static struct resource_funcs dcn315_res_pool_funcs = { .get_panel_config_defaults = dcn315_get_panel_config_defaults, .get_power_profile = dcn315_get_power_profile, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn315_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 79eddbafe3c2..568094827212 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -1720,6 +1720,7 @@ static struct resource_funcs dcn316_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn316_get_panel_config_defaults, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn316_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 12d247a7ec45..664302876019 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2066,6 +2066,7 @@ static struct resource_funcs dcn32_res_pool_funcs = { .add_phantom_pipes = dcn32_add_phantom_pipes, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static uint32_t read_pipe_fuses(struct 
dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 06b9479c8bd3..38d76434683e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1624,6 +1624,7 @@ static struct resource_funcs dcn321_res_pool_funcs = { .add_phantom_pipes = dcn32_add_phantom_pipes, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 89e2adcf2a28..8ee3d99ea2aa 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1752,6 +1752,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc, return out; } +enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state) +{ + plane_state->tiling_info.gfxversion = DcGfxVersion9; + dcn20_patch_unknown_plane_state(plane_state); + return DC_OK; +} + static struct resource_funcs dcn35_res_pool_funcs = { .destroy = dcn35_destroy_resource_pool, @@ -1775,10 +1782,11 @@ static struct resource_funcs dcn35_res_pool_funcs = { .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn35_update_bw_bounding_box_fpu, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .patch_unknown_plane_state = dcn35_patch_unknown_plane_state, .get_panel_config_defaults = dcn35_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn35_get_preferred_eng_id_dpia, .get_det_buffer_size = dcn31_get_det_buffer_size, + 
.get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn35_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h index f97bb4cb3761..9d03a55d90cf 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h @@ -35,6 +35,7 @@ extern struct _vcs_dpi_ip_params_st dcn3_5_ip; extern struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc; +enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state); struct dcn35_resource_pool { struct resource_pool base; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 263a37c1cd3a..14f7c3acdc96 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1754,10 +1754,11 @@ static struct resource_funcs dcn351_res_pool_funcs = { .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn351_update_bw_bounding_box_fpu, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .patch_unknown_plane_state = dcn35_patch_unknown_plane_state, .get_panel_config_defaults = dcn35_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn351_get_preferred_eng_id_dpia, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn351_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 2a3dabfe3cea..c1ebc6b1c937 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -726,6 
+726,10 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, .dcc_meta_propagation_delay_us = 10, + .fams_version = { + .minor = 1, + .major = 2, + }, //v2.1 .fams2_config = { .bits = { .enable = true, @@ -733,7 +737,7 @@ static const struct dc_debug_options debug_defaults_drv = { .enable_stall_recovery = true, } }, - .force_cositing = CHROMA_COSITING_TOPLEFT + 1, + .force_cositing = CHROMA_COSITING_NONE + 1, }; static struct dce_aux *dcn401_aux_engine_create( @@ -1293,6 +1297,29 @@ static struct hpo_dp_link_encoder *dcn401_hpo_dp_link_encoder_create( return &hpo_dp_enc31->base; } +static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans) +{ + unsigned int num_available_chans = 1; + + /* channels for MALL must be a power of 2 */ + while (num_chans > 1) { + num_available_chans = (num_available_chans << 1); + num_chans = (num_chans >> 1); + } + + /* cannot be odd */ + num_available_chans &= ~1; + + /* clamp to max available channels for MALL per ASIC */ + if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) { + num_available_chans = num_available_chans > 16 ? 16 : num_available_chans; + } else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) { + num_available_chans = num_available_chans > 8 ? 
8 : num_available_chans; + } + + return num_available_chans; +} + static struct dce_hwseq *dcn401_hwseq_create( struct dc_context *ctx) { @@ -1588,6 +1615,14 @@ static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b memcpy(dml2_opt, &dc->dml2_options, sizeof(dc->dml2_options)); + /* re-calculate the available MALL size if required */ + if (bw_params->num_channels > 0) { + dc->caps.max_cab_allocation_bytes = dcn401_calc_num_avail_chans_for_mall( + dc, bw_params->num_channels) * + dc->caps.mall_size_per_mem_channel * 1024 * 1024; + dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes; + } + DC_FP_START(); dcn401_update_bw_bounding_box_fpu(dc, bw_params); @@ -1605,6 +1640,7 @@ static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state) { + plane_state->tiling_info.gfxversion = DcGfxAddr3; plane_state->tiling_info.gfx_addr3.swizzle = DC_ADDR3_SW_64KB_2D; return DC_OK; } @@ -1704,27 +1740,9 @@ static int dcn401_get_power_profile(const struct dc_state *context) return dpm_level; } -static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans) +static unsigned int dcn401_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx) { - unsigned int num_available_chans = 1; - - /* channels for MALL must be a power of 2 */ - while (num_chans > 1) { - num_available_chans = (num_available_chans << 1); - num_chans = (num_chans >> 1); - } - - /* cannot be odd */ - num_available_chans &= ~1; - - /* clamp to max available channels for MALL per ASIC */ - if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) { - num_available_chans = num_available_chans > 16 ? 16 : num_available_chans; - } else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) { - num_available_chans = num_available_chans > 8 ? 
8 : num_available_chans; - } - - return num_available_chans; + return pipe_ctx->global_sync.dcn4x.vstartup_lines; } static struct resource_funcs dcn401_res_pool_funcs = { @@ -1754,6 +1772,7 @@ static struct resource_funcs dcn401_res_pool_funcs = { .build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, .get_power_profile = dcn401_get_power_profile, + .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 73a65913cb12..38a9a0d68058 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -11,6 +11,41 @@ #define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19)) #define MIN_VIEWPORT_SIZE 12 +static bool spl_is_yuv420(enum spl_pixel_format format) +{ + if ((format >= SPL_PIXEL_FORMAT_420BPP8) && + (format <= SPL_PIXEL_FORMAT_420BPP10)) + return true; + + return false; +} + +static bool spl_is_rgb8(enum spl_pixel_format format) +{ + if (format == SPL_PIXEL_FORMAT_ARGB8888) + return true; + + return false; +} + +static bool spl_is_video_format(enum spl_pixel_format format) +{ + if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN + && format <= SPL_PIXEL_FORMAT_VIDEO_END) + return true; + else + return false; +} + +static bool spl_is_subsampled_format(enum spl_pixel_format format) +{ + if (format >= SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN + && format <= SPL_PIXEL_FORMAT_SUBSAMPLED_END) + return true; + else + return false; +} + static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1) { struct spl_rect rec; @@ -137,15 +172,32 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( struct spl_in *spl_in, struct spl_rect *plane_clip_rec) { - int mpc_slice_count = spl_in->basic_in.mpc_combine_h; - int mpc_slice_idx = 
spl_in->basic_in.mpc_combine_v; + bool use_recout_width_aligned = + spl_in->basic_in.num_h_slices_recout_width_align.use_recout_width_aligned; + int mpc_slice_count = + spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_num_h_slices; + int recout_width_align = + spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_recout_width_align; + int mpc_slice_idx = spl_in->basic_in.mpc_h_slice_index; int epimo = mpc_slice_count - plane_clip_rec->width % mpc_slice_count - 1; struct spl_rect mpc_rec; - mpc_rec.width = plane_clip_rec->width / mpc_slice_count; - mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; - mpc_rec.height = plane_clip_rec->height; - mpc_rec.y = plane_clip_rec->y; + if (use_recout_width_aligned) { + mpc_rec.width = recout_width_align; + if ((mpc_rec.width * (mpc_slice_idx + 1)) > plane_clip_rec->width) { + mpc_rec.width = plane_clip_rec->width % recout_width_align; + mpc_rec.x = plane_clip_rec->x + recout_width_align * mpc_slice_idx; + } else + mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; + mpc_rec.height = plane_clip_rec->height; + mpc_rec.y = plane_clip_rec->y; + + } else { + mpc_rec.width = plane_clip_rec->width / mpc_slice_count; + mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; + mpc_rec.height = plane_clip_rec->height; + mpc_rec.y = plane_clip_rec->y; + } SPL_ASSERT(mpc_slice_count == 1 || spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE || mpc_rec.width % 2 == 0); @@ -391,8 +443,7 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz; spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert; - if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 - || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) { + if (spl_is_yuv420(spl_in->basic_in.format)) { spl_scratch->scl_data.ratios.horz_c.value /= 2; spl_scratch->scl_data.ratios.vert_c.value /= 2; 
} @@ -529,23 +580,6 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, *vp_offset = src_size - *vp_offset - *vp_size; } -static bool spl_is_yuv420(enum spl_pixel_format format) -{ - if ((format >= SPL_PIXEL_FORMAT_420BPP8) && - (format <= SPL_PIXEL_FORMAT_420BPP10)) - return true; - - return false; -} - -static bool spl_is_rgb8(enum spl_pixel_format format) -{ - if (format == SPL_PIXEL_FORMAT_ARGB8888) - return true; - - return false; -} - /*Calculate inits and viewport */ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_scratch *spl_scratch) @@ -556,8 +590,7 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_rect recout_clip_in_recout_dst; struct spl_rect overlap_in_active_timing; struct spl_rect odm_slice = calculate_odm_slice_in_timing_active(spl_in); - int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 - || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 2 : 1; + int vpc_div = spl_is_subsampled_format(spl_in->basic_in.format) ? 
2 : 1; bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; struct spl_fixed31_32 init_adj_h = spl_fixpt_zero; struct spl_fixed31_32 init_adj_v = spl_fixpt_zero; @@ -585,12 +618,7 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, &flip_vert_scan_dir, &flip_horz_scan_dir); - if (orthogonal_rotation) { - spl_swap(src.width, src.height); - spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); - } - - if (spl_is_yuv420(spl_in->basic_in.format)) { + if (spl_is_subsampled_format(spl_in->basic_in.format)) { /* this gives the direction of the cositing (negative will move * left, right otherwise) */ @@ -598,15 +626,15 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, switch (spl_in->basic_in.cositing) { - case CHROMA_COSITING_LEFT: - init_adj_h = spl_fixpt_zero; + case CHROMA_COSITING_TOPLEFT: + init_adj_h = spl_fixpt_from_fraction(sign, 4); init_adj_v = spl_fixpt_from_fraction(sign, 4); break; - case CHROMA_COSITING_NONE: + case CHROMA_COSITING_LEFT: init_adj_h = spl_fixpt_from_fraction(sign, 4); - init_adj_v = spl_fixpt_from_fraction(sign, 4); + init_adj_v = spl_fixpt_zero; break; - case CHROMA_COSITING_TOPLEFT: + case CHROMA_COSITING_NONE: default: init_adj_h = spl_fixpt_zero; init_adj_v = spl_fixpt_zero; @@ -614,6 +642,12 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, } } + if (orthogonal_rotation) { + spl_swap(src.width, src.height); + spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); + spl_swap(init_adj_h, init_adj_v); + } + spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, @@ -678,7 +712,7 @@ static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) * since 3d is special and needs to calculate vp as if there is no recout offset * This may break with rotation, good thing we aren't mixing hw rotation and 3d */ - if (spl_in->basic_in.mpc_combine_v) { + if (spl_in->basic_in.mpc_h_slice_index) { SPL_ASSERT(spl_in->basic_in.rotation == 
SPL_ROTATION_ANGLE_0 || (spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM && spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE)); @@ -698,24 +732,6 @@ static void spl_clamp_viewport(struct spl_rect *viewport) viewport->width = MIN_VIEWPORT_SIZE; } -static bool spl_dscl_is_420_format(enum spl_pixel_format format) -{ - if (format == SPL_PIXEL_FORMAT_420BPP8 || - format == SPL_PIXEL_FORMAT_420BPP10) - return true; - else - return false; -} - -static bool spl_dscl_is_video_format(enum spl_pixel_format format) -{ - if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN - && format <= SPL_PIXEL_FORMAT_VIDEO_END) - return true; - else - return false; -} - static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, const struct spl_scaler_data *data, bool enable_isharp, bool enable_easf) @@ -732,8 +748,8 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, && !enable_isharp) return SCL_MODE_SCALING_444_BYPASS; - if (!spl_dscl_is_420_format(pixel_format)) { - if (spl_dscl_is_video_format(pixel_format)) + if (!spl_is_subsampled_format(pixel_format)) { + if (spl_is_video_format(pixel_format)) return SCL_MODE_SCALING_444_YCBCR_ENABLE; else return SCL_MODE_SCALING_444_RGB_ENABLE; @@ -756,7 +772,7 @@ static bool spl_choose_lls_policy(enum spl_pixel_format format, enum spl_transfer_func_predefined tf_predefined_type, enum linear_light_scaling *lls_pref) { - if (spl_is_yuv420(format)) { + if (spl_is_video_format(format)) { *lls_pref = LLS_PREF_NO; if ((tf_type == SPL_TF_TYPE_PREDEFINED) || (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) @@ -815,7 +831,7 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch) /* Check if video is in fullscreen mode */ static bool spl_is_video_fullscreen(struct spl_in *spl_in) { - if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen) + if (spl_is_video_format(spl_in->basic_in.format) && spl_in->is_fullscreen) return true; return false; } @@ -846,10 +862,10 @@ static bool 
spl_get_isharp_en(struct spl_in *spl_in, * Apply sharpness to RGB and YUV (NV12/P010) * surfaces based on policy setting */ - if (!spl_is_yuv420(spl_in->basic_in.format) && + if (!spl_is_video_format(spl_in->basic_in.format) && (spl_in->sharpen_policy == SHARPEN_YUV)) return enable_isharp; - else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) && + else if ((spl_is_video_format(spl_in->basic_in.format) && !fullscreen) && (spl_in->sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV)) return enable_isharp; else if (!spl_in->is_fullscreen && @@ -882,8 +898,8 @@ static void spl_get_taps_non_adaptive_scaler( if (in_taps->v_taps == 0) { if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1) - spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( - spl_scratch->scl_data.ratios.vert, 2)), 8); + spl_scratch->scl_data.taps.v_taps = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.vert), 8); else spl_scratch->scl_data.taps.v_taps = 4; } else @@ -891,8 +907,8 @@ static void spl_get_taps_non_adaptive_scaler( if (in_taps->v_taps_c == 0) { if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1) - spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( - spl_scratch->scl_data.ratios.vert_c, 2)), 8); + spl_scratch->scl_data.taps.v_taps_c = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.vert_c), 8); else spl_scratch->scl_data.taps.v_taps_c = 4; } else @@ -932,7 +948,7 @@ static bool spl_get_optimal_number_of_taps( int min_taps_y, min_taps_c; enum lb_memory_config lb_config; bool skip_easf = false; - bool is_ycbcr = spl_dscl_is_video_format(spl_in->basic_in.format); + bool is_subsampled = spl_is_subsampled_format(spl_in->basic_in.format); if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && @@ -964,7 +980,7 @@ static bool spl_get_optimal_number_of_taps( if (skip_easf) spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); else { - if 
(spl_is_yuv420(spl_in->basic_in.format)) { + if (spl_is_video_format(spl_in->basic_in.format)) { spl_scratch->scl_data.taps.h_taps = 6; spl_scratch->scl_data.taps.v_taps = 6; spl_scratch->scl_data.taps.h_taps_c = 4; @@ -982,8 +998,7 @@ static bool spl_get_optimal_number_of_taps( min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c); /* Use LB_MEMORY_CONFIG_3 for 4:2:0 */ - if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8) - || (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10)) + if (spl_is_yuv420(spl_in->basic_in.format)) lb_config = LB_MEMORY_CONFIG_3; else lb_config = LB_MEMORY_CONFIG_0; @@ -1039,13 +1054,11 @@ static bool spl_get_optimal_number_of_taps( if (spl_scratch->scl_data.taps.h_taps_c == 5) spl_scratch->scl_data.taps.h_taps_c = 4; - if (spl_is_yuv420(spl_in->basic_in.format)) { - if ((spl_scratch->scl_data.taps.h_taps <= 4) || - (spl_scratch->scl_data.taps.h_taps_c <= 3)) { + if (spl_is_video_format(spl_in->basic_in.format)) { + if (spl_scratch->scl_data.taps.h_taps <= 4) { *enable_easf_v = false; *enable_easf_h = false; - } else if ((spl_scratch->scl_data.taps.v_taps <= 3) || - (spl_scratch->scl_data.taps.v_taps_c <= 3)) { + } else if (spl_scratch->scl_data.taps.v_taps <= 3) { *enable_easf_v = false; *enable_easf_h = true; } else { @@ -1086,10 +1099,10 @@ static bool spl_get_optimal_number_of_taps( spl_scratch->scl_data.taps.h_taps = 1; spl_scratch->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_ycbcr) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_subsampled) spl_scratch->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_ycbcr) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_subsampled) spl_scratch->scl_data.taps.v_taps_c = 1; *enable_easf_v = false; @@ -1103,11 +1116,11 @@ static bool spl_get_optimal_number_of_taps( (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) spl_scratch->scl_data.taps.v_taps = 1; - 
if ((!*enable_easf_h) && !is_ycbcr && + if ((!*enable_easf_h) && !is_subsampled && (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))) spl_scratch->scl_data.taps.h_taps_c = 1; - if ((!*enable_easf_v) && !is_ycbcr && + if ((!*enable_easf_v) && !is_subsampled && (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) spl_scratch->scl_data.taps.v_taps_c = 1; } @@ -1118,7 +1131,7 @@ static bool spl_get_optimal_number_of_taps( static void spl_set_black_color_data(enum spl_pixel_format format, struct scl_black_color *scl_black_color) { - bool ycbcr = spl_dscl_is_video_format(format); + bool ycbcr = spl_is_video_format(format); if (ycbcr) { scl_black_color->offset_rgb_y = BLACK_OFFSET_RGB_Y; scl_black_color->offset_rgb_cbcr = BLACK_OFFSET_CBCR; @@ -1585,7 +1598,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s 0x0; // fp1.5.10, C3 coefficient } - if (spl_is_yuv420(format)) { /* TODO: 0 = RGB, 1 = YUV */ + if (spl_is_subsampled_format(format)) { /* TODO: 0 = RGB, 1 = YUV */ dscl_prog_data->easf_matrix_mode = 1; /* * 2-bit, BF3 chroma mode correction calculation mode diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 55d557df4aa5..467af9dd90de 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -63,13 +63,13 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_420BPP8, SPL_PIXEL_FORMAT_420BPP10, /*end of pixel format definition*/ - SPL_PIXEL_FORMAT_INVALID, - SPL_PIXEL_FORMAT_422BPP8, - SPL_PIXEL_FORMAT_422BPP10, SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16, + SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN = SPL_PIXEL_FORMAT_420BPP8, + SPL_PIXEL_FORMAT_SUBSAMPLED_END = SPL_PIXEL_FORMAT_420BPP10, SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, SPL_PIXEL_FORMAT_VIDEO_END = SPL_PIXEL_FORMAT_420BPP10, + SPL_PIXEL_FORMAT_INVALID, SPL_PIXEL_FORMAT_UNKNOWN }; 
@@ -436,8 +436,14 @@ struct basic_in { struct spl_rect clip_rect; // Clip rect enum spl_rotation_angle rotation; // Rotation bool horizontal_mirror; // Horizontal mirror - int mpc_combine_h; // MPC Horizontal Combine Factor (split_count) - int mpc_combine_v; // MPC Vertical Combine Factor (split_idx) + struct { // previous mpc_combine_h - split count + bool use_recout_width_aligned; + union { + int mpc_num_h_slices; + int mpc_recout_width_align; + } num_slices_recout_width; + } num_h_slices_recout_width_align; + int mpc_h_slice_index; // previous mpc_combine_v - split_idx // Inputs for adaptive scaler - TODO enum spl_transfer_func_type tf_type; /* Transfer function type */ enum spl_transfer_func_predefined tf_predefined_type; /* Transfer function predefined type */ diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index b353c4ceb60d..4b3ccbca0da2 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -69,6 +69,9 @@ #define DMUB_PC_SNAPSHOT_COUNT 10 +/* Default tracebuffer size if meta is absent. 
*/ +#define DMUB_TRACE_BUFFER_SIZE (64 * 1024) + /* Forward declarations */ struct dmub_srv; struct dmub_srv_common_regs; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index b800a507d1e0..d0fe324cb537 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -431,7 +431,68 @@ union replay_debug_flags { */ uint32_t enable_ips_residency_profiling : 1; - uint32_t reserved : 20; + /** + * 0x1000 (bit 12) + * @enable_coasting_vtotal_check: Enable Coasting_vtotal_check + */ + uint32_t enable_coasting_vtotal_check : 1; + /** + * 0x2000 (bit 13) + * @enable_visual_confirm_debug: Enable Visual Confirm Debug + */ + uint32_t enable_visual_confirm_debug : 1; + + uint32_t reserved : 18; + } bitfields; + + uint32_t u32All; +}; + +/** + * Flags record error state. + */ +union replay_visual_confirm_error_state_flags { + struct { + /** + * 0x1 (bit 0) - Desync Error flag. + */ + uint32_t desync_error : 1; + + /** + * 0x2 (bit 1) - State Transition Error flag. + */ + uint32_t state_transition_error : 1; + + /** + * 0x4 (bit 2) - Crc Error flag + */ + uint32_t crc_error : 1; + + /** + * 0x8 (bit 3) - Reserved + */ + uint32_t reserved_3 : 1; + + /** + * 0x10 (bit 4) - Incorrect Coasting vtotal checking --> use debug flag to control DPCD write. + * Added new debug flag to control DPCD. + */ + uint32_t incorrect_vtotal_in_static_screen : 1; + + /** + * 0x20 (bit 5) - No doubled Refresh Rate. + */ + uint32_t no_double_rr : 1; + + /** + * Reserved bit 6-7 + */ + uint32_t reserved_6_7 : 2; + + /** + * Reserved bit 9-31 + */ + uint32_t reserved_9_31 : 24; } bitfields; uint32_t u32All; @@ -475,11 +536,23 @@ union replay_hw_flags { * Use TPS3 signal when restore main link. 
*/ uint32_t force_wakeup_by_tps3 : 1; + /** + * @is_alpm_initialized: Indicates whether ALPM is initialized + */ + uint32_t is_alpm_initialized : 1; } bitfields; uint32_t u32All; }; +union fw_assisted_mclk_switch_version { + struct { + uint8_t minor : 5; + uint8_t major : 3; + }; + uint8_t ver; +}; + /** * DMUB feature capabilities. * After DMUB init, driver will query FW capabilities prior to enabling certain features. @@ -1823,52 +1896,11 @@ enum fams2_stream_type { FAMS2_STREAM_TYPE_SUBVP = 4, }; -/* dynamic stream state */ -struct dmub_fams2_legacy_stream_dynamic_state { - uint8_t force_allow_at_vblank; - uint8_t pad[3]; -}; - -struct dmub_fams2_subvp_stream_dynamic_state { - uint16_t viewport_start_hubp_vline; - uint16_t viewport_height_hubp_vlines; - uint16_t viewport_start_c_hubp_vline; - uint16_t viewport_height_c_hubp_vlines; - uint16_t phantom_viewport_height_hubp_vlines; - uint16_t phantom_viewport_height_c_hubp_vlines; - uint16_t microschedule_start_otg_vline; - uint16_t mall_start_otg_vline; - uint16_t mall_start_hubp_vline; - uint16_t mall_start_c_hubp_vline; - uint8_t force_allow_at_vblank_only; - uint8_t pad[3]; -}; - -struct dmub_fams2_drr_stream_dynamic_state { - uint16_t stretched_vtotal; - uint8_t use_cur_vtotal; - uint8_t pad; -}; - -struct dmub_fams2_stream_dynamic_state { - uint64_t ref_tick; - uint32_t cur_vtotal; - uint16_t adjusted_allow_end_otg_vline; - uint8_t pad[2]; - struct dmub_optc_position ref_otg_pos; - struct dmub_optc_position target_otg_pos; - union { - struct dmub_fams2_legacy_stream_dynamic_state legacy; - struct dmub_fams2_subvp_stream_dynamic_state subvp; - struct dmub_fams2_drr_stream_dynamic_state drr; - } sub_state; -}; - /* static stream state */ struct dmub_fams2_legacy_stream_static_state { uint8_t vactive_det_fill_delay_otg_vlines; uint8_t programming_delay_otg_vlines; -}; +}; //v0 struct dmub_fams2_subvp_stream_static_state { uint16_t vratio_numerator; @@ -1887,14 +1919,59 @@ struct 
dmub_fams2_subvp_stream_static_state { uint8_t phantom_otg_inst; uint8_t phantom_pipe_mask; uint8_t phantom_plane_pipe_masks[DMUB_MAX_PHANTOM_PLANES]; // phantom pipe mask per plane (for flip passthrough) -}; +}; //v0 struct dmub_fams2_drr_stream_static_state { uint16_t nom_stretched_vtotal; uint8_t programming_delay_otg_vlines; uint8_t only_stretch_if_required; uint8_t pad[2]; -}; +}; //v0 + +struct dmub_fams2_cmd_legacy_stream_static_state { + uint16_t vactive_det_fill_delay_otg_vlines; + uint16_t programming_delay_otg_vlines; +}; //v1 + +struct dmub_fams2_cmd_subvp_stream_static_state { + uint16_t vratio_numerator; + uint16_t vratio_denominator; + uint16_t phantom_vtotal; + uint16_t phantom_vactive; + uint16_t programming_delay_otg_vlines; + uint16_t prefetch_to_mall_otg_vlines; + union { + struct { + uint8_t is_multi_planar : 1; + uint8_t is_yuv420 : 1; + } bits; + uint8_t all; + } config; + uint8_t phantom_otg_inst; + uint8_t phantom_pipe_mask; + uint8_t pad0; + uint8_t phantom_plane_pipe_masks[DMUB_MAX_PHANTOM_PLANES]; // phantom pipe mask per plane (for flip passthrough) + uint8_t pad1[4 - (DMUB_MAX_PHANTOM_PLANES % 4)]; +}; //v1 + +struct dmub_fams2_cmd_drr_stream_static_state { + uint16_t nom_stretched_vtotal; + uint16_t programming_delay_otg_vlines; + uint8_t only_stretch_if_required; + uint8_t pad[3]; +}; //v1 + +union dmub_fams2_stream_static_sub_state { + struct dmub_fams2_legacy_stream_static_state legacy; + struct dmub_fams2_subvp_stream_static_state subvp; + struct dmub_fams2_drr_stream_static_state drr; +}; //v0 + +union dmub_fams2_cmd_stream_static_sub_state { + struct dmub_fams2_cmd_legacy_stream_static_state legacy; + struct dmub_fams2_cmd_subvp_stream_static_state subvp; + struct dmub_fams2_cmd_drr_stream_static_state drr; +}; //v1 struct dmub_fams2_stream_static_state { enum fams2_stream_type type; @@ -1924,13 +2001,45 @@ struct dmub_fams2_stream_static_state { uint8_t pipe_mask; // pipe mask for the whole config uint8_t num_planes; uint8_t 
plane_pipe_masks[DMUB_MAX_PLANES]; // pipe mask per plane (for flip passthrough) - uint8_t pad[DMUB_MAX_PLANES % 4]; + uint8_t pad[4 - (DMUB_MAX_PLANES % 4)]; + union dmub_fams2_stream_static_sub_state sub_state; +}; //v0 + +struct dmub_fams2_cmd_stream_static_base_state { + enum fams2_stream_type type; + uint32_t otg_vline_time_ns; + uint32_t otg_vline_time_ticks; + uint16_t htotal; + uint16_t vtotal; // nominal vtotal + uint16_t vblank_start; + uint16_t vblank_end; + uint16_t max_vtotal; + uint16_t allow_start_otg_vline; + uint16_t allow_end_otg_vline; + uint16_t drr_keepout_otg_vline; // after this vline, vtotal cannot be changed + uint16_t scheduling_delay_otg_vlines; // min time to budget for ready to microschedule start + uint16_t contention_delay_otg_vlines; // time to budget for contention on execution + uint16_t vline_int_ack_delay_otg_vlines; // min time to budget for vertical interrupt firing + uint16_t allow_to_target_delay_otg_vlines; // time from allow vline to target vline union { - struct dmub_fams2_legacy_stream_static_state legacy; - struct dmub_fams2_subvp_stream_static_state subvp; - struct dmub_fams2_drr_stream_static_state drr; - } sub_state; -}; + struct { + uint8_t is_drr : 1; // stream is DRR enabled + uint8_t clamp_vtotal_min : 1; // clamp vtotal to min instead of nominal + uint8_t min_ttu_vblank_usable : 1; // if min ttu vblank is above wm, no force pstate is needed in blank + } bits; + uint8_t all; + } config; + uint8_t otg_inst; + uint8_t pipe_mask; // pipe mask for the whole config + uint8_t num_planes; + uint8_t plane_pipe_masks[DMUB_MAX_PLANES]; // pipe mask per plane (for flip passthrough) + uint8_t pad[4 - (DMUB_MAX_PLANES % 4)]; +}; //v1 + +struct dmub_fams2_stream_static_state_v1 { + struct dmub_fams2_cmd_stream_static_base_state base; + union dmub_fams2_cmd_stream_static_sub_state sub_state; +}; //v1 /** * enum dmub_fams2_allow_delay_check_mode - macroscheduler mode for breaking on excessive @@ -1970,7 +2079,11 @@ struct 
dmub_cmd_fams2_global_config { union dmub_cmd_fams2_config { struct dmub_cmd_fams2_global_config global; - struct dmub_fams2_stream_static_state stream; + struct dmub_fams2_stream_static_state stream; //v0 + union { + struct dmub_fams2_cmd_stream_static_base_state base; + union dmub_fams2_cmd_stream_static_sub_state sub_state; + } stream_v1; //v1 }; /** @@ -3592,6 +3705,8 @@ enum dmub_cmd_replay_general_subtype { */ REPLAY_GENERAL_CMD_DISABLED_ADAPTIVE_SYNC_SDP, REPLAY_GENERAL_CMD_DISABLED_DESYNC_ERROR_DETECTION, + REPLAY_GENERAL_CMD_UPDATE_ERROR_STATUS, + REPLAY_GENERAL_CMD_SET_LOW_RR_ACTIVATE, }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index a3f3ff5d49ac..15ea216e903d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -61,10 +61,6 @@ /* Default state size if meta is absent. */ #define DMUB_FW_STATE_SIZE (64 * 1024) -/* Default tracebuffer size if meta is absent. */ -#define DMUB_TRACE_BUFFER_SIZE (64 * 1024) - - /* Default scratch mem size. 
*/ #define DMUB_SCRATCH_MEM_SIZE (1024) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 95838c7ab054..29ccd3532d13 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -996,9 +996,9 @@ void set_replay_coasting_vtotal(struct dc_link *link, link->replay_settings.coasting_vtotal_table[type] = vtotal; } -void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal) +void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal) { - link->replay_settings.abm_with_ips_on_full_screen_video_pseudo_vtotal = vtotal; + link->replay_settings.low_rr_full_screen_video_pseudo_vtotal = vtotal; } void calculate_replay_link_off_frame_count(struct dc_link *link, @@ -1039,3 +1039,8 @@ bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_back memcpy(caps->data_points, custom_backlight_profiles[config_no].data_points, data_points_size); return true; } + +void reset_replay_dsync_error_count(struct dc_link *link) +{ + link->replay_settings.replay_desync_error_fail_count = 0; +} diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index cac302e8fa10..758a8aa31fbe 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -62,7 +62,7 @@ void set_replay_defer_update_coasting_vtotal(struct dc_link *link, uint32_t vtotal); void update_replay_coasting_vtotal_from_defer(struct dc_link *link, enum replay_coasting_vtotal_type type); -void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); +void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); void calculate_replay_link_off_frame_count(struct dc_link *link, uint16_t vtotal, 
uint16_t htotal); @@ -78,4 +78,5 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link, bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps); +void reset_replay_dsync_error_count(struct dc_link *link); #endif /* MODULES_POWER_POWER_HELPERS_H_ */ diff --git a/drivers/gpu/drm/amd/include/amd_pcie.h b/drivers/gpu/drm/amd/include/amd_pcie.h index a1ece3eecdf5..a08611cb8041 100644 --- a/drivers/gpu/drm/amd/include/amd_pcie.h +++ b/drivers/gpu/drm/amd/include/amd_pcie.h @@ -49,6 +49,17 @@ | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) /* Following flags shows PCIe lane width switch supported in driver which are decided by chipset and ASIC */ + +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1 0x00000001 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 0x00000002 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 0x00000004 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 0x00000008 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 0x00000010 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 0x00000020 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 0x00000040 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_MASK 0x0000FFFF +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_SHIFT 0 + #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X1 0x00010000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 0x00020000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 0x00040000 @@ -56,6 +67,7 @@ #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 0x00100000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 0x00200000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 0x00400000 +#define CAIL_PCIE_LINK_WIDTH_SUPPORT_MASK 0xFFFF0000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT 16 /* 1/2/4/8/16 lanes */ @@ -65,4 +77,10 @@ | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \ | CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) +#define AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1 \ + | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \ + | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \ + | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \ + | 
CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16) + #endif diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 7eefcb0f5070..6dccee403a3d 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -344,6 +344,16 @@ enum DC_DEBUG_MASK { * eDP display from ACPI _DDC method. */ DC_DISABLE_ACPI_EDID = 0x8000, + + /** + * @DC_DISABLE_HDMI_CEC: If set, disable HDMI-CEC feature in amdgpu driver. + */ + DC_DISABLE_HDMI_CEC = 0x10000, + + /** + * @DC_DISABLE_SUBVP: If set, disable DCN Sub-Viewport feature in amdgpu driver. + */ + DC_DISABLE_SUBVP = 0x20000, }; enum amd_dpm_forced_level; @@ -401,9 +411,9 @@ struct amd_ip_funcs { int (*pre_soft_reset)(struct amdgpu_ip_block *ip_block); int (*soft_reset)(struct amdgpu_ip_block *ip_block); int (*post_soft_reset)(struct amdgpu_ip_block *ip_block); - int (*set_clockgating_state)(void *handle, + int (*set_clockgating_state)(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); - int (*set_powergating_state)(void *handle, + int (*set_powergating_state)(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); void (*get_clockgating_state)(void *handle, u64 *flags); void (*dump_ip_state)(struct amdgpu_ip_block *ip_block); diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h index cae1a7e74323..73c5dd5e83d4 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h @@ -19,8 +19,8 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef _dcn_2_0_3_OFFSET_HEADER -#define _dcn_2_0_3_OFFSET_HEADER +#ifndef _dcn_2_0_1_OFFSET_HEADER +#define _dcn_2_0_1_OFFSET_HEADER // addressBlock: dce_dc_dccg_dccg_dispdec diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h index ca1e1eb39256..290d807800a6 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h @@ -18,8 +18,8 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef _dcn_2_0_3_SH_MASK_HEADER -#define _dcn_2_0_3_SH_MASK_HEADER +#ifndef _dcn_2_0_1_SH_MASK_HEADER +#define _dcn_2_0_1_SH_MASK_HEADER // addressBlock: dce_dc_dccg_dccg_dispdec diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h new file mode 100644 index 000000000000..0e8f12728d5f --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _umc_8_14_0_OFFSET_HEADER +#define _umc_8_14_0_OFFSET_HEADER + +#define regUMCCH0_GeccErrCntSel 0x0328 +#define regUMCCH0_GeccErrCntSel_BASE_IDX 0 +#define regUMCCH0_GeccErrCnt 0x0329 +#define regUMCCH0_GeccErrCnt_BASE_IDX 0 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h new file mode 100644 index 000000000000..5d723b5d9b87 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _umc_8_14_0_SH_MASK_HEADER +#define _umc_8_14_0_SH_MASK_HEADER + +//UMCCH0_GeccErrCntSel +#define UMCCH0_GeccErrCntSel__GeccErrInt__SHIFT 0xc +#define UMCCH0_GeccErrCntSel__GeccErrCntEn__SHIFT 0xf +#define UMCCH0_GeccErrCntSel__PoisonCntEn__SHIFT 0x10 +#define UMCCH0_GeccErrCntSel__GeccErrInt_MASK 0x00003000L +#define UMCCH0_GeccErrCntSel__GeccErrCntEn_MASK 0x00008000L +#define UMCCH0_GeccErrCntSel__PoisonCntEn_MASK 0x00030000L +//UMCCH0_GeccErrCnt +#define UMCCH0_GeccErrCnt__GeccErrCnt__SHIFT 0x0 +#define UMCCH0_GeccErrCnt__GeccUnCorrErrCnt__SHIFT 0x10 +#define UMCCH0_GeccErrCnt__GeccErrCnt_MASK 0x0000FFFFL +#define UMCCH0_GeccErrCnt__GeccUnCorrErrCnt_MASK 0xFFFF0000L + +#endif diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index b0fc22383e28..0160d65f3f5e 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1300,12 +1300,17 @@ struct atom_ext_display_path //usCaps enum ext_display_path_cap_def { - EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE = 0x0001, - EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = 0x0002, - EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007C, - EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x01 << 2), //PI redriver chip - EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x02 << 2), //TI retimer chip - EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 = (0x03 << 2) //Parade DP->HDMI recoverter chip + EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007E, + AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007E, + AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = (0x01 << 1), + AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x02 << 1), + AMD_EXT_DISPLAY_PATH_CAPS__DP_EARLY_8B10B_TPS2 = (0x03 << 1), + 
AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x04 << 1), + AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 = (0x06 << 1), + EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = (0x07 << 1), + EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x08 << 1), //PI redriver chip + EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x09 << 1), //TI retimer chip + EXT_DISPLAY_PATH_CAPS__AMD_INTERNAL = (0x0a << 1), //AMD internal customer chip placeholder }; struct atom_external_display_connection_info diff --git a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h new file mode 100644 index 000000000000..64b553e7de1a --- /dev/null +++ b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __IRQSRCS_VCN_5_0_H__ +#define __IRQSRCS_VCN_5_0_H__ + +#define VCN_5_0__SRCID__UVD_TRAP 114 // 0x72 UVD_TRAP +#define VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE 119 // 0x77 Encoder General Purpose +#define VCN_5_0__SRCID__UVD_ENC_LOW_LATENCY 120 // 0x78 Encoder Low Latency +#define VCN_5_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT 124 // 0x7c UVD system message interrupt +#define VCN_5_0__SRCID__JPEG_ENCODE 151 // 0x97 JRBC Encode interrupt +#define VCN_5_0__SRCID__JPEG_DECODE 153 // 0x99 JRBC Decode interrupt +#define VCN_5_0__SRCID__JPEG1_DECODE 149 // 0x95 JRBC1 Decode interrupt +#define VCN_5_0__SRCID__JPEG2_DECODE 151 // 0x97 JRBC2 Decode interrupt +#define VCN_5_0__SRCID__JPEG3_DECODE 171 // 0xab JRBC3 Decode interrupt +#define VCN_5_0__SRCID__JPEG4_DECODE 172 // 0xac JRBC4 Decode interrupt +#define VCN_5_0__SRCID__JPEG5_DECODE 173 // 0xad JRBC5 Decode interrupt +#define VCN_5_0__SRCID__JPEG6_DECODE 174 // 0xae JRBC6 Decode interrupt +#define VCN_5_0__SRCID__JPEG7_DECODE 175 // 0xaf JRBC7 Decode interrupt +#define VCN_5_0__SRCID__JPEG8_DECODE 177 // 0xb1 JRBC8 Decode interrupt +#define VCN_5_0__SRCID__JPEG9_DECODE 178 // 0xb2 JRBC9 Decode interrupt + +#define VCN_5_0__SRCID_UVD_POISON 160 +#define VCN_5_0__SRCID_DJPEG0_POISON 161 +#define VCN_5_0__SRCID_EJPEG0_POISON 162 +#endif diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 67a5de573943..9189dcb65188 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -164,6 +164,7 @@ enum amd_pp_task { }; enum PP_SMC_POWER_PROFILE { + PP_SMC_POWER_PROFILE_UNKNOWN = -1, PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0, PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1, PP_SMC_POWER_PROFILE_POWERSAVING = 0x2, @@ -420,7 +421,9 @@ struct amd_pm_funcs { int (*load_firmware)(void *handle); int (*wait_for_fw_loading_complete)(void *handle); int (*set_powergating_by_smu)(void *handle, - uint32_t block_type, 
bool gate); + uint32_t block_type, + bool gate, + int inst); int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id); int (*set_power_limit)(void *handle, uint32_t n); int (*get_power_limit)(void *handle, uint32_t *limit, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 9dc82f4d7c93..6a9e26905edf 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -70,13 +70,18 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) return ret; } -int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block_type, bool gate) +int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, + uint32_t block_type, + bool gate, + int inst) { int ret = 0; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; enum ip_power_state pwr_state = gate ? POWER_STATE_OFF : POWER_STATE_ON; + bool is_vcn = (block_type == AMD_IP_BLOCK_TYPE_UVD || block_type == AMD_IP_BLOCK_TYPE_VCN); - if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state) { + if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state && + (!is_vcn || adev->vcn.num_vcn_inst == 1)) { dev_dbg(adev->dev, "IP block%d already in the target %s state!", block_type, gate ? 
"gate" : "ungate"); return 0; @@ -88,7 +93,6 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block case AMD_IP_BLOCK_TYPE_UVD: case AMD_IP_BLOCK_TYPE_VCE: case AMD_IP_BLOCK_TYPE_GFX: - case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_SDMA: case AMD_IP_BLOCK_TYPE_JPEG: case AMD_IP_BLOCK_TYPE_GMC: @@ -96,7 +100,12 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block case AMD_IP_BLOCK_TYPE_VPE: if (pp_funcs && pp_funcs->set_powergating_by_smu) ret = (pp_funcs->set_powergating_by_smu( - (adev)->powerplay.pp_handle, block_type, gate)); + (adev)->powerplay.pp_handle, block_type, gate, 0)); + break; + case AMD_IP_BLOCK_TYPE_VCN: + if (pp_funcs && pp_funcs->set_powergating_by_smu) + ret = (pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate, inst)); break; default: break; @@ -566,7 +575,17 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) return; } - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable, 0); + if (ret) + DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", + enable ? "enable" : "disable", ret); +} + +void amdgpu_dpm_enable_vcn(struct amdgpu_device *adev, bool enable, int inst) +{ + int ret = 0; + + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCN, !enable, inst); if (ret) DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", enable ? "enable" : "disable", ret); @@ -591,7 +610,7 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) return; } - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable, 0); if (ret) DRM_ERROR("Dpm %s vce failed, ret = %d. \n", enable ? 
"enable" : "disable", ret); @@ -601,7 +620,7 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable, 0); if (ret) DRM_ERROR("Dpm %s jpeg failed, ret = %d. \n", enable ? "enable" : "disable", ret); @@ -611,7 +630,7 @@ void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable, 0); if (ret) DRM_ERROR("Dpm %s vpe failed, ret = %d.\n", enable ? "enable" : "disable", ret); @@ -700,6 +719,21 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev) return ret; } +int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_reset_sdma(smu, inst_mask); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, @@ -953,6 +987,24 @@ enum amd_dpm_forced_level amdgpu_dpm_get_performance_level(struct amdgpu_device return level; } +static void amdgpu_dpm_enter_umd_state(struct amdgpu_device *adev) +{ + /* enter UMD Pstate */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_UNGATE); +} + +static void amdgpu_dpm_exit_umd_state(struct amdgpu_device *adev) +{ + /* exit UMD Pstate */ + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_GATE); +} + int 
amdgpu_dpm_force_performance_level(struct amdgpu_device *adev, enum amd_dpm_forced_level level) { @@ -973,6 +1025,10 @@ int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev, if (current_level == level) return 0; + if (!(current_level & profile_mode_mask) && + (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) + return -EINVAL; + if (adev->asic_type == CHIP_RAVEN) { if (!(adev->apu_flags & AMD_APU_IS_RAVEN2)) { if (current_level != AMD_DPM_FORCED_LEVEL_MANUAL && @@ -984,35 +1040,25 @@ int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev, } } - if (!(current_level & profile_mode_mask) && - (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) - return -EINVAL; - - if (!(current_level & profile_mode_mask) && - (level & profile_mode_mask)) { - /* enter UMD Pstate */ - amdgpu_device_ip_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_PG_STATE_UNGATE); - amdgpu_device_ip_set_clockgating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_CG_STATE_UNGATE); - } else if ((current_level & profile_mode_mask) && - !(level & profile_mode_mask)) { - /* exit UMD Pstate */ - amdgpu_device_ip_set_clockgating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_CG_STATE_GATE); - amdgpu_device_ip_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_PG_STATE_GATE); - } + if (!(current_level & profile_mode_mask) && (level & profile_mode_mask)) + amdgpu_dpm_enter_umd_state(adev); + else if ((current_level & profile_mode_mask) && + !(level & profile_mode_mask)) + amdgpu_dpm_exit_umd_state(adev); mutex_lock(&adev->pm.mutex); if (pp_funcs->force_performance_level(adev->powerplay.pp_handle, level)) { mutex_unlock(&adev->pm.mutex); + /* If new level failed, retain the umd state as before */ + if (!(current_level & profile_mode_mask) && + (level & profile_mode_mask)) + amdgpu_dpm_exit_umd_state(adev); + else if ((current_level & profile_mode_mask) && + !(level & profile_mode_mask)) + amdgpu_dpm_enter_umd_state(adev); + return -EINVAL; } diff --git 
a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 363af8990aa2..1f5ac7e0230d 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -397,7 +397,7 @@ int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev, uint32_t limit); int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, - uint32_t block_type, bool gate); + uint32_t block_type, bool gate, int inst); extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); @@ -446,6 +446,7 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev); void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable); +void amdgpu_dpm_enable_vcn(struct amdgpu_device *adev, bool enable, int inst); void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable); void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable); void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable); @@ -601,5 +602,6 @@ int amdgpu_dpm_set_pm_policy(struct amdgpu_device *adev, int policy_type, int policy_level); ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, enum pp_pm_policy p_type, char *buf); +int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask); #endif diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 8908646ad620..67a8e22b1126 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -3177,13 +3177,13 @@ static int kv_dpm_process_interrupt(struct amdgpu_device *adev, return 0; } -static int kv_dpm_set_clockgating_state(void *handle, +static int kv_dpm_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int kv_dpm_set_powergating_state(void *handle, +static int 
kv_dpm_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -3276,7 +3276,9 @@ static int kv_dpm_read_sensor(void *handle, int idx, } static int kv_set_powergating_by_smu(void *handle, - uint32_t block_type, bool gate) + uint32_t block_type, + bool gate, + int inst) { switch (block_type) { case AMD_IP_BLOCK_TYPE_UVD: diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index ee23a0f897c5..a87dcf0974bc 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7709,7 +7709,8 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev) default: BUG(); } - err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s_smc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_smc.bin", chip_name); if (err) { DRM_ERROR("si_smc: Failed to load firmware. err = %d\"%s_smc.bin\"\n", err, chip_name); @@ -7849,13 +7850,13 @@ static int si_dpm_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int si_dpm_set_clockgating_state(void *handle, +static int si_dpm_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int si_dpm_set_powergating_state(void *handle, +static int si_dpm_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 26624a716fc6..686345f75f26 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -244,7 +244,7 @@ static bool pp_is_idle(void *handle) return false; } -static int pp_set_powergating_state(void *handle, +static int pp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -267,7 +267,7 @@ static int 
pp_resume(struct amdgpu_ip_block *ip_block) return hwmgr_resume(hwmgr); } -static int pp_set_clockgating_state(void *handle, +static int pp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; @@ -1227,7 +1227,9 @@ static void pp_dpm_powergate_sdma(void *handle, bool gate) } static int pp_set_powergating_by_smu(void *handle, - uint32_t block_type, bool gate) + uint32_t block_type, + bool gate, + int inst) { int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index fe24219c3bf4..4bd92fd782be 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -992,6 +992,8 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr GetIndexIntoMasterTable(DATA, SMU_Info), &size, &frev, &crev); + if (!psmu_info) + return -EINVAL; for (i = 0; i < psmu_info->ucSclkEntryNum; i++) { table->entry[i].ucVco_setting = psmu_info->asSclkFcwRangeEntry[i].ucVco_setting; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c index 3007b054c873..776d58ea63ae 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c @@ -1120,13 +1120,14 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) result = vega10_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega10, VEGA10_CONFIGREG_DIDT); if (0 != result) - return result; + goto exit_safe_mode; vega10_didt_set_mask(hwmgr, false); +exit_safe_mode: amdgpu_gfx_rlc_exit_safe_mode(adev, 0); - return 0; + return result; } static int vega10_disable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 5eae14fe79f1..8ca793c222ff 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -238,7 +238,8 @@ static bool is_vcn_enabled(struct amdgpu_device *adev) } static int smu_dpm_set_vcn_enable(struct smu_context *smu, - bool enable) + bool enable, + int inst) { struct smu_power_context *smu_power = &smu->smu_power; struct smu_power_gate *power_gate = &smu_power->power_gate; @@ -253,12 +254,12 @@ static int smu_dpm_set_vcn_enable(struct smu_context *smu, if (!smu->ppt_funcs->dpm_set_vcn_enable) return 0; - if (atomic_read(&power_gate->vcn_gated) ^ enable) + if (atomic_read(&power_gate->vcn_gated[inst]) ^ enable) return 0; - ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable, 0xff); + ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable, inst); if (!ret) - atomic_set(&power_gate->vcn_gated, !enable); + atomic_set(&power_gate->vcn_gated[inst], !enable); return ret; } @@ -345,8 +346,9 @@ static int smu_set_mall_enable(struct smu_context *smu) * smu_dpm_set_power_gate - power gate/ungate the specific IP block * * @handle: smu_context pointer - * @block_type: the IP block to power gate/ungate - * @gate: to power gate if true, ungate otherwise + * @block_type: the IP block to power gate/ungate + * @gate: to power gate if true, ungate otherwise + * @inst: the instance of the IP block to power gate/ungate * * This API uses no smu->mutex lock protection due to: * 1. It is either called by other IP block(gfx/sdma/vcn/uvd/vce). 
@@ -357,7 +359,8 @@ static int smu_set_mall_enable(struct smu_context *smu) */ static int smu_dpm_set_power_gate(void *handle, uint32_t block_type, - bool gate) + bool gate, + int inst) { struct smu_context *smu = handle; int ret = 0; @@ -376,10 +379,10 @@ static int smu_dpm_set_power_gate(void *handle, */ case AMD_IP_BLOCK_TYPE_UVD: case AMD_IP_BLOCK_TYPE_VCN: - ret = smu_dpm_set_vcn_enable(smu, !gate); + ret = smu_dpm_set_vcn_enable(smu, !gate, inst); if (ret) - dev_err(smu->adev->dev, "Failed to power %s VCN!\n", - gate ? "gate" : "ungate"); + dev_err(smu->adev->dev, "Failed to power %s VCN instance %d!\n", + gate ? "gate" : "ungate", inst); break; case AMD_IP_BLOCK_TYPE_GFX: ret = smu_gfx_off_control(smu, gate); @@ -724,6 +727,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) break; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 12): smu_v13_0_6_set_ppt_funcs(smu); /* Enable pp_od_clk_voltage node */ smu->od_enabled = true; @@ -764,6 +768,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block) smu->smu_baco.platform_support = false; smu->smu_baco.maco_support = false; smu->user_dpm_profile.fan_mode = -1; + smu->power_profile_mode = PP_SMC_POWER_PROFILE_UNKNOWN; mutex_init(&smu->message_lock); @@ -781,21 +786,25 @@ static int smu_set_default_dpm_table(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; struct smu_power_context *smu_power = &smu->smu_power; struct smu_power_gate *power_gate = &smu_power->power_gate; - int vcn_gate, jpeg_gate; + int vcn_gate[AMDGPU_MAX_VCN_INSTANCES], jpeg_gate, i; int ret = 0; if (!smu->ppt_funcs->set_default_dpm_table) return 0; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN) - vcn_gate = atomic_read(&power_gate->vcn_gated); + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + vcn_gate[i] = atomic_read(&power_gate->vcn_gated[i]); + } if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) jpeg_gate = atomic_read(&power_gate->jpeg_gated); if 
(adev->pg_flags & AMD_PG_SUPPORT_VCN) { - ret = smu_dpm_set_vcn_enable(smu, true); - if (ret) - return ret; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + ret = smu_dpm_set_vcn_enable(smu, true, i); + if (ret) + return ret; + } } if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { @@ -812,8 +821,10 @@ static int smu_set_default_dpm_table(struct smu_context *smu) if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) smu_dpm_set_jpeg_enable(smu, !jpeg_gate); err_out: - if (adev->pg_flags & AMD_PG_SUPPORT_VCN) - smu_dpm_set_vcn_enable(smu, !vcn_gate); + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + smu_dpm_set_vcn_enable(smu, !vcn_gate[i], i); + } return ret; } @@ -1248,11 +1259,26 @@ static bool smu_is_workload_profile_available(struct smu_context *smu, return smu->workload_map && smu->workload_map[profile].valid_mapping; } +static void smu_init_power_profile(struct smu_context *smu) +{ + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_UNKNOWN) { + if (smu->is_apu || + !smu_is_workload_profile_available( + smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) + smu->power_profile_mode = + PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + else + smu->power_profile_mode = + PP_SMC_POWER_PROFILE_FULLSCREEN3D; + } + smu_power_profile_mode_get(smu, smu->power_profile_mode); +} + static int smu_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; - int ret; + int i, ret; smu->pool_size = adev->pm.smu_prv_buffer_size; smu->smu_feature.feature_num = SMU_FEATURE_MAX; @@ -1264,18 +1290,13 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) atomic64_set(&smu->throttle_int_counter, 0); smu->watermarks_bitmap = 0; - atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + atomic_set(&smu->smu_power.power_gate.vcn_gated[i], 1); atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); atomic_set(&smu->smu_power.power_gate.vpe_gated, 
1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - if (smu->is_apu || - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) - smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; - else - smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; - smu_power_profile_mode_get(smu, smu->power_profile_mode); - + smu_init_power_profile(smu); smu->display_config = &adev->pm.pm_display_cfg; smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; @@ -1800,7 +1821,7 @@ static int smu_start_smc_engine(struct smu_context *smu) static int smu_hw_init(struct amdgpu_ip_block *ip_block) { - int ret; + int i, ret; struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; @@ -1826,7 +1847,8 @@ static int smu_hw_init(struct amdgpu_ip_block *ip_block) ret = smu_set_gfx_imu_enable(smu); if (ret) return ret; - smu_dpm_set_vcn_enable(smu, true); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + smu_dpm_set_vcn_enable(smu, true, i); smu_dpm_set_jpeg_enable(smu, true); smu_dpm_set_vpe_enable(smu, true); smu_dpm_set_umsch_mm_enable(smu, true); @@ -2024,12 +2046,13 @@ static int smu_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; - int ret; + int i, ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - smu_dpm_set_vcn_enable(smu, false); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + smu_dpm_set_vcn_enable(smu, false, i); smu_dpm_set_jpeg_enable(smu, false); smu_dpm_set_vpe_enable(smu, false); smu_dpm_set_umsch_mm_enable(smu, false); @@ -2181,13 +2204,13 @@ static int smu_display_configuration_change(void *handle, return 0; } -static int smu_set_clockgating_state(void *handle, +static int smu_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int smu_set_powergating_state(void *handle, +static int 
smu_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -2969,9 +2992,10 @@ static int smu_read_sensor(void *handle, int *size_arg) { struct smu_context *smu = handle; + struct amdgpu_device *adev = smu->adev; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; - int ret = 0; + int i, ret = 0; uint32_t *size, size_val; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) @@ -3017,7 +3041,13 @@ static int smu_read_sensor(void *handle, *size = 4; break; case AMDGPU_PP_SENSOR_VCN_POWER_STATE: - *(uint32_t *)data = atomic_read(&smu->smu_power.power_gate.vcn_gated) ? 0 : 1; + *(uint32_t *)data = 0; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (!atomic_read(&smu->smu_power.power_gate.vcn_gated[i])) { + *(uint32_t *)data = 1; + break; + } + } *size = 4; break; case AMDGPU_PP_SENSOR_MIN_FAN_RPM: @@ -3885,3 +3915,13 @@ int smu_send_rma_reason(struct smu_context *smu) return ret; } + +int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) +{ + int ret = 0; + + if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma) + ret = smu->ppt_funcs->reset_sdma(smu, inst_mask); + + return ret; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 3925815358ce..3630593bce61 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -399,7 +399,7 @@ struct smu_dpm_context { struct smu_power_gate { bool uvd_gated; bool vce_gated; - atomic_t vcn_gated; + atomic_t vcn_gated[AMDGPU_MAX_VCN_INSTANCES]; atomic_t jpeg_gated; atomic_t vpe_gated; atomic_t umsch_mm_gated; @@ -1373,6 +1373,11 @@ struct pptable_funcs { int (*send_rma_reason)(struct smu_context *smu); /** + * @reset_sdma: message SMU to soft reset sdma instance. + */ + int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask); + + /** * @get_ecc_table: message SMU to get ECC INFO table. 
*/ ssize_t (*get_ecc_info)(struct smu_context *smu, void *table); @@ -1631,6 +1636,7 @@ void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev); int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size); int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size); int smu_send_rma_reason(struct smu_context *smu); +int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask); int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, int level); ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index 0f96b8c59a0e..274b3e1cc4fb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -34,6 +34,8 @@ #define NUM_PCIE_BITRATES 4 #define NUM_XGMI_BITRATES 4 #define NUM_XGMI_WIDTHS 3 +#define NUM_SOC_P2S_TABLES 3 +#define NUM_TDP_GROUPS 4 typedef enum { /*0*/ FEATURE_DATA_CALCULATION = 0, @@ -80,8 +82,10 @@ typedef enum { /*41*/ FEATURE_CXL_QOS = 41, /*42*/ FEATURE_SOC_DC_RTC = 42, /*43*/ FEATURE_GFX_DC_RTC = 43, +/*44*/ FEATURE_DVM_MIN_PSM = 44, +/*45*/ FEATURE_PRC = 45, -/*44*/ NUM_FEATURES = 44 +/*46*/ NUM_FEATURES = 46 } FEATURE_LIST_e; //enum for MPIO PCIe gen speed msgs @@ -123,7 +127,7 @@ typedef enum { VOLTAGE_GUARDBAND_COUNT } GFX_GUARDBAND_e; -#define SMU_METRICS_TABLE_VERSION 0xE +#define SMU_METRICS_TABLE_VERSION 0xF typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; @@ -234,6 +238,9 @@ typedef struct __attribute__((packed, aligned(4))) { //PCIE BW Data and error count uint32_t PCIeOtherEndRecoveryAcc; // The Pcie counter itself is accumulated + + //Total App Clock Counter + uint64_t GfxclkBelowHostLimitAcc[8]; } MetricsTableX_t; typedef struct __attribute__((packed, aligned(4))) { @@ -328,13 +335,14 @@ typedef struct __attribute__((packed, aligned(4))) { 
uint32_t JpegBusy[32]; } MetricsTableA_t; -#define SMU_VF_METRICS_TABLE_VERSION 0x3 +#define SMU_VF_METRICS_TABLE_VERSION 0x5 typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; uint32_t InstGfxclk_TargFreq; uint64_t AccGfxclk_TargFreq; uint64_t AccGfxRsmuDpm_Busy; + uint64_t AccGfxclkBelowHostLimit; } VfMetricsTable_t; #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 41cb681927e2..147bfb12fd75 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -93,7 +93,8 @@ #define PPSMC_MSG_SelectPLPDMode 0x40 #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 #define PPSMC_MSG_SelectPstatePolicy 0x44 -#define PPSMC_Message_Count 0x45 +#define PPSMC_MSG_ResetSDMA 0x4D +#define PPSMC_Message_Count 0x4E //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index a299dc4a8071..e4cd6a0d13da 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -275,7 +275,8 @@ __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), \ __SMU_DUMMY_MAP(SelectPstatePolicy), \ __SMU_DUMMY_MAP(MALLPowerController), \ - __SMU_DUMMY_MAP(MALLPowerState), + __SMU_DUMMY_MAP(MALLPowerState), \ + __SMU_DUMMY_MAP(ResetSDMA), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index ae3563d71fa0..8d4a96e23326 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -107,6 +107,7 @@ struct smu_13_0_dpm_context { struct smu_13_0_dpm_tables dpm_tables; uint32_t workload_policy_mask; uint32_t dcef_min_ds_clk; + uint64_t caps; }; enum 
smu_13_0_power_state { @@ -303,5 +304,7 @@ int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu, int smu_v13_0_get_boot_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *value); + +void smu_v13_0_interrupt_work(struct smu_context *smu); #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 286777ada1df..19a25fdc2f5b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1157,19 +1157,15 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, int inst) { struct amdgpu_device *adev = smu->adev; - int i, ret = 0; + int ret = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - /* vcn dpm on is a prerequisite for vcn power gate messages */ - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, - 0x10000 * i, NULL); - if (ret) - return ret; - } + if (adev->vcn.harvest_config & (1 << inst)) + return ret; + /* vcn dpm on is a prerequisite for vcn power gate messages */ + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? 
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + 0x10000 * inst, NULL); } return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 480cf3cb204d..189c6a32b6bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -105,7 +105,8 @@ int smu_v11_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index f6b029354327..83163d7c7f00 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1732,7 +1732,6 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; gpu_metrics->average_umc_activity = metrics.AverageUclkActivity; - gpu_metrics->average_mm_activity = 0; /* Valid power data is available only from primary die */ if (aldebaran_is_primary(smu)) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 2bfea740dace..fbbdfa54f6a2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -103,7 +103,8 @@ int smu_v13_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; @@ -1320,11 +1321,11 @@ static int 
smu_v13_0_set_irq_state(struct amdgpu_device *adev, return 0; } -static int smu_v13_0_ack_ac_dc_interrupt(struct smu_context *smu) +void smu_v13_0_interrupt_work(struct smu_context *smu) { - return smu_cmn_send_smc_msg(smu, - SMU_MSG_ReenableAcDcInterrupt, - NULL); + smu_cmn_send_smc_msg(smu, + SMU_MSG_ReenableAcDcInterrupt, + NULL); } #define THM_11_0__SRCID__THM_DIG_THERM_L2H 0 /* ASIC_TEMP > CG_THERMAL_INT.DIG_THERM_INTH */ @@ -1377,12 +1378,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, switch (ctxid) { case SMU_IH_INTERRUPT_CONTEXT_ID_AC: dev_dbg(adev->dev, "Switched to AC mode!\n"); - smu_v13_0_ack_ac_dc_interrupt(smu); + schedule_work(&smu->interrupt_work); adev->pm.ac_power = true; break; case SMU_IH_INTERRUPT_CONTEXT_ID_DC: dev_dbg(adev->dev, "Switched to DC mode!\n"); - smu_v13_0_ack_ac_dc_interrupt(smu); + schedule_work(&smu->interrupt_work); adev->pm.ac_power = false; break; case SMU_IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING: @@ -2108,18 +2109,14 @@ int smu_v13_0_set_vcn_enable(struct smu_context *smu, int inst) { struct amdgpu_device *adev = smu->adev; - int i, ret = 0; + int ret = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->vcn.harvest_config & (1 << inst)) + return ret; - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, - i << 16U, NULL); - if (ret) - return ret; - } + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? 
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + inst << 16U, NULL); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 3aa705aae4c0..0551a3311217 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2643,11 +2643,12 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, &backend_workload_mask); /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ - if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && - ((smu->adev->pm.fw_version == 0x004e6601) || - (smu->adev->pm.fw_version >= 0x004e7300))) || - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && - smu->adev->pm.fw_version >= 0x00504500)) { + if ((workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE)) && + ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && + ((smu->adev->pm.fw_version == 0x004e6601) || + (smu->adev->pm.fw_version >= 0x004e7300))) || + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + smu->adev->pm.fw_version >= 0x00504500))) { workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, PP_SMC_POWER_PROFILE_POWERSAVING); @@ -3219,6 +3220,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .is_asic_wbrf_supported = smu_v13_0_0_wbrf_support_check, .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, + .interrupt_work = smu_v13_0_interrupt_work, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index ab3c93ddce46..da7bd9227afe 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -101,23 +101,24 @@ 
MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } -static inline bool smu_v13_0_6_is_unified_metrics(struct smu_context *smu) -{ - return (smu->adev->flags & AMD_IS_APU) && - smu->smc_fw_version <= 0x4556900; -} - -static inline bool smu_v13_0_6_is_other_end_count_available(struct smu_context *smu) -{ - switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 6): - return smu->smc_fw_version >= 0x557600; - case IP_VERSION(13, 0, 14): - return smu->smc_fw_version >= 0x05550E00; - default: - return false; - } -} +#define SMU_CAP(x) SMU_13_0_6_CAPS_##x + +enum smu_v13_0_6_caps { + SMU_CAP(DPM), + SMU_CAP(UNI_METRICS), + SMU_CAP(DPM_POLICY), + SMU_CAP(OTHER_END_METRICS), + SMU_CAP(SET_UCLK_MAX), + SMU_CAP(PCIE_METRICS), + SMU_CAP(HST_LIMIT_METRICS), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(PER_INST_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND), + SMU_CAP(SDMA_RESET), + SMU_CAP(ALL), +}; struct mca_bank_ipid { enum amdgpu_mca_ip ip; @@ -193,6 +194,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0), MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), MSG_MAP(SelectPstatePolicy, PPSMC_MSG_SelectPstatePolicy, 0), + MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), }; // clang-format on @@ -280,6 +282,162 @@ struct smu_v13_0_6_dpm_map { uint32_t *freq_table; }; +static inline void smu_v13_0_6_cap_set(struct smu_context *smu, + enum smu_v13_0_6_caps cap) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + + dpm_context->caps |= BIT_ULL(cap); +} + +static inline void smu_v13_0_6_cap_clear(struct smu_context *smu, + enum smu_v13_0_6_caps cap) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + + dpm_context->caps &= ~BIT_ULL(cap); +} + +static inline bool smu_v13_0_6_cap_supported(struct 
smu_context *smu, + enum smu_v13_0_6_caps cap) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + + return !!(dpm_context->caps & BIT_ULL(cap)); +} + +static void smu_v13_0_14_init_caps(struct smu_context *smu) +{ + enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), + SMU_CAP(UNI_METRICS), + SMU_CAP(SET_UCLK_MAX), + SMU_CAP(DPM_POLICY), + SMU_CAP(PCIE_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND) }; + uint32_t fw_ver = smu->smc_fw_version; + + for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++) + smu_v13_0_6_cap_set(smu, default_cap_list[i]); + + if (fw_ver >= 0x05550E00) + smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS)); + if (fw_ver >= 0x05551000) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); + if (fw_ver >= 0x05550B00) + smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); + if (fw_ver >= 0x5551200) + smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); +} + +static void smu_v13_0_12_init_caps(struct smu_context *smu) +{ + enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), + SMU_CAP(UNI_METRICS), + SMU_CAP(PCIE_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND) }; + uint32_t fw_ver = smu->smc_fw_version; + + for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++) + smu_v13_0_6_cap_set(smu, default_cap_list[i]); + + if (fw_ver < 0x00561900) + smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM)); + + if (fw_ver >= 0x00561700) + smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); +} + +static void smu_v13_0_6_init_caps(struct smu_context *smu) +{ + enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), + SMU_CAP(UNI_METRICS), + SMU_CAP(SET_UCLK_MAX), + SMU_CAP(DPM_POLICY), + SMU_CAP(PCIE_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND) }; + struct amdgpu_device *adev = smu->adev; + uint32_t fw_ver = smu->smc_fw_version; + uint32_t pgm = (fw_ver >> 24) & 0xFF; + + for (int i = 0; 
i < ARRAY_SIZE(default_cap_list); i++) + smu_v13_0_6_cap_set(smu, default_cap_list[i]); + + if (fw_ver < 0x552F00) + smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM)); + if (fw_ver < 0x554500) + smu_v13_0_6_cap_clear(smu, SMU_CAP(CTF_LIMIT)); + + if (adev->flags & AMD_IS_APU) { + smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS)); + smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY)); + smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); + smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); + + if (fw_ver <= 0x4556900) + smu_v13_0_6_cap_clear(smu, SMU_CAP(UNI_METRICS)); + if (fw_ver >= 0x04556F00) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); + if (fw_ver >= 0x04556A00) + smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); + } else { + if (fw_ver >= 0x557600) + smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS)); + if (fw_ver < 0x00556000) + smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY)); + if (amdgpu_sriov_vf(adev) && (fw_ver < 0x556600)) + smu_v13_0_6_cap_clear(smu, SMU_CAP(SET_UCLK_MAX)); + if (fw_ver < 0x556300) + smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS)); + if (fw_ver < 0x554800) + smu_v13_0_6_cap_clear(smu, SMU_CAP(MCA_DEBUG_MODE)); + if (fw_ver >= 0x556F00) + smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); + if (fw_ver < 0x00555a00) + smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); + if (fw_ver < 0x00555600) + smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); + if (pgm == 0 && fw_ver >= 0x557900) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); + } + if (((pgm == 7) && (fw_ver >= 0x7550700)) || + ((pgm == 0) && (fw_ver >= 0x00557900)) || + ((pgm == 4) && (fw_ver >= 0x4557000))) + smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); +} + +static void smu_v13_0_x_init_caps(struct smu_context *smu) +{ + switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) { + case IP_VERSION(13, 0, 12): + return smu_v13_0_12_init_caps(smu); + case IP_VERSION(13, 0, 14): + return smu_v13_0_14_init_caps(smu); + default: + return smu_v13_0_6_init_caps(smu); + } +} + +static int 
smu_v13_0_6_check_fw_version(struct smu_context *smu) +{ + int r; + + r = smu_v13_0_check_fw_version(smu); + /* Initialize caps flags once fw version is fetched */ + if (!r) + smu_v13_0_x_init_caps(smu); + + return r; +} + static int smu_v13_0_6_init_microcode(struct smu_context *smu) { const struct smc_firmware_header_v2_1 *v2_1; @@ -304,7 +462,8 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - ret = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); + ret = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (ret) goto out; @@ -600,7 +759,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; - bool flag = smu_v13_0_6_is_unified_metrics(smu); + bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); int ret, i, retry = 100; uint32_t table_version; @@ -796,8 +955,7 @@ static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu) smu_v13_0_6_setup_driver_pptable(smu); /* DPM policy not supported in older firmwares */ - if (!(smu->adev->flags & AMD_IS_APU) && - (smu->smc_fw_version < 0x00556000)) { + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM_POLICY))) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; smu_dpm->dpm_policies->policy_mask &= @@ -974,7 +1132,7 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, struct smu_table_context *smu_table = &smu->smu_table; MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; - bool flag = smu_v13_0_6_is_unified_metrics(smu); + bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); struct amdgpu_device *adev = smu->adev; int ret = 0; int 
xcc_id; @@ -987,7 +1145,7 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, switch (member) { case METRICS_CURR_GFXCLK: case METRICS_AVERAGE_GFXCLK: - if (smu->smc_fw_version >= 0x552F00) { + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) { xcc_id = GET_INST(GC, 0); *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, flag)[xcc_id]); } else { @@ -1674,7 +1832,7 @@ static int smu_v13_0_6_notify_unload(struct smu_context *smu) static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable) { /* NOTE: this ClearMcaOnRead message is only supported for smu version 85.72.0 or higher */ - if (smu->smc_fw_version < 0x554800) + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(MCA_DEBUG_MODE))) return 0; return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead, @@ -1819,9 +1977,8 @@ static int smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu, if (max == pstate_table->uclk_pstate.curr.max) return 0; /* For VF, only allowed in FW versions 85.102 or greater */ - if (amdgpu_sriov_vf(adev) && - ((smu->smc_fw_version < 0x556600) || - (adev->flags & AMD_IS_APU))) + if (!smu_v13_0_6_cap_supported(smu, + SMU_CAP(SET_UCLK_MAX))) return -EOPNOTSUPP; /* Only max clock limiting is allowed for UCLK */ ret = smu_v13_0_set_soft_freq_limited_range( @@ -2025,7 +2182,7 @@ static int smu_v13_0_6_get_enabled_mask(struct smu_context *smu, ret = smu_cmn_get_enabled_mask(smu, feature_mask); - if (ret == -EIO && smu->smc_fw_version < 0x552F00) { + if (ret == -EIO && !smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) { *feature_mask = 0; ret = 0; } @@ -2318,11 +2475,10 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) { - bool per_inst, smu_13_0_6_per_inst, smu_13_0_14_per_inst, apu_per_inst; struct smu_table_context *smu_table = &smu->smu_table; struct gpu_metrics_v1_7 *gpu_metrics = (struct gpu_metrics_v1_7 
*)smu_table->gpu_metrics_table; - bool flag = smu_v13_0_6_is_unified_metrics(smu); + bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; MetricsTableX_t *metrics_x; @@ -2330,6 +2486,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table struct amdgpu_xcp *xcp; u16 link_width_level; u32 inst_mask; + bool per_inst; metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL); ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true); @@ -2356,6 +2513,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->average_umc_activity = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, flag)); + gpu_metrics->mem_max_bandwidth = + SMUQ10_ROUND(GET_METRIC_FIELD(MaxDramBandwidth, flag)); + gpu_metrics->curr_socket_power = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, flag)); /* Energy counter reported in 15.259uJ (2^-16) units */ @@ -2400,7 +2560,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table * table for both pf & one vf for smu version 85.99.0 or higher else report only * for pf from registers */ - if (smu->smc_fw_version >= 0x556300) { + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PCIE_METRICS))) { gpu_metrics->pcie_link_width = metrics_x->PCIeLinkWidth; gpu_metrics->pcie_link_speed = pcie_gen_to_speed(metrics_x->PCIeLinkSpeed); @@ -2429,7 +2589,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_x->PCIeNAKSentCountAcc; gpu_metrics->pcie_nak_rcvd_count_acc = metrics_x->PCIeNAKReceivedCountAcc; - if (smu_v13_0_6_is_other_end_count_available(smu)) + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(OTHER_END_METRICS))) gpu_metrics->pcie_lc_perf_other_end_recovery = metrics_x->PCIeOtherEndRecoveryAcc; @@ -2454,17 +2614,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table 
gpu_metrics->num_partition = adev->xcp_mgr->num_xcps; - apu_per_inst = (adev->flags & AMD_IS_APU) && (smu->smc_fw_version >= 0x04556A00); - smu_13_0_6_per_inst = !(adev->flags & AMD_IS_APU) && - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) - == IP_VERSION(13, 0, 6)) && - (smu->smc_fw_version >= 0x556F00); - smu_13_0_14_per_inst = !(adev->flags & AMD_IS_APU) && - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) - == IP_VERSION(13, 0, 14)) && - (smu->smc_fw_version >= 0x05550B00); - - per_inst = apu_per_inst || smu_13_0_6_per_inst || smu_13_0_14_per_inst; + per_inst = smu_v13_0_6_cap_supported(smu, SMU_CAP(PER_INST_METRICS)); for_each_xcp(adev->xcp_mgr, xcp, i) { amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask); @@ -2494,6 +2644,12 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table SMUQ10_ROUND(metrics_x->GfxBusy[inst]); gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = SMUQ10_ROUND(metrics_x->GfxBusyAcc[inst]); + + if (smu_v13_0_6_cap_supported( + smu, SMU_CAP(HST_LIMIT_METRICS))) + gpu_metrics->xcp_stats[i].gfx_below_host_limit_acc[idx] = + SMUQ10_ROUND(metrics_x->GfxclkBelowHostLimitAcc + [inst]); idx++; } } @@ -2598,7 +2754,7 @@ static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu, return -EINVAL; /*Check smu version, GetCtfLimit message only supported for smu version 85.69 or higher */ - if (smu->smc_fw_version < 0x554500) + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(CTF_LIMIT))) return 0; /* Get SOC Max operating temperature */ @@ -2700,11 +2856,10 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, static int smu_v13_0_6_send_rma_reason(struct smu_context *smu) { - struct amdgpu_device *adev = smu->adev; int ret; /* NOTE: the message is only valid on dGPU with pmfw 85.90.0 and above */ - if ((adev->flags & AMD_IS_APU) || smu->smc_fw_version < 0x00555a00) + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(RMA_MSG))) return 0; ret = smu_cmn_send_smc_msg(smu, 
SMU_MSG_RmaDueToBadPageThreshold, NULL); @@ -2716,6 +2871,23 @@ static int smu_v13_0_6_send_rma_reason(struct smu_context *smu) return ret; } +static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) +{ + int ret = 0; + + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET))) + return -EOPNOTSUPP; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_ResetSDMA, inst_mask, NULL); + if (ret) + dev_err(smu->adev->dev, + "failed to send ResetSDMA event with mask 0x%x\n", + inst_mask); + + return ret; +} + static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -3026,7 +3198,7 @@ static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amd if (instlo != 0x03b30400) return false; - if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) { + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND))) { errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]); errcode &= 0xff; } else { @@ -3312,9 +3484,10 @@ static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev, static int aca_smu_parse_error_code(struct amdgpu_device *adev, struct aca_bank *bank) { + struct smu_context *smu = adev->powerplay.pp_handle; int error_code; - if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND))) error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]); else error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]); @@ -3352,7 +3525,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .fini_power = smu_v13_0_fini_power, .check_fw_status = smu_v13_0_6_check_fw_status, /* pptable related */ - .check_fw_version = smu_v13_0_check_fw_version, + .check_fw_version = smu_v13_0_6_check_fw_version, .set_driver_table_location = smu_v13_0_set_driver_table_location, .set_tool_table_location = smu_v13_0_set_tool_table_location, 
.notify_memory_pool_location = smu_v13_0_notify_memory_pool_location, @@ -3385,6 +3558,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .i2c_fini = smu_v13_0_6_i2c_control_fini, .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, .send_rma_reason = smu_v13_0_6_send_rma_reason, + .reset_sdma = smu_v13_0_6_reset_sdma, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index f4ac403b8b36..55ef18517b0f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2797,6 +2797,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check, .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, + .interrupt_work = smu_v13_0_interrupt_work, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) @@ -2810,4 +2811,5 @@ void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) smu->workload_map = smu_v13_0_7_workload_map; smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION; smu_v13_0_set_smu_mailbox_registers(smu); + smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index a87040cb2f2e..9b2f4fe1578b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -79,7 +79,8 @@ int smu_v14_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; @@ -1511,29 +1512,24 @@ int 
smu_v14_0_set_vcn_enable(struct smu_context *smu, int inst) { struct amdgpu_device *adev = smu->adev; - int i, ret = 0; + int ret = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->vcn.harvest_config & (1 << inst)) + return ret; - if (smu->is_apu) { - if (i == 0) - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0, - i << 16U, NULL); - else if (i == 1) - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1, - i << 16U, NULL); - } else { + if (smu->is_apu) { + if (inst == 0) ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, - i << 16U, NULL); - } - - if (ret) - return ret; + SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0, + inst << 16U, NULL); + else if (inst == 1) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1, + inst << 16U, NULL); + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? 
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + inst << 16U, NULL); } return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 6a565ce74d5b..5cad09c5f2ff 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -2096,7 +2096,7 @@ static int smu_v14_0_2_enable_gfx_features(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(14, 0, 2)) + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 2)) return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableAllSmuFeatures, FEATURE_PWR_GFX, NULL); else diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c index 36a7b6f085f9..1ff8c815ec79 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c @@ -153,7 +153,16 @@ static int adv7511_hdmi_hw_params(struct device *dev, void *data, ADV7511_AUDIO_CFG3_LEN_MASK, len); regmap_update_bits(adv7511->regmap, ADV7511_REG_I2C_FREQ_ID_CFG, ADV7511_I2C_FREQ_ID_CFG_RATE_MASK, rate << 4); - regmap_write(adv7511->regmap, 0x73, 0x1); + + /* send current Audio infoframe values while updating */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), BIT(5)); + + regmap_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME(0), 0x1); + + /* use Audio infoframe updated info */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), 0); return 0; } @@ -184,8 +193,9 @@ static int audio_startup(struct device *dev, void *data) regmap_update_bits(adv7511->regmap, ADV7511_REG_GC(0), BIT(7) | BIT(6), BIT(7)); /* use Audio infoframe updated info */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_GC(1), + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, BIT(5), 0); + /* enable SPDIF receiver */ if 
(adv7511->audio_source == ADV7511_AUDIO_SOURCE_SPDIF) regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CONFIG, diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 1a76aea6a945..19f4319eb647 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1247,8 +1247,10 @@ static int adv7511_probe(struct i2c_client *i2c) return ret; ret = adv7511_init_regulators(adv7511); - if (ret) - return dev_err_probe(dev, ret, "failed to init regulators\n"); + if (ret) { + dev_err_probe(dev, ret, "failed to init regulators\n"); + goto err_of_node_put; + } /* * The power down GPIO is optional. If present, toggle it from active to @@ -1369,6 +1371,8 @@ err_i2c_unregister_edid: i2c_unregister_device(adv7511->i2c_edid); uninit_regulators: adv7511_uninit_regulators(adv7511); +err_of_node_put: + of_node_put(adv7511->host_node); return ret; } @@ -1377,6 +1381,8 @@ static void adv7511_remove(struct i2c_client *i2c) { struct adv7511 *adv7511 = i2c_get_clientdata(i2c); + of_node_put(adv7511->host_node); + adv7511_uninit_regulators(adv7511); drm_bridge_remove(&adv7511->bridge); diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c index 4481489aaf5e..122ad91e8a32 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7533.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c @@ -172,7 +172,7 @@ int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv) of_property_read_u32(np, "adi,dsi-lanes", &num_lanes); - if (num_lanes < 1 || num_lanes > 4) + if (num_lanes < 2 || num_lanes > 4) return -EINVAL; adv->num_dsi_lanes = num_lanes; @@ -181,8 +181,6 @@ int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv) if (!adv->host_node) return -ENODEV; - of_node_put(adv->host_node); - adv->use_timing_gen = !of_property_read_bool(np, "adi,disable-timing-generator"); diff --git a/drivers/gpu/drm/bridge/ite-it6263.c 
b/drivers/gpu/drm/bridge/ite-it6263.c index 45af49a1e90f..306b5e374b9e 100644 --- a/drivers/gpu/drm/bridge/ite-it6263.c +++ b/drivers/gpu/drm/bridge/ite-it6263.c @@ -854,8 +854,8 @@ static int it6263_probe(struct i2c_client *client) it->lvds_i2c = devm_i2c_new_dummy_device(dev, client->adapter, LVDS_INPUT_CTRL_I2C_ADDR); if (IS_ERR(it->lvds_i2c)) - dev_err_probe(it->dev, PTR_ERR(it->lvds_i2c), - "failed to allocate I2C device for LVDS\n"); + return dev_err_probe(it->dev, PTR_ERR(it->lvds_i2c), + "failed to allocate I2C device for LVDS\n"); it->lvds_regmap = devm_regmap_init_i2c(it->lvds_i2c, &it6263_lvds_regmap_config); diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 0397e62f9cbc..30c736fc0067 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -596,6 +596,9 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, return ERR_PTR(-EINVAL); if (bridge_connector->bridge_hdmi) { + if (!connector->ycbcr_420_allowed) + supported_formats &= ~BIT(HDMI_COLORSPACE_YUV420); + bridge = bridge_connector->bridge_hdmi; ret = drmm_connector_hdmi_init(drm, connector, diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index e79f4b76ec2b..8b68bb3fbffb 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3583,8 +3583,7 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr, } /** - * drm_dp_get_vc_payload_bw - get the VC payload BW for an MST link - * @mgr: The &drm_dp_mst_topology_mgr to use + * drm_dp_get_vc_payload_bw - get the VC payload BW for an MTP link * @link_rate: link rate in 10kbits/s units * @link_lane_count: lane count * @@ -3595,17 +3594,12 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr, * * Returns the BW / timeslot value in 20.12 fixed point format. 
*/ -fixed20_12 drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, - int link_rate, int link_lane_count) +fixed20_12 drm_dp_get_vc_payload_bw(int link_rate, int link_lane_count) { int ch_coding_efficiency = drm_dp_bw_channel_coding_efficiency(drm_dp_is_uhbr_rate(link_rate)); fixed20_12 ret; - if (link_rate == 0 || link_lane_count == 0) - drm_dbg_kms(mgr->dev, "invalid link rate/lane count: (%d / %d)\n", - link_rate, link_lane_count); - /* See DP v2.0 2.6.4.2, 2.7.6.3 VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */ ret.full = DIV_ROUND_DOWN_ULL(mul_u32_u32(link_rate * link_lane_count, ch_coding_efficiency), diff --git a/drivers/gpu/drm/display/drm_dp_tunnel.c b/drivers/gpu/drm/display/drm_dp_tunnel.c index 48b2df120086..90fe07a89260 100644 --- a/drivers/gpu/drm/display/drm_dp_tunnel.c +++ b/drivers/gpu/drm/display/drm_dp_tunnel.c @@ -1896,8 +1896,8 @@ static void destroy_mgr(struct drm_dp_tunnel_mgr *mgr) * * Creates a DP tunnel manager for @dev. * - * Returns a pointer to the tunnel manager if created successfully or NULL in - * case of an error. + * Returns a pointer to the tunnel manager if created successfully or error + * pointer in case of failure. 
*/ struct drm_dp_tunnel_mgr * drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) @@ -1907,7 +1907,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); if (!mgr) - return NULL; + return ERR_PTR(-ENOMEM); mgr->dev = dev; init_waitqueue_head(&mgr->bw_req_queue); @@ -1916,7 +1916,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) if (!mgr->groups) { kfree(mgr); - return NULL; + return ERR_PTR(-ENOMEM); } #ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG @@ -1927,7 +1927,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) if (!init_group(mgr, &mgr->groups[i])) { destroy_mgr(mgr); - return NULL; + return ERR_PTR(-ENOMEM); } mgr->group_count++; diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index 2691e8b3e480..c205f37da1e1 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -503,6 +503,9 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, connector_state_get_mode(new_conn_state); int ret; + if (!new_conn_state->crtc || !new_conn_state->best_encoder) + return 0; + new_conn_state->hdmi.is_limited_range = hdmi_is_limited_range(connector, new_conn_state); ret = hdmi_compute_config(connector, new_conn_state, mode); @@ -788,6 +791,8 @@ drm_atomic_helper_connector_hdmi_update(struct drm_connector *connector, if (status == connector_status_disconnected) { // TODO: also handle CEC and scramber, HDMI sink disconnected. 
drm_connector_hdmi_audio_plugged_notify(connector, false); + drm_edid_connector_update(connector, NULL); + return; } if (connector->hdmi.funcs->read_edid) diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index c6af46dd02bf..241a384ebce3 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -207,6 +207,10 @@ void drm_bridge_add(struct drm_bridge *bridge) { mutex_init(&bridge->hpd_mutex); + if (bridge->ops & DRM_BRIDGE_OP_HDMI) + bridge->ycbcr_420_allowed = !!(bridge->supported_formats & + BIT(HDMI_COLORSPACE_YUV420)); + mutex_lock(&bridge_lock); list_add_tail(&bridge->list, &bridge_list); mutex_unlock(&bridge_lock); diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 251f94313717..aca442c25209 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -743,6 +743,15 @@ retry: if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; + for (i = 0; i < count; i++) { + struct drm_connector *connector = connectors[i]; + + if (connector->has_tile) + drm_client_get_tile_offsets(dev, connectors, connector_count, + modes, offsets, i, + connector->tile_h_loc, connector->tile_v_loc); + } + /* * If the BIOS didn't enable everything it could, fall back to have the * same user experiencing of lighting up as much as possible like the diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 1383fa9fff9b..5f24d6b41cc6 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -592,6 +592,9 @@ int drmm_connector_hdmi_init(struct drm_device *dev, if (!supported_formats || !(supported_formats & BIT(HDMI_COLORSPACE_RGB))) return -EINVAL; + if (connector->ycbcr_420_allowed != !!(supported_formats & BIT(HDMI_COLORSPACE_YUV420))) + return -EINVAL; + if (!(max_bpc == 8 || max_bpc == 10 || max_bpc == 12)) return -EINVAL; diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c 
index c2c172eb25df..3cf440eee8a2 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -26,6 +26,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include <linux/cgroup_dmem.h> #include <linux/debugfs.h> #include <linux/fs.h> #include <linux/module.h> @@ -820,6 +821,37 @@ void drm_dev_put(struct drm_device *dev) } EXPORT_SYMBOL(drm_dev_put); +static void drmm_cg_unregister_region(struct drm_device *dev, void *arg) +{ + dmem_cgroup_unregister_region(arg); +} + +/** + * drmm_cgroup_register_region - Register a region of a DRM device to cgroups + * @dev: device for region + * @region_name: Region name for registering + * @size: Size of region in bytes + * + * This decreases the ref-count of @dev by one. The device is destroyed if the + * ref-count drops to zero. + */ +struct dmem_cgroup_region *drmm_cgroup_register_region(struct drm_device *dev, const char *region_name, u64 size) +{ + struct dmem_cgroup_region *region; + int ret; + + region = dmem_cgroup_register_region(size, "drm/%s/%s", dev->unique, region_name); + if (IS_ERR_OR_NULL(region)) + return region; + + ret = drmm_add_action_or_reset(dev, drmm_cg_unregister_region, region); + if (ret) + return ERR_PTR(ret); + + return region; +} +EXPORT_SYMBOL_GPL(drmm_cgroup_register_region); + static int create_compat_control_link(struct drm_device *dev) { struct drm_minor *minor; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index c9008113111b..fb3614a7ba44 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1354,14 +1354,14 @@ int drm_fb_helper_set_par(struct fb_info *info) } EXPORT_SYMBOL(drm_fb_helper_set_par); -static void pan_set(struct drm_fb_helper *fb_helper, int x, int y) +static void pan_set(struct drm_fb_helper *fb_helper, int dx, int dy) { struct drm_mode_set *mode_set; mutex_lock(&fb_helper->client.modeset_mutex); drm_client_for_each_modeset(mode_set, &fb_helper->client) { - mode_set->x = x; - mode_set->y = y; + mode_set->x += dx; + 
mode_set->y += dy; } mutex_unlock(&fb_helper->client.modeset_mutex); } @@ -1370,16 +1370,18 @@ static int pan_display_atomic(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; - int ret; + int ret, dx, dy; - pan_set(fb_helper, var->xoffset, var->yoffset); + dx = var->xoffset - info->var.xoffset; + dy = var->yoffset - info->var.yoffset; + pan_set(fb_helper, dx, dy); ret = drm_client_modeset_commit_locked(&fb_helper->client); if (!ret) { info->var.xoffset = var->xoffset; info->var.yoffset = var->yoffset; } else - pan_set(fb_helper, info->var.xoffset, info->var.yoffset); + pan_set(fb_helper, -dx, -dy); return ret; } diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 513c04f74420..e72f855fc495 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1286,14 +1286,11 @@ EXPORT_SYMBOL(drm_mode_set_name); */ int drm_mode_vrefresh(const struct drm_display_mode *mode) { - unsigned int num, den; + unsigned int num = 1, den = 1; if (mode->htotal == 0 || mode->vtotal == 0) return 0; - num = mode->clock; - den = mode->htotal * mode->vtotal; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) num *= 2; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) @@ -1301,6 +1298,12 @@ int drm_mode_vrefresh(const struct drm_display_mode *mode) if (mode->vscan > 1) den *= mode->vscan; + if (check_mul_overflow(mode->clock, num, &num)) + return 0; + + if (check_mul_overflow(mode->htotal * mode->vtotal, den, &den)) + return 0; + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(num, 1000), den); } EXPORT_SYMBOL(drm_mode_vrefresh); diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 09500cddc009..bcf248f69252 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -929,10 +929,9 @@ impl QrImage<'_> { /// * `tmp` must be valid for reading and writing for `tmp_size` bytes. /// /// They must remain valid for the duration of the function call. 
- #[no_mangle] pub unsafe extern "C" fn drm_panic_qr_generate( - url: *const i8, + url: *const kernel::ffi::c_char, data: *mut u8, data_len: usize, data_size: usize, diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c index 08cfea04e22b..79517bd4418f 100644 --- a/drivers/gpu/drm/drm_print.c +++ b/drivers/gpu/drm/drm_print.c @@ -390,3 +390,26 @@ void drm_print_regset32(struct drm_printer *p, struct debugfs_regset32 *regset) } } EXPORT_SYMBOL(drm_print_regset32); + +/** + * drm_print_hex_dump - print a hex dump to a &drm_printer stream + * @p: The &drm_printer + * @prefix: Prefix for each line, may be NULL for no prefix + * @buf: Buffer to dump + * @len: Length of buffer + * + * Print hex dump to &drm_printer, with 16 space-separated hex bytes per line, + * optionally with a prefix on each line. No separator is added after prefix. + */ +void drm_print_hex_dump(struct drm_printer *p, const char *prefix, + const u8 *buf, size_t len) +{ + int i; + + for (i = 0; i < len; i += 16) { + int bytes_per_line = min(16, len - i); + + drm_printf(p, "%s%*ph\n", prefix ?: "", bytes_per_line, buf + i); + } +} +EXPORT_SYMBOL(drm_print_hex_dump); diff --git a/drivers/gpu/drm/drm_vblank_work.c b/drivers/gpu/drm/drm_vblank_work.c index 1752ffb44e1d..9cc71120246f 100644 --- a/drivers/gpu/drm/drm_vblank_work.c +++ b/drivers/gpu/drm/drm_vblank_work.c @@ -277,7 +277,7 @@ int drm_vblank_worker_init(struct drm_vblank_crtc *vblank) INIT_LIST_HEAD(&vblank->pending_work); init_waitqueue_head(&vblank->work_wait_queue); - worker = kthread_create_worker(0, "card%d-crtc%d", + worker = kthread_run_worker(0, "card%d-crtc%d", vblank->dev->primary->index, vblank->pipe); if (IS_ERR(worker)) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c index 7aa5f14d0c87..3a221923f15d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c @@ -9,7 +9,6 @@ #include "etnaviv_gem.h" #include "etnaviv_gpu.h" 
#include "etnaviv_mmu.h" -#include "etnaviv_perfmon.h" #define SUBALLOC_SIZE SZ_512K #define SUBALLOC_GRANULE SZ_4K @@ -100,7 +99,7 @@ retry: mutex_unlock(&suballoc->lock); ret = wait_event_interruptible_timeout(suballoc->free_event, suballoc->free_space, - msecs_to_jiffies(10 * 1000)); + secs_to_jiffies(10)); if (!ret) { dev_err(suballoc->dev, "Timeout waiting for cmdbuf space\n"); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 00707001d112..3e91747ed339 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -488,7 +488,16 @@ static const struct drm_ioctl_desc etnaviv_ioctls[] = { ETNA_IOCTL(PM_QUERY_SIG, pm_query_sig, DRM_RENDER_ALLOW), }; -DEFINE_DRM_GEM_FOPS(fops); +static void etnaviv_show_fdinfo(struct drm_printer *p, struct drm_file *file) +{ + drm_show_memory_stats(p, file); +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + DRM_GEM_FOPS, + .show_fdinfo = drm_show_fdinfo, +}; static const struct drm_driver etnaviv_drm_driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER, @@ -498,6 +507,7 @@ static const struct drm_driver etnaviv_drm_driver = { #ifdef CONFIG_DEBUG_FS .debugfs_init = etnaviv_debugfs_init, #endif + .show_fdinfo = etnaviv_show_fdinfo, .ioctls = etnaviv_ioctls, .num_ioctls = DRM_ETNAVIV_NUM_IOCTLS, .fops = &fops, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 16473c371444..2f844e82bc46 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -342,6 +342,7 @@ void *etnaviv_gem_vmap(struct drm_gem_object *obj) static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj) { struct page **pages; + pgprot_t prot; lockdep_assert_held(&obj->lock); @@ -349,8 +350,19 @@ static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj) if (IS_ERR(pages)) return NULL; - return vmap(pages, obj->base.size >> PAGE_SHIFT, - VM_MAP, 
pgprot_writecombine(PAGE_KERNEL)); + switch (obj->flags & ETNA_BO_CACHE_MASK) { + case ETNA_BO_CACHED: + prot = PAGE_KERNEL; + break; + case ETNA_BO_UNCACHED: + prot = pgprot_noncached(PAGE_KERNEL); + break; + case ETNA_BO_WC: + default: + prot = pgprot_writecombine(PAGE_KERNEL); + } + + return vmap(pages, obj->base.size >> PAGE_SHIFT, VM_MAP, prot); } static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op) @@ -528,6 +540,17 @@ void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj) mutex_unlock(&priv->gem_lock); } +static enum drm_gem_object_status etnaviv_gem_status(struct drm_gem_object *obj) +{ + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + enum drm_gem_object_status status = 0; + + if (etnaviv_obj->pages) + status |= DRM_GEM_OBJECT_RESIDENT; + + return status; +} + static const struct vm_operations_struct vm_ops = { .fault = etnaviv_gem_fault, .open = drm_gem_vm_open, @@ -541,6 +564,7 @@ static const struct drm_gem_object_funcs etnaviv_gem_object_funcs = { .get_sg_table = etnaviv_gem_prime_get_sg_table, .vmap = etnaviv_gem_prime_vmap, .mmap = etnaviv_gem_mmap, + .status = etnaviv_gem_status, .vm_ops = &vm_ops, }; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h index 687555aae807..e5ee82a0674c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h @@ -44,9 +44,7 @@ struct etnaviv_gem_object { u32 flags; struct list_head gem_node; - struct etnaviv_gpu *gpu; /* non-null if active */ atomic_t gpu_active; - u32 access; struct page **pages; struct sg_table *sgt; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 2d4c112ce033..cf0d9049bcf1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -13,6 +13,7 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/regulator/consumer.h> +#include <linux/reset.h> #include 
<linux/thermal.h> #include "etnaviv_cmdbuf.h" @@ -172,6 +173,29 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value) return 0; } +static int etnaviv_gpu_reset_deassert(struct etnaviv_gpu *gpu) +{ + int ret; + + /* + * 32 core clock cycles (slowest clock) required before deassertion + * 1 microsecond might match all implementations without computation + */ + usleep_range(1, 2); + + ret = reset_control_deassert(gpu->rst); + if (ret) + return ret; + + /* + * 128 core clock cycles (slowest clock) required before any activity on AHB + * 1 microsecond might match all implementations without computation + */ + usleep_range(1, 2); + + return 0; +} + static inline bool etnaviv_is_model_rev(struct etnaviv_gpu *gpu, u32 model, u32 revision) { return gpu->identity.model == model && @@ -799,6 +823,12 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) goto pm_put; } + ret = etnaviv_gpu_reset_deassert(gpu); + if (ret) { + dev_err(gpu->dev, "GPU reset deassert failed\n"); + goto fail; + } + etnaviv_hw_identify(gpu); if (gpu->identity.model == 0) { @@ -1860,6 +1890,17 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev) if (IS_ERR(gpu->mmio)) return PTR_ERR(gpu->mmio); + + /* Get Reset: */ + gpu->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); + if (IS_ERR(gpu->rst)) + return dev_err_probe(dev, PTR_ERR(gpu->rst), + "failed to get reset\n"); + + err = reset_control_assert(gpu->rst); + if (err) + return dev_err_probe(dev, err, "failed to assert reset\n"); + /* Get Interrupt: */ gpu->irq = platform_get_irq(pdev, 0); if (gpu->irq < 0) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 4d8a7d48ade3..5cb46c84e03a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -93,6 +93,7 @@ struct etnaviv_event { struct etnaviv_cmdbuf_suballoc; struct regulator; struct clk; +struct reset_control; #define ETNA_NR_EVENTS 30 @@ -158,6 +159,7 @@ struct 
etnaviv_gpu { struct clk *clk_reg; struct clk *clk_core; struct clk *clk_shader; + struct reset_control *rst; unsigned int freq_scale; unsigned int fe_waitcycles; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 7e065b3723cf..df5192083b20 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -19,12 +19,6 @@ static void etnaviv_context_unmap(struct etnaviv_iommu_context *context, size_t unmapped_page, unmapped = 0; size_t pgsize = SZ_4K; - if (!IS_ALIGNED(iova | size, pgsize)) { - pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%zx\n", - iova, size, pgsize); - return; - } - while (unmapped < size) { unmapped_page = context->global->ops->unmap(context, iova, pgsize); @@ -45,12 +39,6 @@ static int etnaviv_context_map(struct etnaviv_iommu_context *context, size_t orig_size = size; int ret = 0; - if (!IS_ALIGNED(iova | paddr | size, pgsize)) { - pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%zx\n", - iova, &paddr, size, pgsize); - return -EINVAL; - } - while (size) { ret = context->global->ops->map(context, iova, paddr, pgsize, prot); @@ -82,11 +70,19 @@ static int etnaviv_iommu_map(struct etnaviv_iommu_context *context, return -EINVAL; for_each_sgtable_dma_sg(sgt, sg, i) { - phys_addr_t pa = sg_dma_address(sg) - sg->offset; - unsigned int da_len = sg_dma_len(sg) + sg->offset; + phys_addr_t pa = sg_dma_address(sg); + unsigned int da_len = sg_dma_len(sg); unsigned int bytes = min_t(unsigned int, da_len, va_len); - VERB("map[%d]: %08x %pap(%x)", i, iova, &pa, bytes); + VERB("map[%d]: %08x %pap(%x)", i, da, &pa, bytes); + + if (!IS_ALIGNED(iova | pa | bytes, SZ_4K)) { + dev_err(context->global->dev, + "unaligned: iova 0x%x pa %pa size 0x%x\n", + iova, &pa, bytes); + ret = -EINVAL; + goto fail; + } ret = etnaviv_context_map(context, da, pa, bytes, prot); if (ret) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 
31710d98cad5..3dda9f0eda82 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -30,11 +30,11 @@ i915-y += \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ - i915_suspend.o \ i915_switcheroo.o \ i915_sysfs.o \ i915_utils.o \ intel_clock_gating.o \ + intel_cpu_info.o \ intel_device_info.o \ intel_memory_region.o \ intel_pcode.o \ @@ -43,6 +43,7 @@ i915-y += \ intel_sbi.o \ intel_step.o \ intel_uncore.o \ + intel_uncore_trace.o \ intel_wakeref.o \ vlv_sideband.o \ vlv_suspend.o @@ -220,6 +221,7 @@ i915-$(CONFIG_HWMON) += \ i915-y += \ display/hsw_ips.o \ display/i9xx_plane.o \ + display/i9xx_display_sr.o \ display/i9xx_wm.o \ display/intel_alpm.o \ display/intel_atomic.o \ @@ -236,6 +238,7 @@ i915-y += \ display/intel_crtc_state_dump.o \ display/intel_cursor.o \ display/intel_display.o \ + display/intel_display_conversion.o \ display/intel_display_driver.o \ display/intel_display_irq.o \ display/intel_display_params.o \ diff --git a/drivers/gpu/drm/i915/display/dvo_ns2501.c b/drivers/gpu/drm/i915/display/dvo_ns2501.c index cb619ffc974f..7146a9ed2213 100644 --- a/drivers/gpu/drm/i915/display/dvo_ns2501.c +++ b/drivers/gpu/drm/i915/display/dvo_ns2501.c @@ -26,7 +26,6 @@ * */ -#include "i915_drv.h" #include "intel_display_types.h" #include "intel_dvo_dev.h" diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index 4fbec065d53e..56353377466c 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -8,6 +8,7 @@ #include <linux/string_helpers.h> #include "g4x_dp.h" +#include "i915_drv.h" #include "i915_reg.h" #include "intel_audio.h" #include "intel_backlight.h" @@ -55,8 +56,8 @@ const struct dpll *vlv_get_dpll(struct drm_i915_private *i915) return IS_CHERRYVIEW(i915) ? 
&chv_dpll[0] : &vlv_dpll[0]; } -void g4x_dp_set_clock(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) +static void g4x_dp_set_clock(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); const struct dpll *divisor = NULL; @@ -1223,6 +1224,25 @@ static bool ilk_digital_port_connected(struct intel_encoder *encoder) return intel_de_read(display, DEISR) & bit; } +static int g4x_dp_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + int ret; + + if (HAS_PCH_SPLIT(i915) && encoder->port != PORT_A) + crtc_state->has_pch_encoder = true; + + ret = intel_dp_compute_config(encoder, crtc_state, conn_state); + if (ret) + return ret; + + g4x_dp_set_clock(encoder, crtc_state); + + return 0; +} + static void g4x_dp_suspend_complete(struct intel_encoder *encoder) { /* @@ -1307,7 +1327,7 @@ bool g4x_dp_init(struct drm_i915_private *dev_priv, intel_encoder_link_check_init(intel_encoder, intel_dp_link_check); intel_encoder->hotplug = intel_dp_hotplug; - intel_encoder->compute_config = intel_dp_compute_config; + intel_encoder->compute_config = g4x_dp_compute_config; intel_encoder->get_hw_state = intel_dp_get_hw_state; intel_encoder->get_config = intel_dp_get_config; intel_encoder->sync_state = intel_dp_sync_state; diff --git a/drivers/gpu/drm/i915/display/g4x_dp.h b/drivers/gpu/drm/i915/display/g4x_dp.h index c75e64ae79b7..839a251dc069 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.h +++ b/drivers/gpu/drm/i915/display/g4x_dp.h @@ -19,8 +19,6 @@ struct intel_encoder; #ifdef I915 const struct dpll *vlv_get_dpll(struct drm_i915_private *i915); -void g4x_dp_set_clock(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config); bool g4x_dp_port_enabled(struct drm_i915_private *dev_priv, i915_reg_t dp_reg, enum port port, enum pipe 
*pipe); @@ -31,10 +29,6 @@ static inline const struct dpll *vlv_get_dpll(struct drm_i915_private *i915) { return NULL; } -static inline void g4x_dp_set_clock(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) -{ -} static inline bool g4x_dp_port_enabled(struct drm_i915_private *dev_priv, i915_reg_t dp_reg, int port, enum pipe *pipe) diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index d1a7d0d57c6b..98e6a931042f 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -6,6 +6,7 @@ */ #include "g4x_hdmi.h" +#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_audio.h" @@ -682,7 +683,7 @@ static bool assert_hdmi_port_valid(struct drm_i915_private *i915, enum port port "Platform does not support HDMI %c\n", port_name(port)); } -void g4x_hdmi_init(struct drm_i915_private *dev_priv, +bool g4x_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port) { struct intel_display *display = &dev_priv->display; @@ -692,10 +693,10 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, struct intel_connector *intel_connector; if (!assert_port_valid(dev_priv, port)) - return; + return false; if (!assert_hdmi_port_valid(dev_priv, port)) - return; + return false; devdata = intel_bios_encoder_data_lookup(display, port); @@ -706,15 +707,13 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); if (!dig_port) - return; + return false; dig_port->aux_ch = AUX_CH_NONE; intel_connector = intel_connector_alloc(); - if (!intel_connector) { - kfree(dig_port); - return; - } + if (!intel_connector) + goto err_connector_alloc; intel_encoder = &dig_port->base; @@ -722,9 +721,10 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, mutex_init(&dig_port->hdcp_mutex); - drm_encoder_init(&dev_priv->drm, &intel_encoder->base, - &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, - 
"HDMI %c", port_name(port)); + if (drm_encoder_init(&dev_priv->drm, &intel_encoder->base, + &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, + "HDMI %c", port_name(port))) + goto err_encoder_init; intel_encoder->hotplug = intel_hdmi_hotplug; intel_encoder->compute_config = g4x_hdmi_compute_config; @@ -787,5 +787,17 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, intel_infoframe_init(dig_port); - intel_hdmi_init_connector(dig_port, intel_connector); + if (!intel_hdmi_init_connector(dig_port, intel_connector)) + goto err_init_connector; + + return true; + +err_init_connector: + drm_encoder_cleanup(&intel_encoder->base); +err_encoder_init: + kfree(intel_connector); +err_connector_alloc: + kfree(dig_port); + + return false; } diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.h b/drivers/gpu/drm/i915/display/g4x_hdmi.h index 817f55c7a3a1..a52e8986ec7a 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.h +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.h @@ -16,14 +16,15 @@ struct drm_connector; struct drm_i915_private; #ifdef I915 -void g4x_hdmi_init(struct drm_i915_private *dev_priv, +bool g4x_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port); int g4x_hdmi_connector_atomic_check(struct drm_connector *connector, struct drm_atomic_state *state); #else -static inline void g4x_hdmi_init(struct drm_i915_private *dev_priv, +static inline bool g4x_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, int port) { + return false; } static inline int g4x_hdmi_connector_atomic_check(struct drm_connector *connector, struct drm_atomic_state *state) diff --git a/drivers/gpu/drm/i915/display/hsw_ips.c b/drivers/gpu/drm/i915/display/hsw_ips.c index 34c5d28fc866..d02c328bf902 100644 --- a/drivers/gpu/drm/i915/display/hsw_ips.c +++ b/drivers/gpu/drm/i915/display/hsw_ips.c @@ -185,10 +185,12 @@ void hsw_ips_post_update(struct intel_atomic_state *state, /* IPS only exists on ULT machines and is tied to pipe A. 
*/ bool hsw_crtc_supports_ips(struct intel_crtc *crtc) { - return HAS_IPS(to_i915(crtc->base.dev)) && crtc->pipe == PIPE_A; + struct intel_display *display = to_intel_display(crtc); + + return HAS_IPS(display) && crtc->pipe == PIPE_A; } -bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) +static bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -218,6 +220,20 @@ bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) return true; } +int hsw_ips_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + + if (!IS_BROADWELL(i915)) + return 0; + + if (!hsw_crtc_state_ips_capable(crtc_state)) + return 0; + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + return DIV_ROUND_UP(crtc_state->pixel_rate * 100, 95); +} + int hsw_ips_compute_config(struct intel_atomic_state *state, struct intel_crtc *crtc) { diff --git a/drivers/gpu/drm/i915/display/hsw_ips.h b/drivers/gpu/drm/i915/display/hsw_ips.h index 35364228e1c1..7af12f88a8ce 100644 --- a/drivers/gpu/drm/i915/display/hsw_ips.h +++ b/drivers/gpu/drm/i915/display/hsw_ips.h @@ -19,7 +19,7 @@ bool hsw_ips_pre_update(struct intel_atomic_state *state, void hsw_ips_post_update(struct intel_atomic_state *state, struct intel_crtc *crtc); bool hsw_crtc_supports_ips(struct intel_crtc *crtc); -bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state); +int hsw_ips_min_cdclk(const struct intel_crtc_state *crtc_state); int hsw_ips_compute_config(struct intel_atomic_state *state, struct intel_crtc *crtc); void hsw_ips_get_config(struct intel_crtc_state *crtc_state); @@ -42,9 +42,9 @@ static inline bool hsw_crtc_supports_ips(struct intel_crtc *crtc) { return false; } -static inline bool hsw_crtc_state_ips_capable(const struct 
intel_crtc_state *crtc_state) +static inline int hsw_ips_min_cdclk(const struct intel_crtc_state *crtc_state) { - return false; + return 0; } static inline int hsw_ips_compute_config(struct intel_atomic_state *state, struct intel_crtc *crtc) diff --git a/drivers/gpu/drm/i915/display/i9xx_display_sr.c b/drivers/gpu/drm/i915/display/i9xx_display_sr.c new file mode 100644 index 000000000000..32abe9743014 --- /dev/null +++ b/drivers/gpu/drm/i915/display/i9xx_display_sr.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <drm/drm_device.h> + +#include "i915_reg.h" +#include "i9xx_display_sr.h" +#include "i9xx_wm_regs.h" +#include "intel_de.h" +#include "intel_gmbus.h" +#include "intel_pci_config.h" + +static void i9xx_display_save_swf(struct intel_display *display) +{ + int i; + + /* Scratch space */ + if (DISPLAY_VER(display) == 2 && display->platform.mobile) { + for (i = 0; i < 7; i++) { + display->restore.saveSWF0[i] = intel_de_read(display, SWF0(display, i)); + display->restore.saveSWF1[i] = intel_de_read(display, SWF1(display, i)); + } + for (i = 0; i < 3; i++) + display->restore.saveSWF3[i] = intel_de_read(display, SWF3(display, i)); + } else if (DISPLAY_VER(display) == 2) { + for (i = 0; i < 7; i++) + display->restore.saveSWF1[i] = intel_de_read(display, SWF1(display, i)); + } else if (HAS_GMCH(display)) { + for (i = 0; i < 16; i++) { + display->restore.saveSWF0[i] = intel_de_read(display, SWF0(display, i)); + display->restore.saveSWF1[i] = intel_de_read(display, SWF1(display, i)); + } + for (i = 0; i < 3; i++) + display->restore.saveSWF3[i] = intel_de_read(display, SWF3(display, i)); + } +} + +static void i9xx_display_restore_swf(struct intel_display *display) +{ + int i; + + /* Scratch space */ + if (DISPLAY_VER(display) == 2 && display->platform.mobile) { + for (i = 0; i < 7; i++) { + intel_de_write(display, SWF0(display, i), display->restore.saveSWF0[i]); + intel_de_write(display, SWF1(display, i), 
display->restore.saveSWF1[i]); + } + for (i = 0; i < 3; i++) + intel_de_write(display, SWF3(display, i), display->restore.saveSWF3[i]); + } else if (DISPLAY_VER(display) == 2) { + for (i = 0; i < 7; i++) + intel_de_write(display, SWF1(display, i), display->restore.saveSWF1[i]); + } else if (HAS_GMCH(display)) { + for (i = 0; i < 16; i++) { + intel_de_write(display, SWF0(display, i), display->restore.saveSWF0[i]); + intel_de_write(display, SWF1(display, i), display->restore.saveSWF1[i]); + } + for (i = 0; i < 3; i++) + intel_de_write(display, SWF3(display, i), display->restore.saveSWF3[i]); + } +} + +void i9xx_display_sr_save(struct intel_display *display) +{ + struct pci_dev *pdev = to_pci_dev(display->drm->dev); + + if (!HAS_DISPLAY(display)) + return; + + /* Display arbitration control */ + if (DISPLAY_VER(display) <= 4) + display->restore.saveDSPARB = intel_de_read(display, DSPARB(display)); + + if (DISPLAY_VER(display) == 4) + pci_read_config_word(pdev, GCDGMBUS, &display->restore.saveGCDGMBUS); + + i9xx_display_save_swf(display); +} + +void i9xx_display_sr_restore(struct intel_display *display) +{ + struct pci_dev *pdev = to_pci_dev(display->drm->dev); + + if (!HAS_DISPLAY(display)) + return; + + i9xx_display_restore_swf(display); + + if (DISPLAY_VER(display) == 4) + pci_write_config_word(pdev, GCDGMBUS, display->restore.saveGCDGMBUS); + + /* Display arbitration */ + if (DISPLAY_VER(display) <= 4) + intel_de_write(display, DSPARB(display), display->restore.saveDSPARB); +} diff --git a/drivers/gpu/drm/i915/display/i9xx_display_sr.h b/drivers/gpu/drm/i915/display/i9xx_display_sr.h new file mode 100644 index 000000000000..39b8c18fe738 --- /dev/null +++ b/drivers/gpu/drm/i915/display/i9xx_display_sr.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __I9XX_DISPLAY_SR_H__ +#define __I9XX_DISPLAY_SR_H__ + +struct intel_display; + +void i9xx_display_sr_save(struct intel_display *display); +void 
i9xx_display_sr_restore(struct intel_display *display); + +#endif diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 17a1e3801a85..48e657a80a16 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -8,6 +8,7 @@ #include <drm/drm_blend.h> #include <drm/drm_fourcc.h> +#include "i915_drv.h" #include "i915_reg.h" #include "i9xx_plane.h" #include "i9xx_plane_regs.h" diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index e3b13886177a..db78c1e6b0a3 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "i915_reg.h" #include "i9xx_wm.h" +#include "i9xx_wm_regs.h" #include "intel_atomic.h" #include "intel_bo.h" #include "intel_display.h" diff --git a/drivers/gpu/drm/i915/display/i9xx_wm_regs.h b/drivers/gpu/drm/i915/display/i9xx_wm_regs.h new file mode 100644 index 000000000000..d68d22235cf2 --- /dev/null +++ b/drivers/gpu/drm/i915/display/i9xx_wm_regs.h @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2024 Intel Corporation */ + +#ifndef __I9XX_WM_REGS_H__ +#define __I9XX_WM_REGS_H__ + +#include "intel_display_reg_defs.h" + +#define DSPARB(dev_priv) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x70030) +#define DSPARB_CSTART_MASK (0x7f << 7) +#define DSPARB_CSTART_SHIFT 7 +#define DSPARB_BSTART_MASK (0x7f) +#define DSPARB_BSTART_SHIFT 0 +#define DSPARB_BEND_SHIFT 9 /* on 855 */ +#define DSPARB_AEND_SHIFT 0 +#define DSPARB_SPRITEA_SHIFT_VLV 0 +#define DSPARB_SPRITEA_MASK_VLV (0xff << 0) +#define DSPARB_SPRITEB_SHIFT_VLV 8 +#define DSPARB_SPRITEB_MASK_VLV (0xff << 8) +#define DSPARB_SPRITEC_SHIFT_VLV 16 +#define DSPARB_SPRITEC_MASK_VLV (0xff << 16) +#define DSPARB_SPRITED_SHIFT_VLV 24 +#define DSPARB_SPRITED_MASK_VLV (0xff << 24) +#define DSPARB2 _MMIO(VLV_DISPLAY_BASE + 0x70060) /* vlv/chv */ +#define 
DSPARB_SPRITEA_HI_SHIFT_VLV 0 +#define DSPARB_SPRITEA_HI_MASK_VLV (0x1 << 0) +#define DSPARB_SPRITEB_HI_SHIFT_VLV 4 +#define DSPARB_SPRITEB_HI_MASK_VLV (0x1 << 4) +#define DSPARB_SPRITEC_HI_SHIFT_VLV 8 +#define DSPARB_SPRITEC_HI_MASK_VLV (0x1 << 8) +#define DSPARB_SPRITED_HI_SHIFT_VLV 12 +#define DSPARB_SPRITED_HI_MASK_VLV (0x1 << 12) +#define DSPARB_SPRITEE_HI_SHIFT_VLV 16 +#define DSPARB_SPRITEE_HI_MASK_VLV (0x1 << 16) +#define DSPARB_SPRITEF_HI_SHIFT_VLV 20 +#define DSPARB_SPRITEF_HI_MASK_VLV (0x1 << 20) +#define DSPARB3 _MMIO(VLV_DISPLAY_BASE + 0x7006c) /* chv */ +#define DSPARB_SPRITEE_SHIFT_VLV 0 +#define DSPARB_SPRITEE_MASK_VLV (0xff << 0) +#define DSPARB_SPRITEF_SHIFT_VLV 8 +#define DSPARB_SPRITEF_MASK_VLV (0xff << 8) + +/* pnv/gen4/g4x/vlv/chv */ +#define DSPFW1(dev_priv) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x70034) +#define DSPFW_SR_SHIFT 23 +#define DSPFW_SR_MASK (0x1ff << 23) +#define DSPFW_CURSORB_SHIFT 16 +#define DSPFW_CURSORB_MASK (0x3f << 16) +#define DSPFW_PLANEB_SHIFT 8 +#define DSPFW_PLANEB_MASK (0x7f << 8) +#define DSPFW_PLANEB_MASK_VLV (0xff << 8) /* vlv/chv */ +#define DSPFW_PLANEA_SHIFT 0 +#define DSPFW_PLANEA_MASK (0x7f << 0) +#define DSPFW_PLANEA_MASK_VLV (0xff << 0) /* vlv/chv */ +#define DSPFW2(dev_priv) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x70038) +#define DSPFW_FBC_SR_EN (1 << 31) /* g4x */ +#define DSPFW_FBC_SR_SHIFT 28 +#define DSPFW_FBC_SR_MASK (0x7 << 28) /* g4x */ +#define DSPFW_FBC_HPLL_SR_SHIFT 24 +#define DSPFW_FBC_HPLL_SR_MASK (0xf << 24) /* g4x */ +#define DSPFW_SPRITEB_SHIFT (16) +#define DSPFW_SPRITEB_MASK (0x7f << 16) /* g4x */ +#define DSPFW_SPRITEB_MASK_VLV (0xff << 16) /* vlv/chv */ +#define DSPFW_CURSORA_SHIFT 8 +#define DSPFW_CURSORA_MASK (0x3f << 8) +#define DSPFW_PLANEC_OLD_SHIFT 0 +#define DSPFW_PLANEC_OLD_MASK (0x7f << 0) /* pre-gen4 sprite C */ +#define DSPFW_SPRITEA_SHIFT 0 +#define DSPFW_SPRITEA_MASK (0x7f << 0) /* g4x */ +#define DSPFW_SPRITEA_MASK_VLV (0xff << 0) /* vlv/chv */ +#define DSPFW3(dev_priv) 
_MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x7003c) +#define DSPFW_HPLL_SR_EN (1 << 31) +#define PINEVIEW_SELF_REFRESH_EN (1 << 30) +#define DSPFW_CURSOR_SR_SHIFT 24 +#define DSPFW_CURSOR_SR_MASK (0x3f << 24) +#define DSPFW_HPLL_CURSOR_SHIFT 16 +#define DSPFW_HPLL_CURSOR_MASK (0x3f << 16) +#define DSPFW_HPLL_SR_SHIFT 0 +#define DSPFW_HPLL_SR_MASK (0x1ff << 0) + +/* vlv/chv */ +#define DSPFW4 _MMIO(VLV_DISPLAY_BASE + 0x70070) +#define DSPFW_SPRITEB_WM1_SHIFT 16 +#define DSPFW_SPRITEB_WM1_MASK (0xff << 16) +#define DSPFW_CURSORA_WM1_SHIFT 8 +#define DSPFW_CURSORA_WM1_MASK (0x3f << 8) +#define DSPFW_SPRITEA_WM1_SHIFT 0 +#define DSPFW_SPRITEA_WM1_MASK (0xff << 0) +#define DSPFW5 _MMIO(VLV_DISPLAY_BASE + 0x70074) +#define DSPFW_PLANEB_WM1_SHIFT 24 +#define DSPFW_PLANEB_WM1_MASK (0xff << 24) +#define DSPFW_PLANEA_WM1_SHIFT 16 +#define DSPFW_PLANEA_WM1_MASK (0xff << 16) +#define DSPFW_CURSORB_WM1_SHIFT 8 +#define DSPFW_CURSORB_WM1_MASK (0x3f << 8) +#define DSPFW_CURSOR_SR_WM1_SHIFT 0 +#define DSPFW_CURSOR_SR_WM1_MASK (0x3f << 0) +#define DSPFW6 _MMIO(VLV_DISPLAY_BASE + 0x70078) +#define DSPFW_SR_WM1_SHIFT 0 +#define DSPFW_SR_WM1_MASK (0x1ff << 0) +#define DSPFW7 _MMIO(VLV_DISPLAY_BASE + 0x7007c) +#define DSPFW7_CHV _MMIO(VLV_DISPLAY_BASE + 0x700b4) /* wtf #1? 
*/ +#define DSPFW_SPRITED_WM1_SHIFT 24 +#define DSPFW_SPRITED_WM1_MASK (0xff << 24) +#define DSPFW_SPRITED_SHIFT 16 +#define DSPFW_SPRITED_MASK_VLV (0xff << 16) +#define DSPFW_SPRITEC_WM1_SHIFT 8 +#define DSPFW_SPRITEC_WM1_MASK (0xff << 8) +#define DSPFW_SPRITEC_SHIFT 0 +#define DSPFW_SPRITEC_MASK_VLV (0xff << 0) +#define DSPFW8_CHV _MMIO(VLV_DISPLAY_BASE + 0x700b8) +#define DSPFW_SPRITEF_WM1_SHIFT 24 +#define DSPFW_SPRITEF_WM1_MASK (0xff << 24) +#define DSPFW_SPRITEF_SHIFT 16 +#define DSPFW_SPRITEF_MASK_VLV (0xff << 16) +#define DSPFW_SPRITEE_WM1_SHIFT 8 +#define DSPFW_SPRITEE_WM1_MASK (0xff << 8) +#define DSPFW_SPRITEE_SHIFT 0 +#define DSPFW_SPRITEE_MASK_VLV (0xff << 0) +#define DSPFW9_CHV _MMIO(VLV_DISPLAY_BASE + 0x7007c) /* wtf #2? */ +#define DSPFW_PLANEC_WM1_SHIFT 24 +#define DSPFW_PLANEC_WM1_MASK (0xff << 24) +#define DSPFW_PLANEC_SHIFT 16 +#define DSPFW_PLANEC_MASK_VLV (0xff << 16) +#define DSPFW_CURSORC_WM1_SHIFT 8 +#define DSPFW_CURSORC_WM1_MASK (0x3f << 16) +#define DSPFW_CURSORC_SHIFT 0 +#define DSPFW_CURSORC_MASK (0x3f << 0) + +/* vlv/chv high order bits */ +#define DSPHOWM _MMIO(VLV_DISPLAY_BASE + 0x70064) +#define DSPFW_SR_HI_SHIFT 24 +#define DSPFW_SR_HI_MASK (3 << 24) /* 2 bits for chv, 1 for vlv */ +#define DSPFW_SPRITEF_HI_SHIFT 23 +#define DSPFW_SPRITEF_HI_MASK (1 << 23) +#define DSPFW_SPRITEE_HI_SHIFT 22 +#define DSPFW_SPRITEE_HI_MASK (1 << 22) +#define DSPFW_PLANEC_HI_SHIFT 21 +#define DSPFW_PLANEC_HI_MASK (1 << 21) +#define DSPFW_SPRITED_HI_SHIFT 20 +#define DSPFW_SPRITED_HI_MASK (1 << 20) +#define DSPFW_SPRITEC_HI_SHIFT 16 +#define DSPFW_SPRITEC_HI_MASK (1 << 16) +#define DSPFW_PLANEB_HI_SHIFT 12 +#define DSPFW_PLANEB_HI_MASK (1 << 12) +#define DSPFW_SPRITEB_HI_SHIFT 8 +#define DSPFW_SPRITEB_HI_MASK (1 << 8) +#define DSPFW_SPRITEA_HI_SHIFT 4 +#define DSPFW_SPRITEA_HI_MASK (1 << 4) +#define DSPFW_PLANEA_HI_SHIFT 0 +#define DSPFW_PLANEA_HI_MASK (1 << 0) +#define DSPHOWM1 _MMIO(VLV_DISPLAY_BASE + 0x70068) +#define DSPFW_SR_WM1_HI_SHIFT 24 
+#define DSPFW_SR_WM1_HI_MASK (3 << 24) /* 2 bits for chv, 1 for vlv */ +#define DSPFW_SPRITEF_WM1_HI_SHIFT 23 +#define DSPFW_SPRITEF_WM1_HI_MASK (1 << 23) +#define DSPFW_SPRITEE_WM1_HI_SHIFT 22 +#define DSPFW_SPRITEE_WM1_HI_MASK (1 << 22) +#define DSPFW_PLANEC_WM1_HI_SHIFT 21 +#define DSPFW_PLANEC_WM1_HI_MASK (1 << 21) +#define DSPFW_SPRITED_WM1_HI_SHIFT 20 +#define DSPFW_SPRITED_WM1_HI_MASK (1 << 20) +#define DSPFW_SPRITEC_WM1_HI_SHIFT 16 +#define DSPFW_SPRITEC_WM1_HI_MASK (1 << 16) +#define DSPFW_PLANEB_WM1_HI_SHIFT 12 +#define DSPFW_PLANEB_WM1_HI_MASK (1 << 12) +#define DSPFW_SPRITEB_WM1_HI_SHIFT 8 +#define DSPFW_SPRITEB_WM1_HI_MASK (1 << 8) +#define DSPFW_SPRITEA_WM1_HI_SHIFT 4 +#define DSPFW_SPRITEA_WM1_HI_MASK (1 << 4) +#define DSPFW_PLANEA_WM1_HI_SHIFT 0 +#define DSPFW_PLANEA_WM1_HI_MASK (1 << 0) + +/* drain latency register values*/ +#define VLV_DDL(pipe) _MMIO(VLV_DISPLAY_BASE + 0x70050 + 4 * (pipe)) +#define DDL_CURSOR_SHIFT 24 +#define DDL_SPRITE_SHIFT(sprite) (8 + 8 * (sprite)) +#define DDL_PLANE_SHIFT 0 +#define DDL_PRECISION_HIGH (1 << 7) +#define DDL_PRECISION_LOW (0 << 7) +#define DRAIN_LATENCY_MASK 0x7f + +/* FIFO watermark sizes etc */ +#define G4X_FIFO_LINE_SIZE 64 +#define I915_FIFO_LINE_SIZE 64 +#define I830_FIFO_LINE_SIZE 32 + +#define VALLEYVIEW_FIFO_SIZE 255 +#define G4X_FIFO_SIZE 127 +#define I965_FIFO_SIZE 512 +#define I945_FIFO_SIZE 127 +#define I915_FIFO_SIZE 95 +#define I855GM_FIFO_SIZE 127 /* In cachelines */ +#define I830_FIFO_SIZE 95 + +#define VALLEYVIEW_MAX_WM 0xff +#define G4X_MAX_WM 0x3f +#define I915_MAX_WM 0x3f + +#define PINEVIEW_DISPLAY_FIFO 512 /* in 64byte unit */ +#define PINEVIEW_FIFO_LINE_SIZE 64 +#define PINEVIEW_MAX_WM 0x1ff +#define PINEVIEW_DFT_WM 0x3f +#define PINEVIEW_DFT_HPLLOFF_WM 0 +#define PINEVIEW_GUARD_WM 10 +#define PINEVIEW_CURSOR_FIFO 64 +#define PINEVIEW_CURSOR_MAX_WM 0x3f +#define PINEVIEW_CURSOR_DFT_WM 0 +#define PINEVIEW_CURSOR_GUARD_WM 5 + +#define VALLEYVIEW_CURSOR_MAX_WM 64 +#define 
I965_CURSOR_FIFO 64 +#define I965_CURSOR_MAX_WM 32 +#define I965_CURSOR_DFT_WM 8 + +/* define the Watermark register on Ironlake */ +#define _WM0_PIPEA_ILK 0x45100 +#define _WM0_PIPEB_ILK 0x45104 +#define _WM0_PIPEC_IVB 0x45200 +#define WM0_PIPE_ILK(pipe) _MMIO_BASE_PIPE3(0, (pipe), _WM0_PIPEA_ILK, \ + _WM0_PIPEB_ILK, _WM0_PIPEC_IVB) +#define WM0_PIPE_PRIMARY_MASK REG_GENMASK(31, 16) +#define WM0_PIPE_SPRITE_MASK REG_GENMASK(15, 8) +#define WM0_PIPE_CURSOR_MASK REG_GENMASK(7, 0) +#define WM0_PIPE_PRIMARY(x) REG_FIELD_PREP(WM0_PIPE_PRIMARY_MASK, (x)) +#define WM0_PIPE_SPRITE(x) REG_FIELD_PREP(WM0_PIPE_SPRITE_MASK, (x)) +#define WM0_PIPE_CURSOR(x) REG_FIELD_PREP(WM0_PIPE_CURSOR_MASK, (x)) +#define WM1_LP_ILK _MMIO(0x45108) +#define WM2_LP_ILK _MMIO(0x4510c) +#define WM3_LP_ILK _MMIO(0x45110) +#define WM_LP_ENABLE REG_BIT(31) +#define WM_LP_LATENCY_MASK REG_GENMASK(30, 24) +#define WM_LP_FBC_MASK_BDW REG_GENMASK(23, 19) +#define WM_LP_FBC_MASK_ILK REG_GENMASK(23, 20) +#define WM_LP_PRIMARY_MASK REG_GENMASK(18, 8) +#define WM_LP_CURSOR_MASK REG_GENMASK(7, 0) +#define WM_LP_LATENCY(x) REG_FIELD_PREP(WM_LP_LATENCY_MASK, (x)) +#define WM_LP_FBC_BDW(x) REG_FIELD_PREP(WM_LP_FBC_MASK_BDW, (x)) +#define WM_LP_FBC_ILK(x) REG_FIELD_PREP(WM_LP_FBC_MASK_ILK, (x)) +#define WM_LP_PRIMARY(x) REG_FIELD_PREP(WM_LP_PRIMARY_MASK, (x)) +#define WM_LP_CURSOR(x) REG_FIELD_PREP(WM_LP_CURSOR_MASK, (x)) +#define WM1S_LP_ILK _MMIO(0x45120) +#define WM2S_LP_IVB _MMIO(0x45124) +#define WM3S_LP_IVB _MMIO(0x45128) +#define WM_LP_SPRITE_ENABLE REG_BIT(31) /* ilk/snb WM1S only */ +#define WM_LP_SPRITE_MASK REG_GENMASK(10, 0) +#define WM_LP_SPRITE(x) REG_FIELD_PREP(WM_LP_SPRITE_MASK, (x)) + +#define WM_MISC _MMIO(0x45260) +#define WM_MISC_DATA_PARTITION_5_6 (1 << 0) + +#define WM_DBG _MMIO(0x45280) +#define WM_DBG_DISALLOW_MULTIPLE_LP (1 << 0) +#define WM_DBG_DISALLOW_MAXFIFO (1 << 1) +#define WM_DBG_DISALLOW_SPRITE (1 << 2) + +#endif /* __I9XX_WM_REGS_H__ */ diff --git 
a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index afe90cf0842a..09034ef651f0 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -31,6 +31,7 @@ #include <drm/drm_mipi_dsi.h> #include <drm/drm_probe_helper.h> +#include "i915_drv.h" #include "i915_reg.h" #include "icl_dsi.h" #include "icl_dsi_regs.h" @@ -1602,7 +1603,9 @@ static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder, /* FIXME: split only when necessary */ if (crtc_state->dsc.slice_count > 1) - crtc_state->dsc.dsc_split = true; + crtc_state->dsc.num_streams = 2; + else + crtc_state->dsc.num_streams = 1; /* FIXME: initialize from VBT */ vdsc_cfg->rc_model_size = DSC_RC_MODEL_SIZE_CONST; diff --git a/drivers/gpu/drm/i915/display/intel_acpi.c b/drivers/gpu/drm/i915/display/intel_acpi.c index c3b29a331d72..bbf8c5a8fdbd 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.c +++ b/drivers/gpu/drm/i915/display/intel_acpi.c @@ -9,8 +9,9 @@ #include <linux/acpi.h> #include <acpi/video.h> -#include "i915_drv.h" +#include "i915_utils.h" #include "intel_acpi.h" +#include "intel_display_core.h" #include "intel_display_types.h" #define INTEL_DSM_REVISION_ID 1 /* For Calpella anyway... 
*/ diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index d89630b2d5c1..612e9b0ec14a 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -40,6 +40,7 @@ #include <drm/drm_gem.h> #include <drm/drm_gem_atomic_helper.h> +#include "i915_drv.h" #include "i915_config.h" #include "i9xx_plane_regs.h" #include "intel_atomic_plane.h" @@ -207,17 +208,6 @@ unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state, fb->format->cpp[color_plane]; } -static bool -use_min_ddb(const struct intel_crtc_state *crtc_state, - struct intel_plane *plane) -{ - struct drm_i915_private *i915 = to_i915(plane->base.dev); - - return DISPLAY_VER(i915) >= 13 && - crtc_state->uapi.async_flip && - plane->async_flip; -} - static unsigned int intel_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, @@ -225,8 +215,8 @@ intel_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); const struct drm_framebuffer *fb = plane_state->hw.fb; - int width, height; unsigned int rel_data_rate; + int width, height; if (plane->id == PLANE_CURSOR) return 0; @@ -235,14 +225,6 @@ intel_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, return 0; /* - * We calculate extra ddb based on ratio plane rate/total data rate - * in case, in some cases we should not allocate extra ddb for the plane, - * so do not count its data rate, if this is the case. - */ - if (use_min_ddb(crtc_state, plane)) - return 0; - - /* * Src coordinates are already rotated by 270 degrees for * the 90/270 degree plane rotation cases (to match the * GTT mapping), hence no need to account for rotation here. 
@@ -256,7 +238,11 @@ intel_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, height /= 2; } - rel_data_rate = width * height * fb->format->cpp[color_plane]; + rel_data_rate = + skl_plane_relative_data_rate(crtc_state, plane, width, height, + fb->format->cpp[color_plane]); + if (!rel_data_rate) + return 0; return intel_adjusted_rate(&plane_state->uapi.src, &plane_state->uapi.dst, diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 3902ab843113..ce8a4319a63c 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -681,12 +681,11 @@ static void ibx_audio_codec_enable(struct intel_encoder *encoder, void intel_audio_sdp_split_update(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder trans = crtc_state->cpu_transcoder; - if (HAS_DP20(i915)) - intel_de_rmw(i915, AUD_DP_2DOT0_CTRL(trans), AUD_ENABLE_SDP_SPLIT, + if (HAS_DP20(display)) + intel_de_rmw(display, AUD_DP_2DOT0_CTRL(trans), AUD_ENABLE_SDP_SPLIT, crtc_state->sdp_split_enable ? AUD_ENABLE_SDP_SPLIT : 0); } @@ -981,6 +980,53 @@ retry: drm_modeset_acquire_fini(&ctx); } +int intel_audio_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct drm_i915_private *dev_priv = to_i915(display->drm); + int min_cdclk = 0; + + if (!crtc_state->has_audio) + return 0; + + /* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz, + * audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else + * there may be audio corruption or screen corruption." This cdclk + * restriction for GLK is 316.8 MHz. 
+ */ + if (intel_crtc_has_dp_encoder(crtc_state) && + crtc_state->port_clock >= 540000 && + crtc_state->lane_count == 4) { + if (DISPLAY_VER(display) == 10) { + /* Display WA #1145: glk */ + min_cdclk = max(min_cdclk, 316800); + } else if (DISPLAY_VER(display) == 9 || IS_BROADWELL(dev_priv)) { + /* Display WA #1144: skl,bxt */ + min_cdclk = max(min_cdclk, 432000); + } + } + + /* + * According to BSpec, "The CD clock frequency must be at least twice + * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default. + */ + if (DISPLAY_VER(display) >= 9) + min_cdclk = max(min_cdclk, 2 * 96000); + + /* + * "For DP audio configuration, cdclk frequency shall be set to + * meet the following requirements: + * DP Link Frequency(MHz) | Cdclk frequency(MHz) + * 270 | 320 or higher + * 162 | 200 or higher" + */ + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + intel_crtc_has_dp_encoder(crtc_state)) + min_cdclk = max(min_cdclk, crtc_state->port_clock); + + return min_cdclk; +} + static unsigned long i915_audio_component_get_power(struct device *kdev) { struct intel_display *display = to_intel_display(kdev); diff --git a/drivers/gpu/drm/i915/display/intel_audio.h b/drivers/gpu/drm/i915/display/intel_audio.h index 576c061d72a4..1bafc155434a 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.h +++ b/drivers/gpu/drm/i915/display/intel_audio.h @@ -27,6 +27,7 @@ void intel_audio_codec_get_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state); void intel_audio_cdclk_change_pre(struct drm_i915_private *dev_priv); void intel_audio_cdclk_change_post(struct drm_i915_private *dev_priv); +int intel_audio_min_cdclk(const struct intel_crtc_state *crtc_state); void intel_audio_init(struct drm_i915_private *dev_priv); void intel_audio_register(struct drm_i915_private *i915); void intel_audio_deinit(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 
3f81a726cc7d..fc1e517e074a 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -10,6 +10,7 @@ #include <acpi/video.h> +#include "i915_drv.h" #include "i915_reg.h" #include "intel_backlight.h" #include "intel_backlight_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index a4cdd82c4a75..e0e4e9b62d8d 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1402,12 +1402,21 @@ parse_power_conservation_features(struct intel_display *display, panel_type); } +static void vbt_edp_to_pps_delays(struct intel_pps_delays *pps, + const struct edp_power_seq *edp_pps) +{ + pps->power_up = edp_pps->t1_t3; + pps->backlight_on = edp_pps->t8; + pps->backlight_off = edp_pps->t9; + pps->power_down = edp_pps->t10; + pps->power_cycle = edp_pps->t11_t12; +} + static void parse_edp(struct intel_display *display, struct intel_panel *panel) { const struct bdb_edp *edp; - const struct edp_power_seq *edp_pps; const struct edp_fast_link_params *edp_link_params; int panel_type = panel->vbt.panel_type; @@ -1428,10 +1437,10 @@ parse_edp(struct intel_display *display, } /* Get the eDP sequencing and link info */ - edp_pps = &edp->power_seqs[panel_type]; edp_link_params = &edp->fast_link_params[panel_type]; - panel->vbt.edp.pps = *edp_pps; + vbt_edp_to_pps_delays(&panel->vbt.edp.pps, + &edp->power_seqs[panel_type]); if (display->vbt.version >= 224) { panel->vbt.edp.rate = diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 8b703f6cfe17..f9841f0498c6 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -50,14 +50,6 @@ enum intel_backlight_type { INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE, }; -struct edp_power_seq { - u16 t1_t3; - u16 t8; - u16 t9; - u16 t10; - u16 t11_t12; -} __packed; - /* * MIPI Sequence Block definitions * diff 
--git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index a52b0ae68b96..23edc81741de 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -1256,7 +1256,7 @@ int intel_bw_min_cdclk(struct drm_i915_private *i915, min_cdclk = intel_bw_dbuf_min_cdclk(i915, bw_state); for_each_pipe(i915, pipe) - min_cdclk = max(bw_state->min_cdclk[pipe], min_cdclk); + min_cdclk = max(min_cdclk, bw_state->min_cdclk[pipe]); return min_cdclk; } @@ -1447,13 +1447,14 @@ static const struct intel_global_state_funcs intel_bw_funcs = { int intel_bw_init(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; struct intel_bw_state *state; state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; - intel_atomic_global_obj_init(i915, &i915->display.bw.obj, + intel_atomic_global_obj_init(display, &display->bw.obj, &state->base, &intel_bw_funcs); /* diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 03c4eef3f92a..c7a603589412 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -29,6 +29,7 @@ #include "soc/intel_dram.h" #include "hsw_ips.h" +#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_atomic_plane.h" @@ -37,7 +38,6 @@ #include "intel_cdclk.h" #include "intel_crtc.h" #include "intel_de.h" -#include "intel_dp.h" #include "intel_display_types.h" #include "intel_mchbar_regs.h" #include "intel_pci_config.h" @@ -46,6 +46,7 @@ #include "intel_vdsc.h" #include "skl_watermark.h" #include "skl_watermark_regs.h" +#include "vlv_dsi.h" #include "vlv_sideband.h" /** @@ -2761,154 +2762,62 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state) "Post changing CDCLK to"); } -static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) +/* pixels per CDCLK */ +static int intel_cdclk_ppc(struct intel_display 
*display, bool double_wide) +{ + return DISPLAY_VER(display) >= 10 || double_wide ? 2 : 1; +} + +/* max pixel rate as % of CDCLK (not accounting for PPC) */ +static int intel_cdclk_guardband(struct intel_display *display) { - struct intel_display *display = to_intel_display(crtc_state); struct drm_i915_private *dev_priv = to_i915(display->drm); - int pixel_rate = crtc_state->pixel_rate; - if (DISPLAY_VER(display) >= 10) - return DIV_ROUND_UP(pixel_rate, 2); - else if (DISPLAY_VER(display) == 9 || - IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) - return pixel_rate; + if (DISPLAY_VER(display) >= 9 || + IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + return 100; else if (IS_CHERRYVIEW(dev_priv)) - return DIV_ROUND_UP(pixel_rate * 100, 95); - else if (crtc_state->double_wide) - return DIV_ROUND_UP(pixel_rate * 100, 90 * 2); + return 95; else - return DIV_ROUND_UP(pixel_rate * 100, 90); + return 90; } -static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) +static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_display *display = to_intel_display(crtc); - struct intel_plane *plane; - int min_cdclk = 0; - - for_each_intel_plane_on_crtc(display->drm, crtc, plane) - min_cdclk = max(crtc_state->min_cdclk[plane->id], min_cdclk); + struct intel_display *display = to_intel_display(crtc_state); + int ppc = intel_cdclk_ppc(display, crtc_state->double_wide); + int guardband = intel_cdclk_guardband(display); + int pixel_rate = crtc_state->pixel_rate; - return min_cdclk; + return DIV_ROUND_UP(pixel_rate * 100, guardband * ppc); } -static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) +static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_display *display = to_intel_display(crtc); - int num_vdsc_instances = 
intel_dsc_get_num_vdsc_instances(crtc_state); + struct intel_plane *plane; int min_cdclk = 0; - /* - * When we decide to use only one VDSC engine, since - * each VDSC operates with 1 ppc throughput, pixel clock - * cannot be higher than the VDSC clock (cdclk) - * If there 2 VDSC engines, then pixel clock can't be higher than - * VDSC clock(cdclk) * 2 and so on. - */ - min_cdclk = max_t(int, min_cdclk, - DIV_ROUND_UP(crtc_state->pixel_rate, num_vdsc_instances)); - - if (crtc_state->joiner_pipes) { - int pixel_clock = intel_dp_mode_to_fec_clock(crtc_state->hw.adjusted_mode.clock); - - /* - * According to Bigjoiner bw check: - * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock - * - * We have already computed compressed_bpp, so now compute the min CDCLK that - * is required to support this compressed_bpp. - * - * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits) - * - * Since PPC = 2 with bigjoiner - * => CDCLK >= compressed_bpp * Pixel clock / 2 * Bigjoiner Interface bits - */ - int bigjoiner_interface_bits = DISPLAY_VER(display) >= 14 ? 
36 : 24; - int min_cdclk_bj = - (fxp_q4_to_int_roundup(crtc_state->dsc.compressed_bpp_x16) * - pixel_clock) / (2 * bigjoiner_interface_bits); - - min_cdclk = max(min_cdclk, min_cdclk_bj); - } + for_each_intel_plane_on_crtc(display->drm, crtc, plane) + min_cdclk = max(min_cdclk, crtc_state->min_cdclk[plane->id]); return min_cdclk; } int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); - struct drm_i915_private *dev_priv = to_i915(display->drm); int min_cdclk; if (!crtc_state->hw.enable) return 0; min_cdclk = intel_pixel_rate_to_cdclk(crtc_state); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && hsw_crtc_state_ips_capable(crtc_state)) - min_cdclk = DIV_ROUND_UP(min_cdclk * 100, 95); - - /* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz, - * audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else - * there may be audio corruption or screen corruption." This cdclk - * restriction for GLK is 316.8 MHz. - */ - if (intel_crtc_has_dp_encoder(crtc_state) && - crtc_state->has_audio && - crtc_state->port_clock >= 540000 && - crtc_state->lane_count == 4) { - if (DISPLAY_VER(display) == 10) { - /* Display WA #1145: glk */ - min_cdclk = max(316800, min_cdclk); - } else if (DISPLAY_VER(display) == 9 || IS_BROADWELL(dev_priv)) { - /* Display WA #1144: skl,bxt */ - min_cdclk = max(432000, min_cdclk); - } - } - - /* - * According to BSpec, "The CD clock frequency must be at least twice - * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default. 
- */ - if (crtc_state->has_audio && DISPLAY_VER(display) >= 9) - min_cdclk = max(2 * 96000, min_cdclk); - - /* - * "For DP audio configuration, cdclk frequency shall be set to - * meet the following requirements: - * DP Link Frequency(MHz) | Cdclk frequency(MHz) - * 270 | 320 or higher - * 162 | 200 or higher" - */ - if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && - intel_crtc_has_dp_encoder(crtc_state) && crtc_state->has_audio) - min_cdclk = max(crtc_state->port_clock, min_cdclk); - - /* - * On Valleyview some DSI panels lose (v|h)sync when the clock is lower - * than 320000KHz. - */ - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) && - IS_VALLEYVIEW(dev_priv)) - min_cdclk = max(320000, min_cdclk); - - /* - * On Geminilake once the CDCLK gets as low as 79200 - * picture gets unstable, despite that values are - * correct for DSI PLL and DE PLL. - */ - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) && - IS_GEMINILAKE(dev_priv)) - min_cdclk = max(158400, min_cdclk); - - /* Account for additional needs from the planes */ - min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); - - if (crtc_state->dsc.compression_enable) - min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state)); + min_cdclk = max(min_cdclk, hsw_ips_min_cdclk(crtc_state)); + min_cdclk = max(min_cdclk, intel_audio_min_cdclk(crtc_state)); + min_cdclk = max(min_cdclk, vlv_dsi_min_cdclk(crtc_state)); + min_cdclk = max(min_cdclk, intel_planes_min_cdclk(crtc_state)); + min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state)); return min_cdclk; } @@ -2960,7 +2869,7 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) min_cdclk = max(cdclk_state->force_min_cdclk, cdclk_state->bw_min_cdclk); for_each_pipe(display, pipe) - min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk); + min_cdclk = max(min_cdclk, cdclk_state->min_cdclk[pipe]); /* * Avoid glk_force_audio_cdclk() causing excessive screen @@ -2972,7 +2881,7 @@ static int 
intel_compute_min_cdclk(struct intel_atomic_state *state) */ if (IS_GEMINILAKE(dev_priv) && cdclk_state->active_pipes && !is_power_of_2(cdclk_state->active_pipes)) - min_cdclk = max(2 * 96000, min_cdclk); + min_cdclk = max(min_cdclk, 2 * 96000); if (min_cdclk > display->cdclk.max_cdclk_freq) { drm_dbg_kms(display->drm, @@ -3028,8 +2937,8 @@ static int bxt_compute_min_voltage_level(struct intel_atomic_state *state) min_voltage_level = 0; for_each_pipe(display, pipe) - min_voltage_level = max(cdclk_state->min_voltage_level[pipe], - min_voltage_level); + min_voltage_level = max(min_voltage_level, + cdclk_state->min_voltage_level[pipe]); return min_voltage_level; } @@ -3308,14 +3217,13 @@ int intel_cdclk_state_set_joined_mbus(struct intel_atomic_state *state, bool joi int intel_cdclk_init(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_cdclk_state *cdclk_state; cdclk_state = kzalloc(sizeof(*cdclk_state), GFP_KERNEL); if (!cdclk_state) return -ENOMEM; - intel_atomic_global_obj_init(dev_priv, &display->cdclk.obj, + intel_atomic_global_obj_init(display, &display->cdclk.obj, &cdclk_state->base, &intel_cdclk_funcs); return 0; @@ -3452,20 +3360,11 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) static int intel_compute_max_dotclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); + int ppc = intel_cdclk_ppc(display, HAS_DOUBLE_WIDE(display)); + int guardband = intel_cdclk_guardband(display); int max_cdclk_freq = display->cdclk.max_cdclk_freq; - if (DISPLAY_VER(display) >= 10) - return 2 * max_cdclk_freq; - else if (DISPLAY_VER(display) == 9 || - IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) - return max_cdclk_freq; - else if (IS_CHERRYVIEW(dev_priv)) - return max_cdclk_freq*95/100; - else if (DISPLAY_VER(display) < 4) - return 2*max_cdclk_freq*90/100; - else - return max_cdclk_freq*90/100; + return ppc * max_cdclk_freq * guardband / 100; } /** diff --git 
a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 174753625bca..2f51eccdb27a 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -22,6 +22,7 @@ * */ +#include "i915_drv.h" #include "i9xx_plane_regs.h" #include "intel_color.h" #include "intel_color_regs.h" @@ -1343,6 +1344,17 @@ static void ilk_lut_write(const struct intel_crtc_state *crtc_state, intel_de_write_fw(display, reg, val); } +static void ilk_lut_write_indexed(const struct intel_crtc_state *crtc_state, + i915_reg_t reg, u32 val) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (crtc_state->dsb_color_vblank) + intel_dsb_reg_write_indexed(crtc_state->dsb_color_vblank, reg, val); + else + intel_de_write_fw(display, reg, val); +} + static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state, const struct drm_property_blob *blob) { @@ -1357,19 +1369,29 @@ static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state, lut = blob->data; /* - * DSB fails to correctly load the legacy LUT - * unless we either write each entry twice, - * or use non-posted writes + * DSB fails to correctly load the legacy LUT unless + * we either write each entry twice when using posted + * writes, or we use non-posted writes. + * + * If palette anti-collision is active during LUT + * register writes: + * - posted writes simply get dropped and thus the LUT + * contents may not be correctly updated + * - non-posted writes are blocked and thus the LUT + * contents are always correct, but simultaneous CPU + * MMIO access will start to fail + * + * Choose the lesser of two evils and use posted writes. + * Using posted writes is also faster, even when having + * to write each register twice. 
*/ - if (crtc_state->dsb_color_vblank) - intel_dsb_nonpost_start(crtc_state->dsb_color_vblank); - - for (i = 0; i < 256; i++) + for (i = 0; i < 256; i++) { ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i), i9xx_lut_8(&lut[i])); - - if (crtc_state->dsb_color_vblank) - intel_dsb_nonpost_end(crtc_state->dsb_color_vblank); + if (crtc_state->dsb_color_vblank) + ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i), + i9xx_lut_8(&lut[i])); + } } static void ilk_load_lut_10(const struct intel_crtc_state *crtc_state, @@ -1458,8 +1480,8 @@ static void bdw_load_lut_10(const struct intel_crtc_state *crtc_state, prec_index); for (i = 0; i < lut_size; i++) - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_10(&lut[i])); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_10(&lut[i])); /* * Reset the index, otherwise it prevents the legacy palette to be @@ -1612,16 +1634,16 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state, * ToDo: Extend to max 7.0. Enable 32 bit input value * as compared to just 16 to achieve this. */ - ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), - DISPLAY_VER(display) >= 14 ? - mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i])); + ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe), + DISPLAY_VER(display) >= 14 ? + mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i])); } /* Clamp values > 1.0. */ while (i++ < glk_degamma_lut_size(display)) - ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), - DISPLAY_VER(display) >= 14 ? - 1 << 24 : 1 << 16); + ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe), + DISPLAY_VER(display) >= 14 ? 
+ 1 << 24 : 1 << 16); ilk_lut_write(crtc_state, PRE_CSC_GAMC_INDEX(pipe), 0); } @@ -1687,10 +1709,10 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) for (i = 0; i < 9; i++) { const struct drm_color_lut *entry = &lut[i]; - ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_ldw(entry)); - ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_udw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_udw(entry)); } ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_INDEX(pipe), @@ -1726,10 +1748,10 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) for (i = 1; i < 257; i++) { entry = &lut[i * 8]; - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_12p4_ldw(entry)); - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_12p4_udw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } /* @@ -1747,10 +1769,10 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) for (i = 0; i < 256; i++) { entry = &lut[i * 8 * 128]; - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_12p4_ldw(entry)); - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_12p4_udw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } ilk_lut_write(crtc_state, PREC_PAL_INDEX(pipe), diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 3252dab56430..4fbe2e3542ca 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -3,6 +3,7 @@ * Copyright © 2018 Intel Corporation */ 
+#include "i915_drv.h" #include "i915_reg.h" #include "intel_combo_phy.h" #include "intel_combo_phy_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 69667cd49ec8..968ac705c3c6 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -38,6 +38,7 @@ #include "i915_reg.h" #include "intel_connector.h" #include "intel_crt.h" +#include "intel_crt_regs.h" #include "intel_crtc.h" #include "intel_ddi.h" #include "intel_ddi_buf_trans.h" @@ -55,18 +56,23 @@ #include "intel_pch_refclk.h" /* Here's the desired hotplug mode */ -#define ADPA_HOTPLUG_BITS (ADPA_CRT_HOTPLUG_PERIOD_128 | \ +#define ADPA_HOTPLUG_BITS (ADPA_CRT_HOTPLUG_ENABLE | \ + ADPA_CRT_HOTPLUG_PERIOD_128 | \ ADPA_CRT_HOTPLUG_WARMUP_10MS | \ ADPA_CRT_HOTPLUG_SAMPLE_4S | \ ADPA_CRT_HOTPLUG_VOLTAGE_50 | \ - ADPA_CRT_HOTPLUG_VOLREF_325MV | \ - ADPA_CRT_HOTPLUG_ENABLE) + ADPA_CRT_HOTPLUG_VOLREF_325MV) +#define ADPA_HOTPLUG_MASK (ADPA_CRT_HOTPLUG_MONITOR_MASK | \ + ADPA_CRT_HOTPLUG_ENABLE | \ + ADPA_CRT_HOTPLUG_PERIOD_MASK | \ + ADPA_CRT_HOTPLUG_WARMUP_MASK | \ + ADPA_CRT_HOTPLUG_SAMPLE_MASK | \ + ADPA_CRT_HOTPLUG_VOLTAGE_MASK | \ + ADPA_CRT_HOTPLUG_VOLREF_MASK | \ + ADPA_CRT_HOTPLUG_FORCE_TRIGGER) struct intel_crt { struct intel_encoder base; - /* DPMS state is stored in the connector, which we need in the - * encoder's enable/disable callbacks */ - struct intel_connector *connector; bool force_hotplug_required; i915_reg_t adpa_reg; }; @@ -91,9 +97,9 @@ bool intel_crt_port_enabled(struct intel_display *display, /* asserts want to know the pipe even if the port is disabled */ if (HAS_PCH_CPT(dev_priv)) - *pipe = (val & ADPA_PIPE_SEL_MASK_CPT) >> ADPA_PIPE_SEL_SHIFT_CPT; + *pipe = REG_FIELD_GET(ADPA_PIPE_SEL_MASK_CPT, val); else - *pipe = (val & ADPA_PIPE_SEL_MASK) >> ADPA_PIPE_SEL_SHIFT; + *pipe = REG_FIELD_GET(ADPA_PIPE_SEL_MASK, val); return val & ADPA_DAC_ENABLE; } @@ -141,27 +147,27 @@ static unsigned int 
intel_crt_get_flags(struct intel_encoder *encoder) } static void intel_crt_get_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *crtc_state) { - pipe_config->output_types |= BIT(INTEL_OUTPUT_ANALOG); + crtc_state->output_types |= BIT(INTEL_OUTPUT_ANALOG); - pipe_config->hw.adjusted_mode.flags |= intel_crt_get_flags(encoder); + crtc_state->hw.adjusted_mode.flags |= intel_crt_get_flags(encoder); - pipe_config->hw.adjusted_mode.crtc_clock = pipe_config->port_clock; + crtc_state->hw.adjusted_mode.crtc_clock = crtc_state->port_clock; } static void hsw_crt_get_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *crtc_state) { - lpt_pch_get_config(pipe_config); + lpt_pch_get_config(crtc_state); - hsw_ddi_get_config(encoder, pipe_config); + hsw_ddi_get_config(encoder, crtc_state); - pipe_config->hw.adjusted_mode.flags &= ~(DRM_MODE_FLAG_PHSYNC | - DRM_MODE_FLAG_NHSYNC | - DRM_MODE_FLAG_PVSYNC | - DRM_MODE_FLAG_NVSYNC); - pipe_config->hw.adjusted_mode.flags |= intel_crt_get_flags(encoder); + crtc_state->hw.adjusted_mode.flags &= ~(DRM_MODE_FLAG_PHSYNC | + DRM_MODE_FLAG_NHSYNC | + DRM_MODE_FLAG_PVSYNC | + DRM_MODE_FLAG_NVSYNC); + crtc_state->hw.adjusted_mode.flags |= intel_crt_get_flags(encoder); } /* Note: The caller is required to filter out dpms modes not supported by the @@ -244,7 +250,7 @@ static void hsw_disable_crt(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_display *display = to_intel_display(state); + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); drm_WARN_ON(display->drm, !old_crtc_state->has_pch_encoder); @@ -257,7 +263,7 @@ static void hsw_post_disable_crt(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state 
*old_conn_state) { - struct intel_display *display = to_intel_display(state); + struct intel_display *display = to_intel_display(encoder); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -287,7 +293,7 @@ static void hsw_pre_pll_enable_crt(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_display *display = to_intel_display(state); + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); drm_WARN_ON(display->drm, !crtc_state->has_pch_encoder); @@ -300,7 +306,7 @@ static void hsw_pre_enable_crt(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_display *display = to_intel_display(state); + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum pipe pipe = crtc->pipe; @@ -319,7 +325,7 @@ static void hsw_enable_crt(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct intel_display *display = to_intel_display(state); + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum pipe pipe = crtc->pipe; @@ -355,8 +361,7 @@ intel_crt_mode_valid(struct drm_connector *connector, const struct drm_display_mode *mode) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(connector->dev); int max_dotclk = display->cdclk.max_dotclk_freq; enum 
drm_mode_status status; int max_clock; @@ -399,48 +404,48 @@ intel_crt_mode_valid(struct drm_connector *connector, } static int intel_crt_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, + struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { struct drm_display_mode *adjusted_mode = - &pipe_config->hw.adjusted_mode; + &crtc_state->hw.adjusted_mode; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; - pipe_config->sink_format = INTEL_OUTPUT_FORMAT_RGB; - pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + crtc_state->sink_format = INTEL_OUTPUT_FORMAT_RGB; + crtc_state->output_format = INTEL_OUTPUT_FORMAT_RGB; return 0; } static int pch_crt_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, + struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { struct drm_display_mode *adjusted_mode = - &pipe_config->hw.adjusted_mode; + &crtc_state->hw.adjusted_mode; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; - pipe_config->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(pipe_config)) + crtc_state->has_pch_encoder = true; + if (!intel_fdi_compute_pipe_bpp(crtc_state)) return -EINVAL; - pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + crtc_state->output_format = INTEL_OUTPUT_FORMAT_RGB; return 0; } static int hsw_crt_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, + struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = - &pipe_config->hw.adjusted_mode; + &crtc_state->hw.adjusted_mode; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; @@ -450,30 +455,30 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, adjusted_mode->crtc_hblank_start > 4096) return 
-EINVAL; - pipe_config->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(pipe_config)) + crtc_state->has_pch_encoder = true; + if (!intel_fdi_compute_pipe_bpp(crtc_state)) return -EINVAL; - pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + crtc_state->output_format = INTEL_OUTPUT_FORMAT_RGB; /* LPT FDI RX only supports 8bpc. */ if (HAS_PCH_LPT(dev_priv)) { /* TODO: Check crtc_state->max_link_bpp_x16 instead of bw_constrained */ - if (pipe_config->bw_constrained && pipe_config->pipe_bpp < 24) { + if (crtc_state->bw_constrained && crtc_state->pipe_bpp < 24) { drm_dbg_kms(display->drm, "LPT only supports 24bpp\n"); return -EINVAL; } - pipe_config->pipe_bpp = 24; + crtc_state->pipe_bpp = 24; } /* FDI must always be 2.7 GHz */ - pipe_config->port_clock = 135000 * 2; + crtc_state->port_clock = 135000 * 2; - pipe_config->enhanced_framing = true; + crtc_state->enhanced_framing = true; - adjusted_mode->crtc_clock = lpt_iclkip(pipe_config); + adjusted_mode->crtc_clock = lpt_iclkip(crtc_state); return 0; } @@ -481,9 +486,8 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, static bool ilk_crt_detect_hotplug(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_device *dev = connector->dev; struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(connector->dev); u32 adpa; bool ret; @@ -532,9 +536,8 @@ static bool ilk_crt_detect_hotplug(struct drm_connector *connector) static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_device *dev = connector->dev; struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(connector->dev); bool reenable_hpd; u32 adpa; bool 
ret; @@ -588,8 +591,7 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) static bool intel_crt_detect_hotplug(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(connector->dev); u32 stat; bool ret = false; int i, tries = 0; @@ -856,7 +858,7 @@ intel_crt_detect(struct drm_connector *connector, struct intel_display *display = to_intel_display(connector->dev); struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); - struct intel_encoder *intel_encoder = &crt->base; + struct intel_encoder *encoder = &crt->base; struct drm_atomic_state *state; intel_wakeref_t wakeref; int status; @@ -865,15 +867,14 @@ intel_crt_detect(struct drm_connector *connector, connector->base.id, connector->name, force); - if (!intel_display_device_enabled(dev_priv)) + if (!intel_display_device_enabled(display)) return connector_status_disconnected; - if (!intel_display_driver_check_access(dev_priv)) + if (!intel_display_driver_check_access(display)) return connector->status; if (display->params.load_detect_test) { - wakeref = intel_display_power_get(dev_priv, - intel_encoder->power_domain); + wakeref = intel_display_power_get(dev_priv, encoder->power_domain); goto load_detect; } @@ -881,8 +882,7 @@ intel_crt_detect(struct drm_connector *connector, if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; - wakeref = intel_display_power_get(dev_priv, - intel_encoder->power_domain); + wakeref = intel_display_power_get(dev_priv, encoder->power_domain); if (I915_HAS_HOTPLUG(display)) { /* We can not rely on the HPD pin always being correctly wired @@ -939,7 +939,7 @@ load_detect: } out: - intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); + 
intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return status; } @@ -947,19 +947,17 @@ out: static int intel_crt_get_modes(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); - struct intel_encoder *intel_encoder = &crt->base; + struct intel_encoder *encoder = &crt->base; intel_wakeref_t wakeref; struct i2c_adapter *ddc; int ret; - if (!intel_display_driver_check_access(dev_priv)) + if (!intel_display_driver_check_access(display)) return drm_edid_connector_add_modes(connector); - wakeref = intel_display_power_get(dev_priv, - intel_encoder->power_domain); + wakeref = intel_display_power_get(dev_priv, encoder->power_domain); ret = intel_crt_ddc_get_modes(connector, connector->ddc); if (ret || !IS_G4X(dev_priv)) @@ -970,7 +968,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) ret = intel_crt_ddc_get_modes(connector, ddc); out: - intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -984,7 +982,7 @@ void intel_crt_reset(struct drm_encoder *encoder) u32 adpa; adpa = intel_de_read(display, crt->adpa_reg); - adpa &= ~ADPA_CRT_HOTPLUG_MASK; + adpa &= ~ADPA_HOTPLUG_MASK; adpa |= ADPA_HOTPLUG_BITS; intel_de_write(display, crt->adpa_reg, adpa); intel_de_posting_read(display, crt->adpa_reg); @@ -1022,9 +1020,8 @@ static const struct drm_encoder_funcs intel_crt_enc_funcs = { void intel_crt_init(struct intel_display *display) { struct drm_i915_private *dev_priv = to_i915(display->drm); - struct drm_connector *connector; + struct intel_connector *connector; struct intel_crt *crt; - struct intel_connector *intel_connector; i915_reg_t adpa_reg; u8 ddc_pin; u32 
adpa; @@ -1047,7 +1044,9 @@ void intel_crt_init(struct intel_display *display) * it and see what happens. */ intel_de_write(display, adpa_reg, - adpa | ADPA_DAC_ENABLE | ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE); + adpa | ADPA_DAC_ENABLE | + ADPA_HSYNC_CNTL_DISABLE | + ADPA_VSYNC_CNTL_DISABLE); if ((intel_de_read(display, adpa_reg) & ADPA_DAC_ENABLE) == 0) return; intel_de_write(display, adpa_reg, adpa); @@ -1057,17 +1056,15 @@ void intel_crt_init(struct intel_display *display) if (!crt) return; - intel_connector = intel_connector_alloc(); - if (!intel_connector) { + connector = intel_connector_alloc(); + if (!connector) { kfree(crt); return; } ddc_pin = display->vbt.crt_ddc_pin; - connector = &intel_connector->base; - crt->connector = intel_connector; - drm_connector_init_with_ddc(display->drm, connector, + drm_connector_init_with_ddc(display->drm, &connector->base, &intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA, intel_gmbus_get_adapter(display, ddc_pin)); @@ -1075,7 +1072,7 @@ void intel_crt_init(struct intel_display *display) drm_encoder_init(display->drm, &crt->base.base, &intel_crt_enc_funcs, DRM_MODE_ENCODER_DAC, "CRT"); - intel_connector_attach_encoder(intel_connector, &crt->base); + intel_connector_attach_encoder(connector, &crt->base); crt->base.type = INTEL_OUTPUT_ANALOG; crt->base.cloneable = BIT(INTEL_OUTPUT_DVO) | BIT(INTEL_OUTPUT_HDMI); @@ -1085,7 +1082,7 @@ void intel_crt_init(struct intel_display *display) crt->base.pipe_mask = ~0; if (DISPLAY_VER(display) != 2) - connector->interlace_allowed = true; + connector->base.interlace_allowed = true; crt->adpa_reg = adpa_reg; @@ -1095,11 +1092,11 @@ void intel_crt_init(struct intel_display *display) !dmi_check_system(intel_spurious_crt_detect)) { crt->base.hpd_pin = HPD_CRT; crt->base.hotplug = intel_encoder_hotplug; - intel_connector->polled = DRM_CONNECTOR_POLL_HPD; + connector->polled = DRM_CONNECTOR_POLL_HPD; } else { - intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT; + 
connector->polled = DRM_CONNECTOR_POLL_CONNECT; } - intel_connector->base.polled = intel_connector->polled; + connector->base.polled = connector->polled; if (HAS_DDI(display)) { assert_port_valid(dev_priv, PORT_E); @@ -1132,9 +1129,9 @@ void intel_crt_init(struct intel_display *display) crt->base.get_hw_state = intel_crt_get_hw_state; crt->base.enable = intel_enable_crt; } - intel_connector->get_hw_state = intel_connector_get_hw_state; + connector->get_hw_state = intel_connector_get_hw_state; - drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs); + drm_connector_helper_add(&connector->base, &intel_crt_connector_helper_funcs); /* * TODO: find a proper way to discover whether we need to set the the diff --git a/drivers/gpu/drm/i915/display/intel_crt_regs.h b/drivers/gpu/drm/i915/display/intel_crt_regs.h new file mode 100644 index 000000000000..571a67ae9afa --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_crt_regs.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __INTEL_CRT_REGS_H__ +#define __INTEL_CRT_REGS_H__ + +#include "intel_display_reg_defs.h" + +#define ADPA _MMIO(0x61100) +#define PCH_ADPA _MMIO(0xe1100) +#define VLV_ADPA _MMIO(VLV_DISPLAY_BASE + 0x61100) +#define ADPA_DAC_ENABLE REG_BIT(31) +#define ADPA_PIPE_SEL_MASK REG_BIT(30) +#define ADPA_PIPE_SEL(pipe) REG_FIELD_PREP(ADPA_PIPE_SEL_MASK, (pipe)) +#define ADPA_PIPE_SEL_MASK_CPT REG_GENMASK(30, 29) +#define ADPA_PIPE_SEL_CPT(pipe) REG_FIELD_PREP(ADPA_PIPE_SEL_MASK_CPT, (pipe)) +#define ADPA_CRT_HOTPLUG_MONITOR_MASK REG_GENMASK(25, 24) +#define ADPA_CRT_HOTPLUG_MONITOR_NONE REG_FIELD_PREP(ADPA_CRT_HOTPLUG_MONITOR_MASK, 0) +#define ADPA_CRT_HOTPLUG_MONITOR_COLOR REG_FIELD_PREP(ADPA_CRT_HOTPLUG_MONITOR_MASK, 3) +#define ADPA_CRT_HOTPLUG_MONITOR_MONO REG_FIELD_PREP(ADPA_CRT_HOTPLUG_MONITOR_MASK, 2) +#define ADPA_CRT_HOTPLUG_ENABLE REG_BIT(23) +#define ADPA_CRT_HOTPLUG_PERIOD_MASK REG_BIT(22) +#define 
ADPA_CRT_HOTPLUG_PERIOD_64 REG_FIELD_PREP(ADPA_CRT_HOTPLUG_PERIOD_MASK, 0) +#define ADPA_CRT_HOTPLUG_PERIOD_128 REG_FIELD_PREP(ADPA_CRT_HOTPLUG_PERIOD_MASK, 1) +#define ADPA_CRT_HOTPLUG_WARMUP_MASK REG_BIT(21) +#define ADPA_CRT_HOTPLUG_WARMUP_5MS REG_FIELD_PREP(ADPA_CRT_HOTPLUG_WARMUP_MASK, 0) +#define ADPA_CRT_HOTPLUG_WARMUP_10MS REG_FIELD_PREP(ADPA_CRT_HOTPLUG_WARMUP_MASK, 1) +#define ADPA_CRT_HOTPLUG_SAMPLE_MASK REG_BIT(20) +#define ADPA_CRT_HOTPLUG_SAMPLE_2S REG_FIELD_PREP(ADPA_CRT_HOTPLUG_SAMPLE_MASK, 0) +#define ADPA_CRT_HOTPLUG_SAMPLE_4S REG_FIELD_PREP(ADPA_CRT_HOTPLUG_SAMPLE_MASK, 1) +#define ADPA_CRT_HOTPLUG_VOLTAGE_MASK REG_GENMASK(19, 18) +#define ADPA_CRT_HOTPLUG_VOLTAGE_40 REG_FIELD_PREP(ADPA_CRT_HOTPLUG_VOLTAGE_MASK, 0) +#define ADPA_CRT_HOTPLUG_VOLTAGE_50 REG_FIELD_PREP(ADPA_CRT_HOTPLUG_VOLTAGE_MASK, 1) +#define ADPA_CRT_HOTPLUG_VOLTAGE_60 REG_FIELD_PREP(ADPA_CRT_HOTPLUG_VOLTAGE_MASK, 2) +#define ADPA_CRT_HOTPLUG_VOLTAGE_70 REG_FIELD_PREP(ADPA_CRT_HOTPLUG_VOLTAGE_MASK, 3) +#define ADPA_CRT_HOTPLUG_VOLREF_MASK REG_BIT(17) +#define ADPA_CRT_HOTPLUG_VOLREF_325MV REG_FIELD_PREP(ADPA_CRT_HOTPLUG_VOLREF_MASK, 0) +#define ADPA_CRT_HOTPLUG_VOLREF_475MV REG_FIELD_PREP(ADPA_CRT_HOTPLUG_VOLREF_MASK, 1) +#define ADPA_CRT_HOTPLUG_FORCE_TRIGGER REG_BIT(16) +#define ADPA_USE_VGA_HVPOLARITY REG_BIT(15) +#define ADPA_HSYNC_CNTL_DISABLE REG_BIT(11) +#define ADPA_VSYNC_CNTL_DISABLE REG_BIT(10) +#define ADPA_VSYNC_ACTIVE_HIGH REG_BIT(4) +#define ADPA_HSYNC_ACTIVE_HIGH REG_BIT(3) + +#define _VGA_MSR_WRITE _MMIO(0x3c2) + +#endif /* __INTEL_CRT_REGS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index a2c528d707f4..c910168602d2 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -12,6 +12,7 @@ #include <drm/drm_vblank.h> #include <drm/drm_vblank_work.h> +#include "i915_drv.h" #include "i915_vgpu.h" #include "i9xx_plane.h" #include "icl_dsi.h" diff --git 
a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 705ec5ad385c..1faef60be472 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -50,16 +50,6 @@ intel_dump_infoframe(struct drm_i915_private *i915, hdmi_infoframe_log(KERN_DEBUG, i915->drm.dev, frame); } -static void -intel_dump_buffer(const char *prefix, const u8 *buf, size_t len) -{ - if (!drm_debug_enabled(DRM_UT_KMS)) - return; - - print_hex_dump(KERN_DEBUG, prefix, DUMP_PREFIX_NONE, - 16, 0, buf, len, false); -} - #define OUTPUT_TYPE(x) [INTEL_OUTPUT_ ## x] = #x static const char * const output_type_str[] = { @@ -293,8 +283,8 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, drm_dp_as_sdp_log(&p, &pipe_config->infoframes.as_sdp); if (pipe_config->has_audio) - intel_dump_buffer("ELD: ", pipe_config->eld, - drm_eld_size(pipe_config->eld)); + drm_print_hex_dump(&p, "ELD: ", pipe_config->eld, + drm_eld_size(pipe_config->eld)); drm_printf(&p, "vrr: %s, vmin: %d, vmax: %d, pipeline full: %d, guardband: %d flipline: %d, vmin vblank: %d, vmax vblank: %d\n", str_yes_no(pipe_config->vrr.enable), diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 9ba77970dab7..57cf8f46a458 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -11,6 +11,7 @@ #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> +#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_atomic_plane.h" @@ -619,7 +620,6 @@ static void skl_write_cursor_wm(struct intel_dsb *dsb, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(plane->base.dev); - struct drm_i915_private *i915 = to_i915(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; const struct skl_pipe_wm *pipe_wm = 
&crtc_state->wm.skl.optimal; @@ -627,14 +627,14 @@ static void skl_write_cursor_wm(struct intel_dsb *dsb, &crtc_state->wm.skl.plane_ddb[plane_id]; int level; - for (level = 0; level < i915->display.wm.num_levels; level++) + for (level = 0; level < display->wm.num_levels; level++) intel_de_write_dsb(display, dsb, CUR_WM(pipe, level), skl_cursor_wm_reg_val(skl_plane_wm_level(pipe_wm, plane_id, level))); intel_de_write_dsb(display, dsb, CUR_WM_TRANS(pipe), skl_cursor_wm_reg_val(skl_plane_trans_wm(pipe_wm, plane_id))); - if (HAS_HW_SAGV_WM(i915)) { + if (HAS_HW_SAGV_WM(display)) { const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; intel_de_write_dsb(display, dsb, CUR_WM_SAGV(pipe), diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 71dc659228ab..e768dc6a15b3 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -5,6 +5,8 @@ #include <linux/log2.h> #include <linux/math64.h> + +#include "i915_drv.h" #include "i915_reg.h" #include "intel_cx0_phy.h" #include "intel_cx0_phy_regs.h" @@ -2115,14 +2117,6 @@ static void intel_c10_pll_program(struct intel_display *display, 0, C10_VDR_CTRL_MSGBUS_ACCESS, MB_WRITE_COMMITTED); - /* Custom width needs to be programmed to 0 for both the phy lanes */ - intel_cx0_rmw(encoder, INTEL_CX0_BOTH_LANES, PHY_C10_VDR_CUSTOM_WIDTH, - C10_VDR_CUSTOM_WIDTH_MASK, C10_VDR_CUSTOM_WIDTH_8_10, - MB_WRITE_COMMITTED); - intel_cx0_rmw(encoder, INTEL_CX0_BOTH_LANES, PHY_C10_VDR_CONTROL(1), - 0, C10_VDR_CTRL_UPDATE_CFG, - MB_WRITE_COMMITTED); - /* Program the pll values only for the master lane */ for (i = 0; i < ARRAY_SIZE(pll_state->pll); i++) intel_cx0_write(encoder, INTEL_CX0_LANE0, PHY_C10_VDR_PLL(i), @@ -2132,6 +2126,10 @@ static void intel_c10_pll_program(struct intel_display *display, intel_cx0_write(encoder, INTEL_CX0_LANE0, PHY_C10_VDR_CMN(0), pll_state->cmn, MB_WRITE_COMMITTED); intel_cx0_write(encoder, INTEL_CX0_LANE0, 
PHY_C10_VDR_TX(0), pll_state->tx, MB_WRITE_COMMITTED); + /* Custom width needs to be programmed to 0 for both the phy lanes */ + intel_cx0_rmw(encoder, INTEL_CX0_BOTH_LANES, PHY_C10_VDR_CUSTOM_WIDTH, + C10_VDR_CUSTOM_WIDTH_MASK, C10_VDR_CUSTOM_WIDTH_8_10, + MB_WRITE_COMMITTED); intel_cx0_rmw(encoder, INTEL_CX0_LANE0, PHY_C10_VDR_CONTROL(1), 0, C10_VDR_CTRL_MASTER_LANE | C10_VDR_CTRL_UPDATE_CFG, MB_WRITE_COMMITTED); @@ -2987,7 +2985,7 @@ static void intel_cx0pll_enable(struct intel_encoder *encoder, struct intel_display *display = to_intel_display(encoder); enum phy phy = intel_encoder_to_phy(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; + bool lane_reversal = dig_port->lane_reversal; u8 maxpclk_lane = lane_reversal ? INTEL_CX0_LANE1 : INTEL_CX0_LANE0; intel_wakeref_t wakeref = intel_cx0_phy_transaction_begin(encoder); @@ -3070,7 +3068,10 @@ int intel_mtl_tbt_calc_port_clock(struct intel_encoder *encoder) val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port)); - clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val); + if (DISPLAY_VER(display) >= 30) + clock = REG_FIELD_GET(XE3_DDI_CLOCK_SELECT_MASK, val); + else + clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val); drm_WARN_ON(display->drm, !(val & XELPDP_FORWARD_CLOCK_UNGATE)); drm_WARN_ON(display->drm, !(val & XELPDP_TBT_CLOCK_REQUEST)); @@ -3085,13 +3086,18 @@ int intel_mtl_tbt_calc_port_clock(struct intel_encoder *encoder) return 540000; case XELPDP_DDI_CLOCK_SELECT_TBT_810: return 810000; + case XELPDP_DDI_CLOCK_SELECT_TBT_312_5: + return 1000000; + case XELPDP_DDI_CLOCK_SELECT_TBT_625: + return 2000000; default: MISSING_CASE(clock); return 162000; } } -static int intel_mtl_tbt_clock_select(int clock) +static int intel_mtl_tbt_clock_select(struct intel_display *display, + int clock) { switch (clock) { case 162000: @@ -3102,6 +3108,18 @@ static int intel_mtl_tbt_clock_select(int 
clock) return XELPDP_DDI_CLOCK_SELECT_TBT_540; case 810000: return XELPDP_DDI_CLOCK_SELECT_TBT_810; + case 1000000: + if (DISPLAY_VER(display) < 30) { + drm_WARN_ON(display->drm, "UHBR10 not supported for the platform\n"); + return XELPDP_DDI_CLOCK_SELECT_TBT_162; + } + return XELPDP_DDI_CLOCK_SELECT_TBT_312_5; + case 2000000: + if (DISPLAY_VER(display) < 30) { + drm_WARN_ON(display->drm, "UHBR20 not supported for the platform\n"); + return XELPDP_DDI_CLOCK_SELECT_TBT_162; + } + return XELPDP_DDI_CLOCK_SELECT_TBT_625; default: MISSING_CASE(clock); return XELPDP_DDI_CLOCK_SELECT_TBT_162; @@ -3114,15 +3132,26 @@ static void intel_mtl_tbt_pll_enable(struct intel_encoder *encoder, struct intel_display *display = to_intel_display(encoder); enum phy phy = intel_encoder_to_phy(encoder); u32 val = 0; + u32 mask; /* * 1. Program PORT_CLOCK_CTL REGISTER to configure * clock muxes, gating and SSC */ - val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(crtc_state->port_clock)); + + if (DISPLAY_VER(display) >= 30) { + mask = XE3_DDI_CLOCK_SELECT_MASK; + val |= XE3_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock)); + } else { + mask = XELPDP_DDI_CLOCK_SELECT_MASK; + val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock)); + } + + mask |= XELPDP_FORWARD_CLOCK_UNGATE; val |= XELPDP_FORWARD_CLOCK_UNGATE; + intel_de_rmw(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), - XELPDP_DDI_CLOCK_SELECT_MASK | XELPDP_FORWARD_CLOCK_UNGATE, val); + mask, val); /* 2. 
Read back PORT_CLOCK_CTL REGISTER */ val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port)); diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h index f0e5c196eae4..da154ff26b96 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h @@ -9,6 +9,11 @@ #include "i915_reg_defs.h" #include "intel_display_limits.h" +/* DDI Buffer Control */ +#define _DDI_CLK_VALFREQ_A 0x64030 +#define _DDI_CLK_VALFREQ_B 0x64130 +#define DDI_CLK_VALFREQ(port) _MMIO_PORT(port, _DDI_CLK_VALFREQ_A, _DDI_CLK_VALFREQ_B) + /* * Wrapper macro to convert from port number to the index used in some of the * registers. For Display version 20 and above it converts the port number to a @@ -187,7 +192,9 @@ #define XELPDP_TBT_CLOCK_REQUEST REG_BIT(19) #define XELPDP_TBT_CLOCK_ACK REG_BIT(18) #define XELPDP_DDI_CLOCK_SELECT_MASK REG_GENMASK(15, 12) +#define XE3_DDI_CLOCK_SELECT_MASK REG_GENMASK(16, 12) #define XELPDP_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XELPDP_DDI_CLOCK_SELECT_MASK, val) +#define XE3_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XE3_DDI_CLOCK_SELECT_MASK, val) #define XELPDP_DDI_CLOCK_SELECT_NONE 0x0 #define XELPDP_DDI_CLOCK_SELECT_MAXPCLK 0x8 #define XELPDP_DDI_CLOCK_SELECT_DIV18CLK 0x9 @@ -195,11 +202,20 @@ #define XELPDP_DDI_CLOCK_SELECT_TBT_270 0xd #define XELPDP_DDI_CLOCK_SELECT_TBT_540 0xe #define XELPDP_DDI_CLOCK_SELECT_TBT_810 0xf +#define XELPDP_DDI_CLOCK_SELECT_TBT_312_5 0x18 +#define XELPDP_DDI_CLOCK_SELECT_TBT_625 0x19 #define XELPDP_FORWARD_CLOCK_UNGATE REG_BIT(10) #define XELPDP_LANE1_PHY_CLOCK_SELECT REG_BIT(8) #define XELPDP_SSC_ENABLE_PLLA REG_BIT(1) #define XELPDP_SSC_ENABLE_PLLB REG_BIT(0) +#define TCSS_DISP_MAILBOX_IN_CMD _MMIO(0x161300) +#define TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY REG_BIT(31) +#define TCSS_DISP_MAILBOX_IN_CMD_CMD_MASK REG_GENMASK(7, 0) +#define TCSS_DISP_MAILBOX_IN_CMD_DATA(val) 
REG_FIELD_PREP(TCSS_DISP_MAILBOX_IN_CMD_CMD_MASK, val) + +#define TCSS_DISP_MAILBOX_IN_DATA _MMIO(0x161304) + /* C10 Vendor Registers */ #define PHY_C10_VDR_PLL(idx) (0xC00 + (idx)) #define C10_PLL0_FRACEN REG_BIT8(4) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index a70e7f263a89..0aec4b958af6 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -28,6 +28,7 @@ #include <linux/iopoll.h> #include <linux/string_helpers.h> +#include <drm/display/drm_dp_helper.h> #include <drm/display/drm_scdc_helper.h> #include <drm/drm_privacy_screen_consumer.h> @@ -335,10 +336,14 @@ static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder, struct intel_digital_port *dig_port = enc_to_dig_port(encoder); /* DDI_BUF_CTL_ENABLE will be set by intel_ddi_prepare_link_retrain() later */ - intel_dp->DP = dig_port->saved_port_bits | - DDI_PORT_WIDTH(crtc_state->lane_count) | + intel_dp->DP = DDI_PORT_WIDTH(crtc_state->lane_count) | DDI_BUF_TRANS_SELECT(0); + if (dig_port->lane_reversal) + intel_dp->DP |= DDI_BUF_PORT_REVERSAL; + if (dig_port->ddi_a_4_lanes) + intel_dp->DP |= DDI_A_4_LANES; + if (DISPLAY_VER(i915) >= 14) { if (intel_dp_is_uhbr(crtc_state)) intel_dp->DP |= DDI_BUF_PORT_DATA_40BIT; @@ -455,17 +460,20 @@ static u32 bdw_trans_port_sync_master_select(enum transcoder master_transcoder) } static void -intel_ddi_config_transcoder_dp2(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +intel_ddi_config_transcoder_dp2(const struct intel_crtc_state *crtc_state, + bool enable) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 val = 0; - if (intel_dp_is_uhbr(crtc_state)) + if (!HAS_DP20(display)) + return; + + if (enable && intel_dp_is_uhbr(crtc_state)) val = TRANS_DP2_128B132B_CHANNEL_CODING; - 
intel_de_write(i915, TRANS_DP2_CTL(cpu_transcoder), val); + intel_de_write(display, TRANS_DP2_CTL(cpu_transcoder), val); } /* @@ -554,7 +562,8 @@ intel_ddi_transcoder_func_reg_val_get(struct intel_encoder *encoder, } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) { temp |= TRANS_DDI_MODE_SELECT_FDI_OR_128B132B; temp |= (crtc_state->fdi_lanes - 1) << 1; - } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { + } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST) || + intel_dp_is_uhbr(crtc_state)) { if (intel_dp_is_uhbr(crtc_state)) temp |= TRANS_DDI_MODE_SELECT_FDI_OR_128B132B; else @@ -617,9 +626,10 @@ void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder, /* * Same as intel_ddi_enable_transcoder_func(), but it does not set the enable - * bit. + * bit for the DDI function and enables the DP2 configuration. Called for all + * transcoder types. */ -static void +void intel_ddi_config_transcoder_func(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -628,18 +638,27 @@ intel_ddi_config_transcoder_func(struct intel_encoder *encoder, enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 ctl; + intel_ddi_config_transcoder_dp2(crtc_state, true); + ctl = intel_ddi_transcoder_func_reg_val_get(encoder, crtc_state); ctl &= ~TRANS_DDI_FUNC_ENABLE; intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(dev_priv, cpu_transcoder), ctl); } +/* + * Disable the DDI function and port syncing. + * For SST, pre-TGL MST, TGL+ MST-slave transcoders: deselect the DDI port, + * SST/MST mode and disable the DP2 configuration. For TGL+ MST-master + * transcoders these are done later in intel_ddi_post_disable_dp(). 
+ */ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); u32 ctl; if (DISPLAY_VER(dev_priv) >= 11) @@ -659,7 +678,8 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK); if (DISPLAY_VER(dev_priv) >= 12) { - if (!intel_dp_mst_is_master_trans(crtc_state)) { + if (!intel_dp_mst_is_master_trans(crtc_state) || + (!is_mst && intel_dp_is_uhbr(crtc_state))) { ctl &= ~(TGL_TRANS_DDI_PORT_MASK | TRANS_DDI_MODE_SELECT_MASK); } @@ -670,6 +690,9 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(dev_priv, cpu_transcoder), ctl); + if (intel_dp_mst_is_slave_trans(crtc_state)) + intel_ddi_config_transcoder_dp2(crtc_state, false); + if (intel_has_quirk(display, QUIRK_INCREASE_DDI_DISABLED_TIME) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { drm_dbg_kms(display->drm, "Quirk Increase DDI disabled time\n"); @@ -700,15 +723,15 @@ int intel_ddi_toggle_hdcp_bits(struct intel_encoder *intel_encoder, bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) { - struct drm_device *dev = intel_connector->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(intel_connector); + struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder = intel_attached_encoder(intel_connector); int type = intel_connector->base.connector_type; enum port port = encoder->port; enum transcoder cpu_transcoder; intel_wakeref_t wakeref; enum pipe pipe = 0; - u32 tmp; + u32 ddi_mode; bool ret; wakeref = 
intel_display_power_get_if_enabled(dev_priv, @@ -716,6 +739,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) if (!wakeref) return false; + /* Note: This returns false for DP MST primary encoders. */ if (!encoder->get_hw_state(encoder, &pipe)) { ret = false; goto out; @@ -726,38 +750,28 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) else cpu_transcoder = (enum transcoder) pipe; - tmp = intel_de_read(dev_priv, - TRANS_DDI_FUNC_CTL(dev_priv, cpu_transcoder)); + ddi_mode = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(dev_priv, cpu_transcoder)) & + TRANS_DDI_MODE_SELECT_MASK; - switch (tmp & TRANS_DDI_MODE_SELECT_MASK) { - case TRANS_DDI_MODE_SELECT_HDMI: - case TRANS_DDI_MODE_SELECT_DVI: + if (ddi_mode == TRANS_DDI_MODE_SELECT_HDMI || + ddi_mode == TRANS_DDI_MODE_SELECT_DVI) { ret = type == DRM_MODE_CONNECTOR_HDMIA; - break; - - case TRANS_DDI_MODE_SELECT_DP_SST: + } else if (ddi_mode == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B && !HAS_DP20(display)) { + ret = type == DRM_MODE_CONNECTOR_VGA; + } else if (ddi_mode == TRANS_DDI_MODE_SELECT_DP_SST) { ret = type == DRM_MODE_CONNECTOR_eDP || - type == DRM_MODE_CONNECTOR_DisplayPort; - break; - - case TRANS_DDI_MODE_SELECT_DP_MST: - /* if the transcoder is in MST state then - * connector isn't connected */ + type == DRM_MODE_CONNECTOR_DisplayPort; + } else if (ddi_mode == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B && HAS_DP20(display)) { + /* + * encoder->get_hw_state() should have bailed out on MST. This + * must be SST and non-eDP. + */ + ret = type == DRM_MODE_CONNECTOR_DisplayPort; + } else if (drm_WARN_ON(display->drm, ddi_mode == TRANS_DDI_MODE_SELECT_DP_MST)) { + /* encoder->get_hw_state() should have bailed out on MST. 
*/ ret = false; - break; - - case TRANS_DDI_MODE_SELECT_FDI_OR_128B132B: - if (HAS_DP20(dev_priv)) - /* 128b/132b */ - ret = false; - else - /* FDI */ - ret = type == DRM_MODE_CONNECTOR_VGA; - break; - - default: + } else { ret = false; - break; } out: @@ -769,13 +783,13 @@ out: static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, u8 *pipe_mask, bool *is_dp_mst) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(encoder); + struct drm_i915_private *dev_priv = to_i915(display->drm); enum port port = encoder->port; intel_wakeref_t wakeref; enum pipe p; u32 tmp; - u8 mst_pipe_mask; + u8 mst_pipe_mask = 0, dp128b132b_pipe_mask = 0; *pipe_mask = 0; *is_dp_mst = false; @@ -812,10 +826,9 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, goto out; } - mst_pipe_mask = 0; for_each_pipe(dev_priv, p) { enum transcoder cpu_transcoder = (enum transcoder)p; - unsigned int port_mask, ddi_select; + u32 port_mask, ddi_select, ddi_mode; intel_wakeref_t trans_wakeref; trans_wakeref = intel_display_power_get_if_enabled(dev_priv, @@ -839,10 +852,12 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, if ((tmp & port_mask) != ddi_select) continue; - if ((tmp & TRANS_DDI_MODE_SELECT_MASK) == TRANS_DDI_MODE_SELECT_DP_MST || - (HAS_DP20(dev_priv) && - (tmp & TRANS_DDI_MODE_SELECT_MASK) == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B)) + ddi_mode = tmp & TRANS_DDI_MODE_SELECT_MASK; + + if (ddi_mode == TRANS_DDI_MODE_SELECT_DP_MST) mst_pipe_mask |= BIT(p); + else if (ddi_mode == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B && HAS_DP20(display)) + dp128b132b_pipe_mask |= BIT(p); *pipe_mask |= BIT(p); } @@ -852,6 +867,23 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, "No pipe for [ENCODER:%d:%s] found\n", encoder->base.base.id, encoder->base.name); + if (!mst_pipe_mask && dp128b132b_pipe_mask) { + struct intel_dp 
*intel_dp = enc_to_intel_dp(encoder); + + /* + * If we don't have 8b/10b MST, but have more than one + * transcoder in 128b/132b mode, we know it must be 128b/132b + * MST. + * + * Otherwise, we fall back to checking the current MST + * state. It's not accurate for hardware takeover at probe, but + * we don't expect MST to have been enabled at that point, and + * can assume it's SST. + */ + if (hweight8(dp128b132b_pipe_mask) > 1 || intel_dp->is_mst) + mst_pipe_mask = dp128b132b_pipe_mask; + } + if (!mst_pipe_mask && hweight8(*pipe_mask) > 1) { drm_dbg_kms(&dev_priv->drm, "Multiple pipes for [ENCODER:%d:%s] (pipe_mask %02x)\n", @@ -862,9 +894,9 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, if (mst_pipe_mask && mst_pipe_mask != *pipe_mask) drm_dbg_kms(&dev_priv->drm, - "Conflicting MST and non-MST state for [ENCODER:%d:%s] (pipe_mask %02x mst_pipe_mask %02x)\n", + "Conflicting MST and non-MST state for [ENCODER:%d:%s] (pipe masks: all %02x, MST %02x, 128b/132b %02x)\n", encoder->base.base.id, encoder->base.name, - *pipe_mask, mst_pipe_mask); + *pipe_mask, mst_pipe_mask, dp128b132b_pipe_mask); else *is_dp_mst = mst_pipe_mask; @@ -2196,8 +2228,8 @@ i915_reg_t dp_tp_ctl_reg(struct intel_encoder *encoder, return DP_TP_CTL(encoder->port); } -i915_reg_t dp_tp_status_reg(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +static i915_reg_t dp_tp_status_reg(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -2208,6 +2240,25 @@ i915_reg_t dp_tp_status_reg(struct intel_encoder *encoder, return DP_TP_STATUS(encoder->port); } +void intel_ddi_clear_act_sent(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(encoder); + + intel_de_write(display, dp_tp_status_reg(encoder, crtc_state), + DP_TP_STATUS_ACT_SENT); +} + +void intel_ddi_wait_for_act_sent(struct 
intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(encoder); + + if (intel_de_wait_for_set(display, dp_tp_status_reg(encoder, crtc_state), + DP_TP_STATUS_ACT_SENT, 1)) + drm_err(display->drm, "Timed out waiting for ACT sent\n"); +} + static void intel_dp_sink_set_msa_timing_par_ignore_state(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, bool enable) @@ -2376,12 +2427,10 @@ static void intel_ddi_power_up_lanes(struct intel_encoder *encoder, if (intel_encoder_is_combo(encoder)) { enum phy phy = intel_encoder_to_phy(encoder); - bool lane_reversal = - dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; intel_combo_phy_power_up_lanes(i915, phy, false, crtc_state->lane_count, - lane_reversal); + dig_port->lane_reversal); } } @@ -2506,25 +2555,24 @@ mtl_ddi_enable_d2d(struct intel_encoder *encoder) static void mtl_port_buf_ctl_program(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); enum port port = encoder->port; - u32 val; + u32 val = 0; - val = intel_de_read(i915, XELPDP_PORT_BUF_CTL1(i915, port)); - val &= ~XELPDP_PORT_WIDTH_MASK; val |= XELPDP_PORT_WIDTH(mtl_get_port_width(crtc_state->lane_count)); - val &= ~XELPDP_PORT_BUF_PORT_DATA_WIDTH_MASK; if (intel_dp_is_uhbr(crtc_state)) val |= XELPDP_PORT_BUF_PORT_DATA_40BIT; else val |= XELPDP_PORT_BUF_PORT_DATA_10BIT; - if (dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL) + if (dig_port->lane_reversal) val |= XELPDP_PORT_REVERSAL; - intel_de_write(i915, XELPDP_PORT_BUF_CTL1(i915, port), val); + intel_de_rmw(display, XELPDP_PORT_BUF_CTL1(display, port), + XELPDP_PORT_WIDTH_MASK | XELPDP_PORT_BUF_PORT_DATA_WIDTH_MASK, + val); } static void mtl_port_buf_ctl_io_selection(struct intel_encoder *encoder) @@ -2547,6 +2595,7 @@ 
static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state, struct intel_dp *intel_dp = enc_to_intel_dp(encoder); bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); bool transparent_mode; + int ret; intel_dp_set_link_params(intel_dp, crtc_state->port_clock, @@ -2584,10 +2633,6 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state, /* * 6.b If DP v2.0/128b mode - Configure TRANS_DP2_CTL register settings. - */ - intel_ddi_config_transcoder_dp2(encoder, crtc_state); - - /* * 6.c Configure TRANS_DDI_FUNC_CTL DDI Select, DDI Mode Select & MST * Transport Select */ @@ -2648,6 +2693,14 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state, /* 6.o Configure and enable FEC if needed */ intel_ddi_enable_fec(encoder, crtc_state); + /* 7.a 128b/132b SST. */ + if (!is_mst && intel_dp_is_uhbr(crtc_state)) { + /* VCPID 1, start slot 0 for 128b/132b, tu slots */ + ret = drm_dp_dpcd_write_payload(&intel_dp->aux, 1, 0, crtc_state->dp_m_n.tu); + if (ret < 0) + intel_dp_queue_modeset_retry_for_link(state, encoder, crtc_state); + } + if (!is_mst) intel_dsc_dp_pps_write(encoder, crtc_state); } @@ -2661,6 +2714,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int ret; intel_dp_set_link_params(intel_dp, crtc_state->port_clock, @@ -2725,9 +2779,6 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, */ intel_ddi_enable_transcoder_clock(encoder, crtc_state); - if (HAS_DP20(dev_priv)) - intel_ddi_config_transcoder_dp2(encoder, crtc_state); - /* * 7.b Configure TRANS_DDI_FUNC_CTL DDI Select, DDI Mode Select & MST * Transport Select @@ -2790,6 +2841,13 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, /* 7.l Configure and enable FEC if needed */ intel_ddi_enable_fec(encoder, 
crtc_state); + if (!is_mst && intel_dp_is_uhbr(crtc_state)) { + /* VCPID 1, start slot 0 for 128b/132b, tu slots */ + ret = drm_dp_dpcd_write_payload(&intel_dp->aux, 1, 0, crtc_state->dp_m_n.tu); + if (ret < 0) + intel_dp_queue_modeset_retry_for_link(state, encoder, crtc_state); + } + if (!is_mst) intel_dsc_dp_pps_write(encoder, crtc_state); } @@ -2866,9 +2924,9 @@ static void intel_ddi_pre_enable_dp(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); - if (HAS_DP20(dev_priv)) + if (HAS_DP20(display)) intel_dp_128b132b_sdp_crc16(enc_to_intel_dp(encoder), crtc_state); @@ -2876,9 +2934,9 @@ static void intel_ddi_pre_enable_dp(struct intel_atomic_state *state, if (crtc_state->has_panel_replay) intel_psr_enable_sink(enc_to_intel_dp(encoder), crtc_state); - if (DISPLAY_VER(dev_priv) >= 14) + if (DISPLAY_VER(display) >= 14) mtl_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); - else if (DISPLAY_VER(dev_priv) >= 12) + else if (DISPLAY_VER(display) >= 12) tgl_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); else hsw_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); @@ -2915,6 +2973,24 @@ static void intel_ddi_pre_enable_hdmi(struct intel_atomic_state *state, crtc_state, conn_state); } +/* + * Note: Also called from the ->pre_enable of the first active MST stream + * encoder on its primary encoder. + * + * When called from DP MST code: + * + * - conn_state will be NULL + * + * - encoder will be the primary encoder (i.e. 
mst->primary) + * + * - the main connector associated with this port won't be active or linked to a + * crtc + * + * - crtc_state will be the state of the first stream to be activated on this + * port, and it may not be the same stream that will be deactivated last, but + * each stream should have a state that is identical when it comes to the DP + * link parameteres + */ static void intel_ddi_pre_enable(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, @@ -2924,19 +3000,6 @@ static void intel_ddi_pre_enable(struct intel_atomic_state *state, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - /* - * When called from DP MST code: - * - conn_state will be NULL - * - encoder will be the main encoder (ie. mst->primary) - * - the main connector associated with this port - * won't be active or linked to a crtc - * - crtc_state will be the state of the first stream to - * be activated on this port, and it may not be the same - * stream that will be deactivated last, but each stream - * should have a state that is identical when it comes to - * the DP link parameteres - */ - drm_WARN_ON(&dev_priv->drm, crtc_state->has_pch_encoder); intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); @@ -3092,6 +3155,8 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, intel_dp_sink_set_fec_ready(intel_dp, old_crtc_state, false); + intel_ddi_config_transcoder_dp2(old_crtc_state, false); + /* * From TGL spec: "If single stream or multi-stream master transcoder: * Configure Transcoder Clock select to direct no clock to the @@ -3157,7 +3222,9 @@ static void intel_ddi_post_disable_hdmi_or_sst(struct intel_atomic_state *state, { struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_crtc *pipe_crtc; + bool is_hdmi = 
intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI); int i; for_each_pipe_crtc_modeset_disable(display, pipe_crtc, old_crtc_state, i) { @@ -3169,6 +3236,20 @@ static void intel_ddi_post_disable_hdmi_or_sst(struct intel_atomic_state *state, intel_disable_transcoder(old_crtc_state); + /* 128b/132b SST */ + if (!is_hdmi && intel_dp_is_uhbr(old_crtc_state)) { + /* VCPID 1, start slot 0 for 128b/132b, clear */ + drm_dp_dpcd_write_payload(&intel_dp->aux, 1, 0, 0); + + intel_ddi_clear_act_sent(encoder, old_crtc_state); + + intel_de_rmw(display, TRANS_DDI_FUNC_CTL(display, old_crtc_state->cpu_transcoder), + TRANS_DDI_DP_VC_PAYLOAD_ALLOC, 0); + + intel_ddi_wait_for_act_sent(encoder, old_crtc_state); + drm_dp_dpcd_poll_act_handled(&intel_dp->aux, 0); + } + intel_ddi_disable_transcoder_func(old_crtc_state); for_each_pipe_crtc_modeset_disable(display, pipe_crtc, old_crtc_state, i) { @@ -3184,6 +3265,11 @@ static void intel_ddi_post_disable_hdmi_or_sst(struct intel_atomic_state *state, } } +/* + * Note: Also called from the ->post_disable of the last active MST stream + * encoder on its primary encoder. See also the comment for + * intel_ddi_pre_enable(). + */ static void intel_ddi_post_disable(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, @@ -3214,6 +3300,11 @@ static void intel_ddi_post_disable(struct intel_atomic_state *state, old_conn_state); } +/* + * Note: Also called from the ->post_pll_disable of the last active MST stream + * encoder on its primary encoder. See also the comment for + * intel_ddi_pre_enable(). 
+ */ static void intel_ddi_post_pll_disable(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, @@ -3264,7 +3355,7 @@ static void trans_port_sync_stop_link_train(struct intel_atomic_state *state, crtc_state); } -static void intel_enable_ddi_dp(struct intel_atomic_state *state, +static void intel_ddi_enable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) @@ -3286,18 +3377,8 @@ static void intel_enable_ddi_dp(struct intel_atomic_state *state, trans_port_sync_stop_link_train(state, encoder, crtc_state); } -/* FIXME bad home for this function */ -i915_reg_t hsw_chicken_trans_reg(struct drm_i915_private *i915, - enum transcoder cpu_transcoder) -{ - return DISPLAY_VER(i915) >= 14 ? - MTL_CHICKEN_TRANS(cpu_transcoder) : - CHICKEN_TRANS(cpu_transcoder); -} - static i915_reg_t -gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv, - enum port port) +gen9_chicken_trans_reg_by_port(struct intel_display *display, enum port port) { static const enum transcoder trans[] = { [PORT_A] = TRANSCODER_EDP, @@ -3307,19 +3388,20 @@ gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv, [PORT_E] = TRANSCODER_A, }; - drm_WARN_ON(&dev_priv->drm, DISPLAY_VER(dev_priv) < 9); + drm_WARN_ON(display->drm, DISPLAY_VER(display) < 9); - if (drm_WARN_ON(&dev_priv->drm, port < PORT_A || port > PORT_E)) + if (drm_WARN_ON(display->drm, port < PORT_A || port > PORT_E)) port = PORT_A; - return CHICKEN_TRANS(trans[port]); + return CHICKEN_TRANS(display, trans[port]); } -static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, +static void intel_ddi_enable_hdmi(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv 
= to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct drm_connector *connector = conn_state->connector; @@ -3350,7 +3432,7 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, * the bits affect a specific DDI port rather than * a specific transcoder. */ - i915_reg_t reg = gen9_chicken_trans_reg_by_port(dev_priv, port); + i915_reg_t reg = gen9_chicken_trans_reg_by_port(display, port); u32 val; val = intel_de_read(dev_priv, reg); @@ -3390,14 +3472,20 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, * is filled with lane count, already set in the crtc_state. * The same is required to be filled in PORT_BUF_CTL for C10/20 Phy. */ - buf_ctl = dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE; + buf_ctl = DDI_BUF_CTL_ENABLE; + + if (dig_port->lane_reversal) + buf_ctl |= DDI_BUF_PORT_REVERSAL; + if (dig_port->ddi_a_4_lanes) + buf_ctl |= DDI_A_4_LANES; + if (DISPLAY_VER(dev_priv) >= 14) { u8 lane_count = mtl_get_port_width(crtc_state->lane_count); u32 port_buf = 0; port_buf |= XELPDP_PORT_WIDTH(lane_count); - if (dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL) + if (dig_port->lane_reversal) port_buf |= XELPDP_PORT_REVERSAL; intel_de_rmw(dev_priv, XELPDP_PORT_BUF_CTL1(dev_priv, port), @@ -3417,20 +3505,46 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, intel_wait_ddi_buf_active(encoder); } -static void intel_enable_ddi(struct intel_atomic_state *state, +static void intel_ddi_enable(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); struct intel_crtc *pipe_crtc; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + bool is_hdmi = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); int i; + /* 128b/132b SST */ + if (!is_hdmi && intel_dp_is_uhbr(crtc_state)) { + const struct drm_display_mode 
*adjusted_mode = &crtc_state->hw.adjusted_mode; + u64 crtc_clock_hz = KHz(adjusted_mode->crtc_clock); + + intel_de_write(display, TRANS_DP2_VFREQHIGH(cpu_transcoder), + TRANS_DP2_VFREQ_PIXEL_CLOCK(crtc_clock_hz >> 24)); + intel_de_write(display, TRANS_DP2_VFREQLOW(cpu_transcoder), + TRANS_DP2_VFREQ_PIXEL_CLOCK(crtc_clock_hz & 0xffffff)); + } + intel_ddi_enable_transcoder_func(encoder, crtc_state); /* Enable/Disable DP2.0 SDP split config before transcoder */ intel_audio_sdp_split_update(crtc_state); + /* 128b/132b SST */ + if (!is_hdmi && intel_dp_is_uhbr(crtc_state)) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + intel_ddi_clear_act_sent(encoder, crtc_state); + + intel_de_rmw(display, TRANS_DDI_FUNC_CTL(display, cpu_transcoder), 0, + TRANS_DDI_DP_VC_PAYLOAD_ALLOC); + + intel_ddi_wait_for_act_sent(encoder, crtc_state); + drm_dp_dpcd_poll_act_handled(&intel_dp->aux, 0); + } + intel_enable_transcoder(crtc_state); intel_ddi_wait_for_fec_status(encoder, crtc_state, true); @@ -3442,16 +3556,16 @@ static void intel_enable_ddi(struct intel_atomic_state *state, intel_crtc_vblank_on(pipe_crtc_state); } - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - intel_enable_ddi_hdmi(state, encoder, crtc_state, conn_state); + if (is_hdmi) + intel_ddi_enable_hdmi(state, encoder, crtc_state, conn_state); else - intel_enable_ddi_dp(state, encoder, crtc_state, conn_state); + intel_ddi_enable_dp(state, encoder, crtc_state, conn_state); intel_hdcp_enable(state, encoder, crtc_state, conn_state); } -static void intel_disable_ddi_dp(struct intel_atomic_state *state, +static void intel_ddi_disable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) @@ -3472,7 +3586,7 @@ static void intel_disable_ddi_dp(struct intel_atomic_state *state, false); } -static void intel_disable_ddi_hdmi(struct intel_atomic_state *state, +static void intel_ddi_disable_hdmi(struct 
intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) @@ -3487,7 +3601,7 @@ static void intel_disable_ddi_hdmi(struct intel_atomic_state *state, connector->base.id, connector->name); } -static void intel_disable_ddi(struct intel_atomic_state *state, +static void intel_ddi_disable(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) @@ -3497,10 +3611,10 @@ static void intel_disable_ddi(struct intel_atomic_state *state, intel_hdcp_disable(to_intel_connector(old_conn_state->connector)); if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) - intel_disable_ddi_hdmi(state, encoder, old_crtc_state, + intel_ddi_disable_hdmi(state, encoder, old_crtc_state, old_conn_state); else - intel_disable_ddi_dp(state, encoder, old_crtc_state, + intel_ddi_disable_dp(state, encoder, old_crtc_state, old_conn_state); } @@ -3560,6 +3674,11 @@ void intel_ddi_update_active_dpll(struct intel_atomic_state *state, intel_update_active_dpll(state, pipe_crtc, encoder); } +/* + * Note: Also called from the ->pre_pll_enable of the first active MST stream + * encoder on its primary encoder. See also the comment for + * intel_ddi_pre_enable(). 
+ */ static void intel_ddi_pre_pll_enable(struct intel_atomic_state *state, struct intel_encoder *encoder, @@ -3603,9 +3722,9 @@ static void adlp_tbt_to_dp_alt_switch_wa(struct intel_encoder *encoder) static void mtl_ddi_prepare_link_retrain(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; u32 dp_tp_ctl; @@ -3613,21 +3732,22 @@ static void mtl_ddi_prepare_link_retrain(struct intel_dp *intel_dp, * TODO: To train with only a different voltage swing entry is not * necessary disable and enable port */ - dp_tp_ctl = intel_de_read(dev_priv, dp_tp_ctl_reg(encoder, crtc_state)); + dp_tp_ctl = intel_de_read(display, dp_tp_ctl_reg(encoder, crtc_state)); if (dp_tp_ctl & DP_TP_CTL_ENABLE) mtl_disable_ddi_buf(encoder, crtc_state); /* 6.d Configure and enable DP_TP_CTL with link training pattern 1 selected */ dp_tp_ctl = DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_PAT1; - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST) || + intel_dp_is_uhbr(crtc_state)) { dp_tp_ctl |= DP_TP_CTL_MODE_MST; } else { dp_tp_ctl |= DP_TP_CTL_MODE_SST; if (crtc_state->enhanced_framing) dp_tp_ctl |= DP_TP_CTL_ENHANCED_FRAME_ENABLE; } - intel_de_write(dev_priv, dp_tp_ctl_reg(encoder, crtc_state), dp_tp_ctl); - intel_de_posting_read(dev_priv, dp_tp_ctl_reg(encoder, crtc_state)); + intel_de_write(display, dp_tp_ctl_reg(encoder, crtc_state), dp_tp_ctl); + intel_de_posting_read(display, dp_tp_ctl_reg(encoder, crtc_state)); /* 6.f Enable D2D Link */ mtl_ddi_enable_d2d(encoder); @@ -3640,11 +3760,11 @@ static void mtl_ddi_prepare_link_retrain(struct intel_dp *intel_dp, /* 6.i Configure and enable DDI_CTL_DE to start sending valid data to port slice */ 
intel_dp->DP |= DDI_BUF_CTL_ENABLE; - if (DISPLAY_VER(dev_priv) >= 20) + if (DISPLAY_VER(display) >= 20) intel_dp->DP |= XE2LPD_DDI_BUF_D2D_LINK_ENABLE; - intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP); - intel_de_posting_read(dev_priv, DDI_BUF_CTL(port)); + intel_de_write(display, DDI_BUF_CTL(port), intel_dp->DP); + intel_de_posting_read(display, DDI_BUF_CTL(port)); /* 6.j Poll for PORT_BUF_CTL Idle Status == 0, timeout after 100 us */ intel_wait_ddi_buf_active(encoder); @@ -3679,7 +3799,8 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp, } dp_tp_ctl = DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_PAT1; - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST) || + intel_dp_is_uhbr(crtc_state)) { dp_tp_ctl |= DP_TP_CTL_MODE_MST; } else { dp_tp_ctl |= DP_TP_CTL_MODE_SST; @@ -3872,29 +3993,141 @@ static void bdw_get_trans_port_sync_config(struct intel_crtc_state *crtc_state) crtc_state->sync_mode_slaves_mask); } +static void intel_ddi_read_func_ctl_dvi(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + u32 ddi_func_ctl) +{ + struct intel_display *display = to_intel_display(encoder); + + crtc_state->output_types |= BIT(INTEL_OUTPUT_HDMI); + if (DISPLAY_VER(display) >= 14) + crtc_state->lane_count = + ((ddi_func_ctl & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; + else + crtc_state->lane_count = 4; +} + +static void intel_ddi_read_func_ctl_hdmi(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + u32 ddi_func_ctl) +{ + crtc_state->has_hdmi_sink = true; + + crtc_state->infoframes.enable |= + intel_hdmi_infoframes_enabled(encoder, crtc_state); + + if (crtc_state->infoframes.enable) + crtc_state->has_infoframe = true; + + if (ddi_func_ctl & TRANS_DDI_HDMI_SCRAMBLING) + crtc_state->hdmi_scrambling = true; + if (ddi_func_ctl & TRANS_DDI_HIGH_TMDS_CHAR_RATE) + crtc_state->hdmi_high_tmds_clock_ratio = true; + + 
intel_ddi_read_func_ctl_dvi(encoder, crtc_state, ddi_func_ctl); +} + +static void intel_ddi_read_func_ctl_fdi(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + u32 ddi_func_ctl) +{ + struct intel_display *display = to_intel_display(encoder); + + crtc_state->output_types |= BIT(INTEL_OUTPUT_ANALOG); + crtc_state->enhanced_framing = + intel_de_read(display, dp_tp_ctl_reg(encoder, crtc_state)) & + DP_TP_CTL_ENHANCED_FRAME_ENABLE; +} + +static void intel_ddi_read_func_ctl_dp_sst(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + u32 ddi_func_ctl) +{ + struct intel_display *display = to_intel_display(encoder); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + + if (encoder->type == INTEL_OUTPUT_EDP) + crtc_state->output_types |= BIT(INTEL_OUTPUT_EDP); + else + crtc_state->output_types |= BIT(INTEL_OUTPUT_DP); + crtc_state->lane_count = + ((ddi_func_ctl & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; + + if (DISPLAY_VER(display) >= 12 && + (ddi_func_ctl & TRANS_DDI_MODE_SELECT_MASK) == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B) + crtc_state->mst_master_transcoder = + REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, ddi_func_ctl); + + intel_cpu_transcoder_get_m1_n1(crtc, cpu_transcoder, &crtc_state->dp_m_n); + intel_cpu_transcoder_get_m2_n2(crtc, cpu_transcoder, &crtc_state->dp_m2_n2); + + crtc_state->enhanced_framing = + intel_de_read(display, dp_tp_ctl_reg(encoder, crtc_state)) & + DP_TP_CTL_ENHANCED_FRAME_ENABLE; + + if (DISPLAY_VER(display) >= 11) + crtc_state->fec_enable = + intel_de_read(display, + dp_tp_ctl_reg(encoder, crtc_state)) & DP_TP_CTL_FEC_ENABLE; + + if (dig_port->lspcon.active && intel_dp_has_hdmi_sink(&dig_port->dp)) + crtc_state->infoframes.enable |= + intel_lspcon_infoframes_enabled(encoder, crtc_state); + else + crtc_state->infoframes.enable |= + 
intel_hdmi_infoframes_enabled(encoder, crtc_state); +} + +static void intel_ddi_read_func_ctl_dp_mst(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + u32 ddi_func_ctl) +{ + struct intel_display *display = to_intel_display(encoder); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + + crtc_state->output_types |= BIT(INTEL_OUTPUT_DP_MST); + crtc_state->lane_count = + ((ddi_func_ctl & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; + + if (DISPLAY_VER(display) >= 12) + crtc_state->mst_master_transcoder = + REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, ddi_func_ctl); + + intel_cpu_transcoder_get_m1_n1(crtc, cpu_transcoder, &crtc_state->dp_m_n); + + if (DISPLAY_VER(display) >= 11) + crtc_state->fec_enable = + intel_de_read(display, + dp_tp_ctl_reg(encoder, crtc_state)) & DP_TP_CTL_FEC_ENABLE; + + crtc_state->infoframes.enable |= + intel_hdmi_infoframes_enabled(encoder, crtc_state); +} + static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; - struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - u32 temp, flags = 0; + u32 ddi_func_ctl, ddi_mode, flags = 0; - temp = intel_de_read(dev_priv, - TRANS_DDI_FUNC_CTL(dev_priv, cpu_transcoder)); - if (temp & TRANS_DDI_PHSYNC) + ddi_func_ctl = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(dev_priv, cpu_transcoder)); + if (ddi_func_ctl & TRANS_DDI_PHSYNC) flags |= DRM_MODE_FLAG_PHSYNC; else flags |= DRM_MODE_FLAG_NHSYNC; - if (temp & TRANS_DDI_PVSYNC) + if (ddi_func_ctl & TRANS_DDI_PVSYNC) flags |= DRM_MODE_FLAG_PVSYNC; else flags |= DRM_MODE_FLAG_NVSYNC; pipe_config->hw.adjusted_mode.flags |= flags; - switch 
(temp & TRANS_DDI_BPC_MASK) { + switch (ddi_func_ctl & TRANS_DDI_BPC_MASK) { case TRANS_DDI_BPC_6: pipe_config->pipe_bpp = 18; break; @@ -3911,93 +4144,37 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, break; } - switch (temp & TRANS_DDI_MODE_SELECT_MASK) { - case TRANS_DDI_MODE_SELECT_HDMI: - pipe_config->has_hdmi_sink = true; - - pipe_config->infoframes.enable |= - intel_hdmi_infoframes_enabled(encoder, pipe_config); - - if (pipe_config->infoframes.enable) - pipe_config->has_infoframe = true; - - if (temp & TRANS_DDI_HDMI_SCRAMBLING) - pipe_config->hdmi_scrambling = true; - if (temp & TRANS_DDI_HIGH_TMDS_CHAR_RATE) - pipe_config->hdmi_high_tmds_clock_ratio = true; - fallthrough; - case TRANS_DDI_MODE_SELECT_DVI: - pipe_config->output_types |= BIT(INTEL_OUTPUT_HDMI); - if (DISPLAY_VER(dev_priv) >= 14) - pipe_config->lane_count = - ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; - else - pipe_config->lane_count = 4; - break; - case TRANS_DDI_MODE_SELECT_DP_SST: - if (encoder->type == INTEL_OUTPUT_EDP) - pipe_config->output_types |= BIT(INTEL_OUTPUT_EDP); - else - pipe_config->output_types |= BIT(INTEL_OUTPUT_DP); - pipe_config->lane_count = - ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; - - intel_cpu_transcoder_get_m1_n1(crtc, cpu_transcoder, - &pipe_config->dp_m_n); - intel_cpu_transcoder_get_m2_n2(crtc, cpu_transcoder, - &pipe_config->dp_m2_n2); - - pipe_config->enhanced_framing = - intel_de_read(dev_priv, dp_tp_ctl_reg(encoder, pipe_config)) & - DP_TP_CTL_ENHANCED_FRAME_ENABLE; - - if (DISPLAY_VER(dev_priv) >= 11) - pipe_config->fec_enable = - intel_de_read(dev_priv, - dp_tp_ctl_reg(encoder, pipe_config)) & DP_TP_CTL_FEC_ENABLE; + ddi_mode = ddi_func_ctl & TRANS_DDI_MODE_SELECT_MASK; + + if (ddi_mode == TRANS_DDI_MODE_SELECT_HDMI) { + intel_ddi_read_func_ctl_hdmi(encoder, pipe_config, ddi_func_ctl); + } else if (ddi_mode == TRANS_DDI_MODE_SELECT_DVI) { + intel_ddi_read_func_ctl_dvi(encoder, pipe_config, 
ddi_func_ctl); + } else if (ddi_mode == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B && !HAS_DP20(display)) { + intel_ddi_read_func_ctl_fdi(encoder, pipe_config, ddi_func_ctl); + } else if (ddi_mode == TRANS_DDI_MODE_SELECT_DP_SST) { + intel_ddi_read_func_ctl_dp_sst(encoder, pipe_config, ddi_func_ctl); + } else if (ddi_mode == TRANS_DDI_MODE_SELECT_DP_MST) { + intel_ddi_read_func_ctl_dp_mst(encoder, pipe_config, ddi_func_ctl); + } else if (ddi_mode == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B && HAS_DP20(display)) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (dig_port->lspcon.active && intel_dp_has_hdmi_sink(&dig_port->dp)) - pipe_config->infoframes.enable |= - intel_lspcon_infoframes_enabled(encoder, pipe_config); + /* + * If this is true, we know we're being called from mst stream + * encoder's ->get_config(). + */ + if (intel_dp->is_mst) + intel_ddi_read_func_ctl_dp_mst(encoder, pipe_config, ddi_func_ctl); else - pipe_config->infoframes.enable |= - intel_hdmi_infoframes_enabled(encoder, pipe_config); - break; - case TRANS_DDI_MODE_SELECT_FDI_OR_128B132B: - if (!HAS_DP20(dev_priv)) { - /* FDI */ - pipe_config->output_types |= BIT(INTEL_OUTPUT_ANALOG); - pipe_config->enhanced_framing = - intel_de_read(dev_priv, dp_tp_ctl_reg(encoder, pipe_config)) & - DP_TP_CTL_ENHANCED_FRAME_ENABLE; - break; - } - fallthrough; /* 128b/132b */ - case TRANS_DDI_MODE_SELECT_DP_MST: - pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); - pipe_config->lane_count = - ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; - - if (DISPLAY_VER(dev_priv) >= 12) - pipe_config->mst_master_transcoder = - REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp); - - intel_cpu_transcoder_get_m1_n1(crtc, cpu_transcoder, - &pipe_config->dp_m_n); - - if (DISPLAY_VER(dev_priv) >= 11) - pipe_config->fec_enable = - intel_de_read(dev_priv, - dp_tp_ctl_reg(encoder, pipe_config)) & DP_TP_CTL_FEC_ENABLE; - - pipe_config->infoframes.enable |= - intel_hdmi_infoframes_enabled(encoder, 
pipe_config); - break; - default: - break; + intel_ddi_read_func_ctl_dp_sst(encoder, pipe_config, ddi_func_ctl); } } +/* + * Note: Also called from the ->get_config of the MST stream encoders on their + * primary encoder, via the platform specific hooks here. See also the comment + * for intel_ddi_pre_enable(). + */ static void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { @@ -4465,8 +4642,7 @@ static const struct drm_encoder_funcs intel_ddi_funcs = { .late_register = intel_ddi_encoder_late_register, }; -static struct intel_connector * -intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) +static int intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); struct intel_connector *connector; @@ -4474,7 +4650,7 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) connector = intel_connector_alloc(); if (!connector) - return NULL; + return -ENOMEM; dig_port->dp.output_reg = DDI_BUF_CTL(port); if (DISPLAY_VER(i915) >= 14) @@ -4489,7 +4665,7 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) if (!intel_dp_init_connector(dig_port, connector)) { kfree(connector); - return NULL; + return -EINVAL; } if (dig_port->base.type == INTEL_OUTPUT_EDP) { @@ -4505,7 +4681,7 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) } } - return connector; + return 0; } static int intel_hdmi_reset_link(struct intel_encoder *encoder, @@ -4671,20 +4847,28 @@ static bool bdw_digital_port_connected(struct intel_encoder *encoder) return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & bit; } -static struct intel_connector * -intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port) +static int intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port) { struct intel_connector *connector; enum port port = dig_port->base.port; connector = intel_connector_alloc(); if (!connector) - return NULL; + return 
-ENOMEM; dig_port->hdmi.hdmi_reg = DDI_BUF_CTL(port); - intel_hdmi_init_connector(dig_port, connector); - return connector; + if (!intel_hdmi_init_connector(dig_port, connector)) { + /* + * HDMI connector init failures may just mean conflicting DDC + * pins or not having enough lanes. Handle them gracefully, but + * don't fail the entire DDI init. + */ + dig_port->hdmi.hdmi_reg = INVALID_MMIO_REG; + kfree(connector); + } + + return 0; } static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) @@ -4694,7 +4878,7 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) if (dig_port->base.port != PORT_A) return false; - if (dig_port->saved_port_bits & DDI_A_4_LANES) + if (dig_port->ddi_a_4_lanes) return false; /* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only @@ -4732,7 +4916,7 @@ intel_ddi_max_lanes(struct intel_digital_port *dig_port) if (intel_ddi_a_force_4_lanes(dig_port)) { drm_dbg_kms(&dev_priv->drm, "Forcing DDI_A_4_LANES for port A\n"); - dig_port->saved_port_bits |= DDI_A_4_LANES; + dig_port->ddi_a_4_lanes = true; max_lanes = 4; } @@ -4839,8 +5023,10 @@ static void intel_ddi_tc_encoder_suspend_complete(struct intel_encoder *encoder) static void intel_ddi_encoder_shutdown(struct intel_encoder *encoder) { - intel_dp_encoder_shutdown(encoder); - intel_hdmi_encoder_shutdown(encoder); + if (intel_encoder_is_dp(encoder)) + intel_dp_encoder_shutdown(encoder); + if (intel_encoder_is_hdmi(encoder)) + intel_hdmi_encoder_shutdown(encoder); } static void intel_ddi_tc_encoder_shutdown_complete(struct intel_encoder *encoder) @@ -4911,6 +5097,7 @@ void intel_ddi_init(struct intel_display *display, bool init_hdmi, init_dp; enum port port; enum phy phy; + u32 ddi_buf_ctl; port = intel_bios_encoder_port(devdata); if (port == PORT_NONE) @@ -5034,10 +5221,10 @@ void intel_ddi_init(struct intel_display *display, encoder->compute_output_type = intel_ddi_compute_output_type; encoder->compute_config = intel_ddi_compute_config; 
encoder->compute_config_late = intel_ddi_compute_config_late; - encoder->enable = intel_enable_ddi; + encoder->enable = intel_ddi_enable; encoder->pre_pll_enable = intel_ddi_pre_pll_enable; encoder->pre_enable = intel_ddi_pre_enable; - encoder->disable = intel_disable_ddi; + encoder->disable = intel_ddi_disable; encoder->post_pll_disable = intel_ddi_post_pll_disable; encoder->post_disable = intel_ddi_post_disable; encoder->update_pipe = intel_ddi_update_pipe; @@ -5160,17 +5347,12 @@ void intel_ddi_init(struct intel_display *display, else encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); - if (DISPLAY_VER(dev_priv) >= 11) - dig_port->saved_port_bits = - intel_de_read(dev_priv, DDI_BUF_CTL(port)) - & DDI_BUF_PORT_REVERSAL; - else - dig_port->saved_port_bits = - intel_de_read(dev_priv, DDI_BUF_CTL(port)) - & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + ddi_buf_ctl = intel_de_read(dev_priv, DDI_BUF_CTL(port)); + + dig_port->lane_reversal = intel_bios_encoder_lane_reversal(devdata) || + ddi_buf_ctl & DDI_BUF_PORT_REVERSAL; - if (intel_bios_encoder_lane_reversal(devdata)) - dig_port->saved_port_bits |= DDI_BUF_PORT_REVERSAL; + dig_port->ddi_a_4_lanes = DISPLAY_VER(dev_priv) < 11 && ddi_buf_ctl & DDI_A_4_LANES; dig_port->dp.output_reg = INVALID_MMIO_REG; dig_port->max_lanes = intel_ddi_max_lanes(dig_port); @@ -5233,7 +5415,7 @@ void intel_ddi_init(struct intel_display *display, intel_infoframe_init(dig_port); if (init_dp) { - if (!intel_ddi_init_dp_connector(dig_port)) + if (intel_ddi_init_dp_connector(dig_port)) goto err; dig_port->hpd_pulse = intel_dp_hpd_pulse; @@ -5247,7 +5429,7 @@ void intel_ddi_init(struct intel_display *display, * but leave it just in case we have some really bad VBTs... 
*/ if (encoder->type != INTEL_OUTPUT_EDP && init_hdmi) { - if (!intel_ddi_init_hdmi_connector(dig_port)) + if (intel_ddi_init_hdmi_connector(dig_port)) goto err; } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 640851d46b1b..2faadd1441e2 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -26,10 +26,12 @@ enum transcoder; i915_reg_t dp_tp_ctl_reg(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); -i915_reg_t dp_tp_status_reg(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state); -i915_reg_t hsw_chicken_trans_reg(struct drm_i915_private *i915, - enum transcoder cpu_transcoder); + +void intel_ddi_clear_act_sent(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void intel_ddi_wait_for_act_sent(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); + void intel_ddi_fdi_post_disable(struct intel_atomic_state *state, struct intel_encoder *intel_encoder, const struct intel_crtc_state *old_crtc_state, @@ -57,6 +59,8 @@ void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, void intel_ddi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); +void intel_ddi_config_transcoder_func(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index 4d21ce734343..9389b295036e 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -1687,18 +1687,24 @@ 
dg2_get_snps_buf_trans(struct intel_encoder *encoder, } static const struct intel_ddi_buf_trans * -mtl_get_cx0_buf_trans(struct intel_encoder *encoder, +mtl_get_c10_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - if (intel_crtc_has_dp_encoder(crtc_state) && crtc_state->port_clock >= 1000000) + return intel_get_buf_trans(&mtl_c10_trans_dp14, n_entries); +} + +static const struct intel_ddi_buf_trans * +mtl_get_c20_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + if (intel_crtc_has_dp_encoder(crtc_state) && intel_dp_is_uhbr(crtc_state)) return intel_get_buf_trans(&mtl_c20_trans_uhbr, n_entries); - else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && !(intel_encoder_is_c10phy(encoder))) + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&mtl_c20_trans_hdmi, n_entries); - else if (!intel_encoder_is_c10phy(encoder)) - return intel_get_buf_trans(&mtl_c20_trans_dp14, n_entries); else - return intel_get_buf_trans(&mtl_c10_trans_dp14, n_entries); + return intel_get_buf_trans(&mtl_c20_trans_dp14, n_entries); } void intel_ddi_buf_trans_init(struct intel_encoder *encoder) @@ -1706,7 +1712,10 @@ void intel_ddi_buf_trans_init(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); if (DISPLAY_VER(i915) >= 14) { - encoder->get_buf_trans = mtl_get_cx0_buf_trans; + if (intel_encoder_is_c10phy(encoder)) + encoder->get_buf_trans = mtl_get_c10_buf_trans; + else + encoder->get_buf_trans = mtl_get_c20_buf_trans; } else if (IS_DG2(i915)) { encoder->get_buf_trans = dg2_get_snps_buf_trans; } else if (IS_ALDERLAKE_P(i915)) { diff --git a/drivers/gpu/drm/i915/display/intel_de.h b/drivers/gpu/drm/i915/display/intel_de.h index bb51f974e9e2..b7399e9d11cc 100644 --- a/drivers/gpu/drm/i915/display/intel_de.h +++ b/drivers/gpu/drm/i915/display/intel_de.h @@ -6,14 +6,16 @@ #ifndef __INTEL_DE_H__ #define 
__INTEL_DE_H__ -#include "i915_drv.h" -#include "i915_trace.h" +#include "intel_display_conversion.h" +#include "intel_display_core.h" +#include "intel_dmc_wl.h" #include "intel_dsb.h" #include "intel_uncore.h" +#include "intel_uncore_trace.h" static inline struct intel_uncore *__to_uncore(struct intel_display *display) { - return &to_i915(display->drm)->uncore; + return to_intel_uncore(display->drm); } static inline u32 @@ -118,6 +120,16 @@ __intel_de_wait_for_register_nowl(struct intel_display *display, } static inline int +__intel_de_wait_for_register_atomic_nowl(struct intel_display *display, + i915_reg_t reg, + u32 mask, u32 value, + unsigned int fast_timeout_us) +{ + return __intel_wait_for_register(__to_uncore(display), reg, mask, + value, fast_timeout_us, 0, NULL); +} + +static inline int intel_de_wait(struct intel_display *display, i915_reg_t reg, u32 mask, u32 value, unsigned int timeout) { diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 863927f429aa..4271da219b41 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -511,6 +511,7 @@ void vlv_wait_port_ready(struct intel_display *display, void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state) { + struct intel_display *display = to_intel_display(new_crtc_state); struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; @@ -554,8 +555,7 @@ void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state) if (DISPLAY_VER(dev_priv) == 14) set |= DP_FEC_BS_JITTER_WA; - intel_de_rmw(dev_priv, - hsw_chicken_trans_reg(dev_priv, cpu_transcoder), + intel_de_rmw(display, CHICKEN_TRANS(display, cpu_transcoder), clear, set); } @@ -591,6 +591,7 @@ void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state) void 
intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state) { + struct intel_display *display = to_intel_display(old_crtc_state); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; @@ -628,7 +629,7 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state) intel_de_write(dev_priv, TRANSCONF(dev_priv, cpu_transcoder), val); if (DISPLAY_VER(dev_priv) >= 12) - intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, cpu_transcoder), + intel_de_rmw(display, CHICKEN_TRANS(display, cpu_transcoder), FECSTALL_DIS_DPTSTREAM_DPTTG, 0); if ((val & TRANSCONF_ENABLE) == 0) @@ -1744,10 +1745,9 @@ static void hsw_set_linetime_wm(const struct intel_crtc_state *crtc_state) static void hsw_set_frame_start_delay(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); - intel_de_rmw(i915, hsw_chicken_trans_reg(i915, crtc_state->cpu_transcoder), + intel_de_rmw(display, CHICKEN_TRANS(display, crtc_state->cpu_transcoder), HSW_FRAME_START_DELAY_MASK, HSW_FRAME_START_DELAY(crtc_state->framestart_delay - 1)); } @@ -2371,7 +2371,7 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) const struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); /* GDG double wide on either pipe, otherwise pipe A only */ - return DISPLAY_VER(dev_priv) < 4 && + return HAS_DOUBLE_WIDE(dev_priv) && (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } @@ -3137,9 +3137,14 @@ bdw_get_pipe_misc_output_format(struct intel_crtc *crtc) tmp = intel_de_read(dev_priv, PIPE_MISC(crtc->pipe)); if (tmp & PIPE_MISC_YUV420_ENABLE) { - /* We support 4:2:0 in full blend mode only */ - drm_WARN_ON(&dev_priv->drm, - (tmp & PIPE_MISC_YUV420_MODE_FULL_BLEND) == 
0); + /* + * We support 4:2:0 in full blend mode only. + * For xe3_lpd+ this is implied in YUV420 Enable bit. + * Ensure the same for prior platforms in YUV420 Mode bit. + */ + if (DISPLAY_VER(dev_priv) < 30) + drm_WARN_ON(&dev_priv->drm, + (tmp & PIPE_MISC_YUV420_MODE_FULL_BLEND) == 0); return INTEL_OUTPUT_FORMAT_YCBCR420; } else if (tmp & PIPE_MISC_OUTPUT_COLORSPACE_YUV) { @@ -3207,7 +3212,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, intel_color_get_config(pipe_config); - if (DISPLAY_VER(dev_priv) < 4) + if (HAS_DOUBLE_WIDE(dev_priv)) pipe_config->double_wide = tmp & TRANSCONF_DOUBLE_WIDE; intel_get_transcoder_timings(crtc, pipe_config); @@ -3388,8 +3393,8 @@ static void bdw_set_pipe_misc(struct intel_dsb *dsb, val |= PIPE_MISC_OUTPUT_COLORSPACE_YUV; if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) - val |= PIPE_MISC_YUV420_ENABLE | - PIPE_MISC_YUV420_MODE_FULL_BLEND; + val |= DISPLAY_VER(display) >= 30 ? PIPE_MISC_YUV420_ENABLE : + PIPE_MISC_YUV420_ENABLE | PIPE_MISC_YUV420_MODE_FULL_BLEND; if (DISPLAY_VER(dev_priv) >= 11 && is_hdr_mode(crtc_state)) val |= PIPE_MISC_HDR_MODE_PRECISION; @@ -3746,12 +3751,13 @@ static u8 fixup_ultrajoiner_secondary_pipes(u8 ultrajoiner_primary_pipes, static void enabled_ultrajoiner_pipes(struct drm_i915_private *i915, u8 *primary_pipes, u8 *secondary_pipes) { + struct intel_display *display = &i915->display; struct intel_crtc *crtc; *primary_pipes = 0; *secondary_pipes = 0; - if (!HAS_ULTRAJOINER(i915)) + if (!HAS_ULTRAJOINER(display)) return; for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, @@ -4111,6 +4117,7 @@ static void intel_joiner_get_config(struct intel_crtc_state *crtc_state) static bool hsw_get_pipe_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { + struct intel_display *display = to_intel_display(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); bool active; u32 tmp; @@ -4187,7 +4194,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, } 
if (!transcoder_is_dsi(pipe_config->cpu_transcoder)) { - tmp = intel_de_read(dev_priv, hsw_chicken_trans_reg(dev_priv, pipe_config->cpu_transcoder)); + tmp = intel_de_read(display, CHICKEN_TRANS(display, pipe_config->cpu_transcoder)); pipe_config->framestart_delay = REG_FIELD_GET(HSW_FRAME_START_DELAY_MASK, tmp) + 1; } else { @@ -4545,6 +4552,7 @@ static int hsw_compute_linetime_wm(struct intel_atomic_state *state, static int intel_crtc_atomic_check(struct intel_atomic_state *state, struct intel_crtc *crtc) { + struct intel_display *display = to_intel_display(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -4581,12 +4589,12 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, return ret; } - ret = intel_atomic_setup_scalers(dev_priv, crtc, crtc_state); + ret = intel_atomic_setup_scalers(state, crtc); if (ret) return ret; } - if (HAS_IPS(dev_priv)) { + if (HAS_IPS(display)) { ret = hsw_ips_compute_config(state, crtc); if (ret) return ret; @@ -5208,7 +5216,7 @@ pipe_config_dp_vsc_sdp_mismatch(struct drm_printer *p, bool fastset, const struct drm_dp_vsc_sdp *a, const struct drm_dp_vsc_sdp *b) { - pipe_config_mismatch(p, fastset, crtc, name, "dp sdp"); + pipe_config_mismatch(p, fastset, crtc, name, "dp vsc sdp"); drm_printf(p, "expected:\n"); drm_dp_vsc_sdp_log(p, a); @@ -5217,27 +5225,18 @@ pipe_config_dp_vsc_sdp_mismatch(struct drm_printer *p, bool fastset, } static void -pipe_config_dp_as_sdp_mismatch(struct drm_i915_private *i915, - bool fastset, const char *name, +pipe_config_dp_as_sdp_mismatch(struct drm_printer *p, bool fastset, + const struct intel_crtc *crtc, + const char *name, const struct drm_dp_as_sdp *a, const struct drm_dp_as_sdp *b) { - struct drm_printer p; - - if (fastset) { - p = drm_dbg_printer(&i915->drm, DRM_UT_KMS, NULL); - - drm_printf(&p, "fastset requirement not met in %s dp sdp\n", name); - } else { - p = 
drm_err_printer(&i915->drm, NULL); + pipe_config_mismatch(p, fastset, crtc, name, "dp as sdp"); - drm_printf(&p, "mismatch in %s dp sdp\n", name); - } - - drm_printf(&p, "expected:\n"); - drm_dp_as_sdp_log(&p, a); - drm_printf(&p, "found:\n"); - drm_dp_as_sdp_log(&p, b); + drm_printf(p, "expected:\n"); + drm_dp_as_sdp_log(p, a); + drm_printf(p, "found:\n"); + drm_dp_as_sdp_log(p, b); } /* Returns the length up to and including the last differing byte */ @@ -5260,26 +5259,13 @@ pipe_config_buffer_mismatch(struct drm_printer *p, bool fastset, const char *name, const u8 *a, const u8 *b, size_t len) { - const char *loglevel; - - if (fastset) { - if (!drm_debug_enabled(DRM_UT_KMS)) - return; - - loglevel = KERN_DEBUG; - } else { - loglevel = KERN_ERR; - } - pipe_config_mismatch(p, fastset, crtc, name, "buffer"); /* only dump up to the last difference */ len = memcmp_diff_len(a, b, len); - print_hex_dump(loglevel, "expected: ", DUMP_PREFIX_NONE, - 16, 0, a, len, false); - print_hex_dump(loglevel, "found: ", DUMP_PREFIX_NONE, - 16, 0, b, len, false); + drm_print_hex_dump(p, "expected: ", a, len); + drm_print_hex_dump(p, "found: ", b, len); } static void @@ -5322,6 +5308,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, const struct intel_crtc_state *pipe_config, bool fastset) { + struct intel_display *display = to_intel_display(current_config); struct drm_i915_private *dev_priv = to_i915(current_config->uapi.crtc->dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct drm_printer p; @@ -5498,7 +5485,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_DP_AS_SDP(name) do { \ if (!intel_compare_dp_as_sdp(&current_config->infoframes.name, \ &pipe_config->infoframes.name)) { \ - pipe_config_dp_as_sdp_mismatch(dev_priv, fastset, __stringify(name), \ + pipe_config_dp_as_sdp_mismatch(&p, fastset, crtc, __stringify(name), \ &current_config->infoframes.name, \ &pipe_config->infoframes.name); \ 
ret = false; \ @@ -5562,7 +5549,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(lane_count); PIPE_CONF_CHECK_X(lane_lat_optim_mask); - if (HAS_DOUBLE_BUFFERED_M_N(dev_priv)) { + if (HAS_DOUBLE_BUFFERED_M_N(display)) { if (!fastset || !pipe_config->update_m_n) PIPE_CONF_CHECK_M_N(dp_m_n); } else { @@ -5743,7 +5730,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(dsc.config.nsl_bpg_offset); PIPE_CONF_CHECK_BOOL(dsc.compression_enable); - PIPE_CONF_CHECK_BOOL(dsc.dsc_split); + PIPE_CONF_CHECK_I(dsc.num_streams); PIPE_CONF_CHECK_I(dsc.compressed_bpp_x16); PIPE_CONF_CHECK_BOOL(splitter.enable); @@ -6797,6 +6784,7 @@ static int intel_atomic_check_config_and_link(struct intel_atomic_state *state) int intel_atomic_check(struct drm_device *dev, struct drm_atomic_state *_state) { + struct intel_display *display = to_intel_display(dev); struct drm_i915_private *dev_priv = to_i915(dev); struct intel_atomic_state *state = to_intel_atomic_state(_state); struct intel_crtc_state *old_crtc_state, *new_crtc_state; @@ -6804,7 +6792,7 @@ int intel_atomic_check(struct drm_device *dev, int ret, i; bool any_ms = false; - if (!intel_display_driver_check_access(dev_priv)) + if (!intel_display_driver_check_access(display)) return -ENODEV; for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, @@ -7572,7 +7560,7 @@ static void intel_atomic_dsb_cleanup(struct intel_crtc_state *crtc_state) static void intel_atomic_cleanup_work(struct work_struct *work) { struct intel_atomic_state *state = - container_of(work, struct intel_atomic_state, base.commit_work); + container_of(work, struct intel_atomic_state, cleanup_work); struct drm_i915_private *i915 = to_i915(state->base.dev); struct intel_crtc_state *old_crtc_state; struct intel_crtc *crtc; @@ -7822,6 +7810,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) /* Now enable the clocks, plane, pipe, and connectors that we 
set up. */ dev_priv->display.funcs.display->commit_modeset_enables(state); + intel_program_dpkgc_latency(state); + if (state->modeset) intel_set_cdclk_post_plane_update(state); @@ -7927,8 +7917,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) * schedule point (cond_resched()) here anyway to keep latencies * down. */ - INIT_WORK(&state->base.commit_work, intel_atomic_cleanup_work); - queue_work(system_highpri_wq, &state->base.commit_work); + INIT_WORK(&state->cleanup_work, intel_atomic_cleanup_work); + queue_work(dev_priv->display.wq.cleanup, &state->cleanup_work); } static void intel_atomic_commit_work(struct work_struct *work) @@ -8166,7 +8156,7 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_lvds_init(dev_priv); intel_crt_init(display); - dpd_is_edp = intel_dp_is_port_edp(dev_priv, PORT_D); + dpd_is_edp = intel_dp_is_port_edp(display, PORT_D); if (ilk_has_edp_a(dev_priv)) g4x_dp_init(dev_priv, DP_A, PORT_A); @@ -8212,14 +8202,14 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) * trust the port type the VBT declares as we've seen at least * HDMI ports that the VBT claim are DP or eDP. 
*/ - has_edp = intel_dp_is_port_edp(dev_priv, PORT_B); + has_edp = intel_dp_is_port_edp(display, PORT_B); has_port = intel_bios_is_port_present(display, PORT_B); if (intel_de_read(dev_priv, VLV_DP_B) & DP_DETECTED || has_port) has_edp &= g4x_dp_init(dev_priv, VLV_DP_B, PORT_B); if ((intel_de_read(dev_priv, VLV_HDMIB) & SDVO_DETECTED || has_port) && !has_edp) g4x_hdmi_init(dev_priv, VLV_HDMIB, PORT_B); - has_edp = intel_dp_is_port_edp(dev_priv, PORT_C); + has_edp = intel_dp_is_port_edp(display, PORT_C); has_port = intel_bios_is_port_present(display, PORT_C); if (intel_de_read(dev_priv, VLV_DP_C) & DP_DETECTED || has_port) has_edp &= g4x_dp_init(dev_priv, VLV_DP_C, PORT_C); @@ -8308,11 +8298,12 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) static int max_dotclock(struct drm_i915_private *i915) { - int max_dotclock = i915->display.cdclk.max_dotclk_freq; + struct intel_display *display = &i915->display; + int max_dotclock = display->cdclk.max_dotclk_freq; - if (HAS_ULTRAJOINER(i915)) + if (HAS_ULTRAJOINER(display)) max_dotclock *= 4; - else if (HAS_UNCOMPRESSED_JOINER(i915) || HAS_BIGJOINER(i915)) + else if (HAS_UNCOMPRESSED_JOINER(display) || HAS_BIGJOINER(display)) max_dotclock *= 2; return max_dotclock; diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index caef04f655c5..49a246feb1ae 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -238,9 +238,6 @@ enum phy_fia { for ((__phy) = PHY_A; (__phy) < I915_MAX_PHYS; (__phy)++) \ for_each_if((__phys_mask) & BIT(__phy)) -#define for_each_crtc(dev, crtc) \ - list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head) - #define for_each_intel_plane(dev, intel_plane) \ list_for_each_entry(intel_plane, \ &(dev)->mode_config.plane_list, \ diff --git a/drivers/gpu/drm/i915/display/intel_display_conversion.c b/drivers/gpu/drm/i915/display/intel_display_conversion.c new file mode 100644 index 
000000000000..0578b68404da --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_conversion.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2024 Intel Corporation */ + +#include "i915_drv.h" + +struct intel_display *__i915_to_display(struct drm_i915_private *i915) +{ + return &i915->display; +} + +struct intel_display *__drm_to_display(struct drm_device *drm) +{ + return __i915_to_display(to_i915(drm)); +} diff --git a/drivers/gpu/drm/i915/display/intel_display_conversion.h b/drivers/gpu/drm/i915/display/intel_display_conversion.h index ad8545c8055d..46c7208d42ba 100644 --- a/drivers/gpu/drm/i915/display/intel_display_conversion.h +++ b/drivers/gpu/drm/i915/display/intel_display_conversion.h @@ -8,14 +8,20 @@ #ifndef __INTEL_DISPLAY_CONVERSION__ #define __INTEL_DISPLAY_CONVERSION__ +struct drm_device; +struct drm_i915_private; +struct intel_display; + +struct intel_display *__i915_to_display(struct drm_i915_private *i915); +struct intel_display *__drm_to_display(struct drm_device *drm); /* * Transitional macro to optionally convert struct drm_i915_private * to struct * intel_display *, also accepting the latter. 
*/ #define __to_intel_display(p) \ _Generic(p, \ - const struct drm_i915_private *: (&((const struct drm_i915_private *)(p))->display), \ - struct drm_i915_private *: (&((struct drm_i915_private *)(p))->display), \ + const struct drm_i915_private *: __i915_to_display((struct drm_i915_private *)(p)), \ + struct drm_i915_private *: __i915_to_display((struct drm_i915_private *)(p)), \ const struct intel_display *: (p), \ struct intel_display *: (p)) diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 45b7c6900adc..554870d2494b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -453,7 +453,14 @@ struct intel_display { } ips; struct { - bool display_irqs_enabled; + /* + * Most platforms treat the display irq block as an always-on + * power domain. vlv/chv can disable it at runtime and need + * special care to avoid writing any of the display block + * registers outside of the power domain. We defer setting up + * the display irqs in this case to the runtime pm. + */ + bool vlv_display_irqs_enabled; /* For i915gm/i945gm vblank irq workaround */ u8 vblank_enabled; @@ -505,6 +512,11 @@ struct intel_display { /* restore state for suspend/resume and display reset */ struct drm_atomic_state *modeset_state; struct drm_modeset_acquire_ctx reset_ctx; + u32 saveDSPARB; + u32 saveSWF0[16]; + u32 saveSWF1[16]; + u32 saveSWF3[3]; + u16 saveGCDGMBUS; } restore; struct { @@ -542,6 +554,9 @@ struct intel_display { /* unbound hipri wq for page flips/plane updates */ struct workqueue_struct *flip; + + /* hipri wq for commit cleanups */ + struct workqueue_struct *cleanup; } wq; /* Grouping using named structs. Keep sorted. 
*/ diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 11aff485d8fa..f1d76484025a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -11,8 +11,10 @@ #include <drm/drm_fourcc.h> #include "hsw_ips.h" +#include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i9xx_wm_regs.h" #include "intel_alpm.h" #include "intel_bo.h" #include "intel_crtc.h" @@ -730,11 +732,12 @@ static bool intel_lpsp_power_well_enabled(struct drm_i915_private *i915, enum i915_power_well_id power_well_id) { + struct intel_display *display = &i915->display; intel_wakeref_t wakeref; bool is_enabled; wakeref = intel_runtime_pm_get(&i915->runtime_pm); - is_enabled = intel_display_power_well_is_enabled(i915, + is_enabled = intel_display_power_well_is_enabled(display, power_well_id); intel_runtime_pm_put(&i915->runtime_pm, wakeref); @@ -1012,6 +1015,8 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) DP_DSC_YCbCr444))); seq_printf(m, "DSC_Sink_BPP_Precision: %d\n", drm_dp_dsc_sink_bpp_incr(connector->dp.dsc_dpcd)); + seq_printf(m, "DSC_Sink_Max_Slice_Count: %d\n", + drm_dp_dsc_sink_max_slice_count((connector->dp.dsc_dpcd), intel_dp_is_edp(intel_dp))); seq_printf(m, "Force_DSC_Enable: %s\n", str_yes_no(intel_dp->force_dsc_en)); if (!intel_dp_is_edp(intel_dp)) @@ -1331,7 +1336,7 @@ static ssize_t i915_joiner_write(struct file *file, { struct seq_file *m = file->private_data; struct intel_connector *connector = m->private; - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); int force_joined_pipes = 0; int ret; @@ -1349,7 +1354,7 @@ static ssize_t i915_joiner_write(struct file *file, connector->force_joined_pipes = force_joined_pipes; break; case 4: - if (HAS_ULTRAJOINER(i915)) { + if (HAS_ULTRAJOINER(display)) { connector->force_joined_pipes = 
force_joined_pipes; break; } diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c index ec3ed29a83c9..88914a1f3f62 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c @@ -7,9 +7,10 @@ #include <linux/kernel.h> #include <drm/drm_drv.h> +#include <drm/drm_file.h> +#include "intel_display_core.h" #include "intel_display_debugfs_params.h" -#include "i915_drv.h" #include "intel_display_params.h" /* int param */ diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 5f98e1b2a401..68cb7f9b9ef3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -16,6 +16,7 @@ #include "intel_display_params.h" #include "intel_display_power.h" #include "intel_display_reg_defs.h" +#include "intel_display_types.h" #include "intel_fbc.h" #include "intel_step.h" @@ -252,6 +253,7 @@ static const struct intel_display_device_info no_display = {}; static const struct platform_desc i830_desc = { PLATFORM(i830), + PLATFORM_GROUP(mobile), .info = &(const struct intel_display_device_info) { I830_DISPLAY, @@ -270,6 +272,7 @@ static const struct platform_desc i845_desc = { static const struct platform_desc i85x_desc = { PLATFORM(i85x), + PLATFORM_GROUP(mobile), .info = &(const struct intel_display_device_info) { I830_DISPLAY, @@ -312,6 +315,7 @@ static const struct platform_desc i915g_desc = { static const struct platform_desc i915gm_desc = { PLATFORM(i915gm), + PLATFORM_GROUP(mobile), .info = &(const struct intel_display_device_info) { GEN3_DISPLAY, I9XX_COLORS, @@ -336,6 +340,7 @@ static const struct platform_desc i945g_desc = { static const struct platform_desc i945gm_desc = { PLATFORM(i915gm), + PLATFORM_GROUP(mobile), .info = &(const struct intel_display_device_info) { GEN3_DISPLAY, 
I9XX_COLORS, @@ -357,13 +362,21 @@ static const struct platform_desc g33_desc = { }, }; -static const struct platform_desc pnv_desc = { +static const struct intel_display_device_info pnv_display = { + GEN3_DISPLAY, + I9XX_COLORS, + .has_hotplug = 1, +}; + +static const struct platform_desc pnv_g_desc = { PLATFORM(pineview), - .info = &(const struct intel_display_device_info) { - GEN3_DISPLAY, - I9XX_COLORS, - .has_hotplug = 1, - }, + .info = &pnv_display, +}; + +static const struct platform_desc pnv_m_desc = { + PLATFORM(pineview), + PLATFORM_GROUP(mobile), + .info = &pnv_display, }; #define GEN4_DISPLAY \ @@ -390,6 +403,7 @@ static const struct platform_desc i965g_desc = { static const struct platform_desc i965gm_desc = { PLATFORM(i965gm), + PLATFORM_GROUP(mobile), .info = &(const struct intel_display_device_info) { GEN4_DISPLAY, .has_overlay = 1, @@ -413,6 +427,7 @@ static const struct platform_desc g45_desc = { static const struct platform_desc gm45_desc = { PLATFORM(gm45), PLATFORM_GROUP(g4x), + PLATFORM_GROUP(mobile), .info = &(const struct intel_display_device_info) { GEN4_DISPLAY, .supports_tv = 1, @@ -443,6 +458,7 @@ static const struct platform_desc ilk_d_desc = { static const struct platform_desc ilk_m_desc = { PLATFORM(ironlake), + PLATFORM_GROUP(mobile), .info = &(const struct intel_display_device_info) { ILK_DISPLAY, @@ -450,38 +466,54 @@ static const struct platform_desc ilk_m_desc = { }, }; -static const struct platform_desc snb_desc = { +static const struct intel_display_device_info snb_display = { + .has_hotplug = 1, + I9XX_PIPE_OFFSETS, + I9XX_CURSOR_OFFSETS, + ILK_COLORS, + + .__runtime_defaults.ip.ver = 6, + .__runtime_defaults.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), + .__runtime_defaults.cpu_transcoder_mask = + BIT(TRANSCODER_A) | BIT(TRANSCODER_B), + .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | BIT(PORT_D), /* DP A, SDVO/HDMI/DP B, HDMI/DP C/D */ + .__runtime_defaults.fbc_mask = BIT(INTEL_FBC_A), +}; + +static const 
struct platform_desc snb_d_desc = { PLATFORM(sandybridge), - .info = &(const struct intel_display_device_info) { - .has_hotplug = 1, - I9XX_PIPE_OFFSETS, - I9XX_CURSOR_OFFSETS, - ILK_COLORS, + .info = &snb_display, +}; - .__runtime_defaults.ip.ver = 6, - .__runtime_defaults.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), - .__runtime_defaults.cpu_transcoder_mask = - BIT(TRANSCODER_A) | BIT(TRANSCODER_B), - .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | BIT(PORT_D), /* DP A, SDVO/HDMI/DP B, HDMI/DP C/D */ - .__runtime_defaults.fbc_mask = BIT(INTEL_FBC_A), - }, +static const struct platform_desc snb_m_desc = { + PLATFORM(sandybridge), + PLATFORM_GROUP(mobile), + .info = &snb_display, }; -static const struct platform_desc ivb_desc = { +static const struct intel_display_device_info ivb_display = { + .has_hotplug = 1, + IVB_PIPE_OFFSETS, + IVB_CURSOR_OFFSETS, + IVB_COLORS, + + .__runtime_defaults.ip.ver = 7, + .__runtime_defaults.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), + .__runtime_defaults.cpu_transcoder_mask = + BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), + .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | BIT(PORT_D), /* DP A, SDVO/HDMI/DP B, HDMI/DP C/D */ + .__runtime_defaults.fbc_mask = BIT(INTEL_FBC_A), +}; + +static const struct platform_desc ivb_d_desc = { PLATFORM(ivybridge), - .info = &(const struct intel_display_device_info) { - .has_hotplug = 1, - IVB_PIPE_OFFSETS, - IVB_CURSOR_OFFSETS, - IVB_COLORS, + .info = &ivb_display, +}; - .__runtime_defaults.ip.ver = 7, - .__runtime_defaults.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), - .__runtime_defaults.cpu_transcoder_mask = - BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), - .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | BIT(PORT_D), /* DP A, SDVO/HDMI/DP B, HDMI/DP C/D */ - .__runtime_defaults.fbc_mask = BIT(INTEL_FBC_A), - }, +static const struct platform_desc ivb_m_desc = { + 
PLATFORM(ivybridge), + PLATFORM_GROUP(mobile), + .info = &ivb_display, }; static const struct platform_desc vlv_desc = { @@ -1011,6 +1043,7 @@ static const enum intel_step dg1_steppings[] = { static const struct platform_desc dg1_desc = { PLATFORM(dg1), + PLATFORM_GROUP(dgfx), .info = &(const struct intel_display_device_info) { XE_D_DISPLAY, @@ -1238,6 +1271,7 @@ static const enum intel_step dg2_g12_steppings[] = { static const struct platform_desc dg2_desc = { PLATFORM(dg2), + PLATFORM_GROUP(dgfx), .subplatforms = (const struct subplatform_desc[]) { { SUBPLATFORM(dg2, g10), @@ -1338,6 +1372,7 @@ static const struct platform_desc lnl_desc = { static const struct platform_desc bmg_desc = { PLATFORM(battlemage), + PLATFORM_GROUP(dgfx), }; static const struct platform_desc ptl_desc = { @@ -1381,11 +1416,14 @@ static const struct { INTEL_I965GM_IDS(INTEL_DISPLAY_DEVICE, &i965gm_desc), INTEL_GM45_IDS(INTEL_DISPLAY_DEVICE, &gm45_desc), INTEL_G45_IDS(INTEL_DISPLAY_DEVICE, &g45_desc), - INTEL_PNV_IDS(INTEL_DISPLAY_DEVICE, &pnv_desc), + INTEL_PNV_G_IDS(INTEL_DISPLAY_DEVICE, &pnv_g_desc), + INTEL_PNV_M_IDS(INTEL_DISPLAY_DEVICE, &pnv_m_desc), INTEL_ILK_D_IDS(INTEL_DISPLAY_DEVICE, &ilk_d_desc), INTEL_ILK_M_IDS(INTEL_DISPLAY_DEVICE, &ilk_m_desc), - INTEL_SNB_IDS(INTEL_DISPLAY_DEVICE, &snb_desc), - INTEL_IVB_IDS(INTEL_DISPLAY_DEVICE, &ivb_desc), + INTEL_SNB_D_IDS(INTEL_DISPLAY_DEVICE, &snb_d_desc), + INTEL_SNB_M_IDS(INTEL_DISPLAY_DEVICE, &snb_m_desc), + INTEL_IVB_D_IDS(INTEL_DISPLAY_DEVICE, &ivb_d_desc), + INTEL_IVB_M_IDS(INTEL_DISPLAY_DEVICE, &ivb_m_desc), INTEL_HSW_IDS(INTEL_DISPLAY_DEVICE, &hsw_desc), INTEL_VLV_IDS(INTEL_DISPLAY_DEVICE, &vlv_desc), INTEL_BDW_IDS(INTEL_DISPLAY_DEVICE, &bdw_desc), @@ -1429,9 +1467,9 @@ static const struct { }; static const struct intel_display_device_info * -probe_gmdid_display(struct drm_i915_private *i915, struct intel_display_ip_ver *ip_ver) +probe_gmdid_display(struct intel_display *display, struct intel_display_ip_ver *ip_ver) { - struct 
pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); struct intel_display_ip_ver gmd_id; void __iomem *addr; u32 val; @@ -1439,7 +1477,8 @@ probe_gmdid_display(struct drm_i915_private *i915, struct intel_display_ip_ver * addr = pci_iomap_range(pdev, 0, i915_mmio_reg_offset(GMD_ID_DISPLAY), sizeof(u32)); if (!addr) { - drm_err(&i915->drm, "Cannot map MMIO BAR to read display GMD_ID\n"); + drm_err(display->drm, + "Cannot map MMIO BAR to read display GMD_ID\n"); return NULL; } @@ -1447,7 +1486,7 @@ probe_gmdid_display(struct drm_i915_private *i915, struct intel_display_ip_ver * pci_iounmap(pdev, addr); if (val == 0) { - drm_dbg_kms(&i915->drm, "Device doesn't have display\n"); + drm_dbg_kms(display->drm, "Device doesn't have display\n"); return NULL; } @@ -1463,7 +1502,8 @@ probe_gmdid_display(struct drm_i915_private *i915, struct intel_display_ip_ver * } } - drm_err(&i915->drm, "Unrecognized display IP version %d.%02d; disabling display.\n", + drm_err(display->drm, + "Unrecognized display IP version %d.%02d; disabling display.\n", gmd_id.ver, gmd_id.rel); return NULL; } @@ -1564,10 +1604,9 @@ static void display_platforms_or(struct intel_display_platforms *dst, bitmap_or(dst->bitmap, dst->bitmap, src->bitmap, display_platforms_num_bits()); } -void intel_display_device_probe(struct drm_i915_private *i915) +struct intel_display *intel_display_device_probe(struct pci_dev *pdev) { - struct intel_display *display = &i915->display; - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct intel_display *display = to_intel_display(pdev); const struct intel_display_device_info *info; struct intel_display_ip_ver ip_ver = {}; const struct platform_desc *desc; @@ -1575,55 +1614,56 @@ void intel_display_device_probe(struct drm_i915_private *i915) enum intel_step step; /* Add drm device backpointer as early as possible. 
*/ - i915->display.drm = &i915->drm; + display->drm = pci_get_drvdata(pdev); - intel_display_params_copy(&i915->display.params); + intel_display_params_copy(&display->params); if (has_no_display(pdev)) { - drm_dbg_kms(&i915->drm, "Device doesn't have display\n"); + drm_dbg_kms(display->drm, "Device doesn't have display\n"); goto no_display; } desc = find_platform_desc(pdev); if (!desc) { - drm_dbg_kms(&i915->drm, "Unknown device ID %04x; disabling display.\n", + drm_dbg_kms(display->drm, + "Unknown device ID %04x; disabling display.\n", pdev->device); goto no_display; } info = desc->info; if (!info) - info = probe_gmdid_display(i915, &ip_ver); + info = probe_gmdid_display(display, &ip_ver); if (!info) goto no_display; - DISPLAY_INFO(i915) = info; + DISPLAY_INFO(display) = info; - memcpy(DISPLAY_RUNTIME_INFO(i915), - &DISPLAY_INFO(i915)->__runtime_defaults, - sizeof(*DISPLAY_RUNTIME_INFO(i915))); + memcpy(DISPLAY_RUNTIME_INFO(display), + &DISPLAY_INFO(display)->__runtime_defaults, + sizeof(*DISPLAY_RUNTIME_INFO(display))); - drm_WARN_ON(&i915->drm, !desc->name || + drm_WARN_ON(display->drm, !desc->name || !display_platforms_weight(&desc->platforms)); display->platform = desc->platforms; subdesc = find_subplatform_desc(pdev, desc); if (subdesc) { - drm_WARN_ON(&i915->drm, !subdesc->name || + drm_WARN_ON(display->drm, !subdesc->name || !display_platforms_weight(&subdesc->platforms)); display_platforms_or(&display->platform, &subdesc->platforms); /* Ensure platform and subplatform are distinct */ - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, display_platforms_weight(&display->platform) != display_platforms_weight(&desc->platforms) + display_platforms_weight(&subdesc->platforms)); } if (ip_ver.ver || ip_ver.rel || ip_ver.step) { - DISPLAY_RUNTIME_INFO(i915)->ip = ip_ver; + DISPLAY_RUNTIME_INFO(display)->ip = ip_ver; step = STEP_A0 + ip_ver.step; if (step > STEP_FUTURE) { drm_dbg_kms(display->drm, "Using future display stepping\n"); @@ -1634,29 +1674,32 @@ void 
intel_display_device_probe(struct drm_i915_private *i915) subdesc ? &subdesc->step_info : NULL); } - DISPLAY_RUNTIME_INFO(i915)->step = step; + DISPLAY_RUNTIME_INFO(display)->step = step; - drm_info(&i915->drm, "Found %s%s%s (device ID %04x) display version %u.%02u stepping %s\n", + drm_info(display->drm, "Found %s%s%s (device ID %04x) %s display version %u.%02u stepping %s\n", desc->name, subdesc ? "/" : "", subdesc ? subdesc->name : "", - pdev->device, DISPLAY_RUNTIME_INFO(i915)->ip.ver, - DISPLAY_RUNTIME_INFO(i915)->ip.rel, + pdev->device, display->platform.dgfx ? "discrete" : "integrated", + DISPLAY_RUNTIME_INFO(display)->ip.ver, + DISPLAY_RUNTIME_INFO(display)->ip.rel, step != STEP_NONE ? intel_step_name(step) : "N/A"); - return; + return display; no_display: - DISPLAY_INFO(i915) = &no_display; + DISPLAY_INFO(display) = &no_display; + + return display; } -void intel_display_device_remove(struct drm_i915_private *i915) +void intel_display_device_remove(struct intel_display *display) { - intel_display_params_free(&i915->display.params); + intel_display_params_free(&display->params); } -static void __intel_display_device_info_runtime_init(struct drm_i915_private *i915) +static void __intel_display_device_info_runtime_init(struct intel_display *display) { - struct intel_display *display = &i915->display; - struct intel_display_runtime_info *display_runtime = DISPLAY_RUNTIME_INFO(i915); + struct drm_i915_private *i915 = to_i915(display->drm); + struct intel_display_runtime_info *display_runtime = DISPLAY_RUNTIME_INFO(display); enum pipe pipe; BUILD_BUG_ON(BITS_PER_TYPE(display_runtime->pipe_mask) < I915_MAX_PIPES); @@ -1664,35 +1707,35 @@ static void __intel_display_device_info_runtime_init(struct drm_i915_private *i9 BUILD_BUG_ON(BITS_PER_TYPE(display_runtime->port_mask) < I915_MAX_PORTS); /* This covers both ULT and ULX */ - if (IS_HASWELL_ULT(i915) || IS_BROADWELL_ULT(i915)) + if (display->platform.haswell_ult || display->platform.broadwell_ult) 
display_runtime->port_mask &= ~BIT(PORT_D); - if (IS_ICL_WITH_PORT_F(i915)) + if (display->platform.icelake_port_f) display_runtime->port_mask |= BIT(PORT_F); /* Wa_14011765242: adl-s A0,A1 */ - if (IS_ALDERLAKE_S(i915) && IS_DISPLAY_STEP(i915, STEP_A0, STEP_A2)) - for_each_pipe(i915, pipe) + if (display->platform.alderlake_s && IS_DISPLAY_STEP(display, STEP_A0, STEP_A2)) + for_each_pipe(display, pipe) display_runtime->num_scalers[pipe] = 0; - else if (DISPLAY_VER(i915) >= 11) { - for_each_pipe(i915, pipe) + else if (DISPLAY_VER(display) >= 11) { + for_each_pipe(display, pipe) display_runtime->num_scalers[pipe] = 2; - } else if (DISPLAY_VER(i915) >= 9) { + } else if (DISPLAY_VER(display) >= 9) { display_runtime->num_scalers[PIPE_A] = 2; display_runtime->num_scalers[PIPE_B] = 2; display_runtime->num_scalers[PIPE_C] = 1; } - if (DISPLAY_VER(i915) >= 13 || HAS_D12_PLANE_MINIMIZATION(i915)) - for_each_pipe(i915, pipe) + if (DISPLAY_VER(display) >= 13 || HAS_D12_PLANE_MINIMIZATION(display)) + for_each_pipe(display, pipe) display_runtime->num_sprites[pipe] = 4; - else if (DISPLAY_VER(i915) >= 11) - for_each_pipe(i915, pipe) + else if (DISPLAY_VER(display) >= 11) + for_each_pipe(display, pipe) display_runtime->num_sprites[pipe] = 6; - else if (DISPLAY_VER(i915) == 10) - for_each_pipe(i915, pipe) + else if (DISPLAY_VER(display) == 10) + for_each_pipe(display, pipe) display_runtime->num_sprites[pipe] = 3; - else if (IS_BROXTON(i915)) { + else if (display->platform.broxton) { /* * Skylake and Broxton currently don't expose the topmost plane as its * use is exclusive with the legacy cursor and we only want to expose @@ -1705,23 +1748,23 @@ static void __intel_display_device_info_runtime_init(struct drm_i915_private *i9 display_runtime->num_sprites[PIPE_A] = 2; display_runtime->num_sprites[PIPE_B] = 2; display_runtime->num_sprites[PIPE_C] = 1; - } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { - for_each_pipe(i915, pipe) + } else if (display->platform.valleyview || 
display->platform.cherryview) { + for_each_pipe(display, pipe) display_runtime->num_sprites[pipe] = 2; - } else if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915)) { - for_each_pipe(i915, pipe) + } else if (DISPLAY_VER(display) >= 5 || display->platform.g4x) { + for_each_pipe(display, pipe) display_runtime->num_sprites[pipe] = 1; } - if ((IS_DGFX(i915) || DISPLAY_VER(i915) >= 14) && - !(intel_de_read(i915, GU_CNTL_PROTECTED) & DEPRESENT)) { - drm_info(&i915->drm, "Display not present, disabling\n"); + if ((display->platform.dgfx || DISPLAY_VER(display) >= 14) && + !(intel_de_read(display, GU_CNTL_PROTECTED) & DEPRESENT)) { + drm_info(display->drm, "Display not present, disabling\n"); goto display_fused_off; } - if (IS_DISPLAY_VER(i915, 7, 8) && HAS_PCH_SPLIT(i915)) { - u32 fuse_strap = intel_de_read(i915, FUSE_STRAP); - u32 sfuse_strap = intel_de_read(i915, SFUSE_STRAP); + if (IS_DISPLAY_VER(display, 7, 8) && HAS_PCH_SPLIT(i915)) { + u32 fuse_strap = intel_de_read(display, FUSE_STRAP); + u32 sfuse_strap = intel_de_read(display, SFUSE_STRAP); /* * SFUSE_STRAP is supposed to have a bit signalling the display @@ -1736,16 +1779,16 @@ static void __intel_display_device_info_runtime_init(struct drm_i915_private *i9 sfuse_strap & SFUSE_STRAP_DISPLAY_DISABLED || (HAS_PCH_CPT(i915) && !(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) { - drm_info(&i915->drm, + drm_info(display->drm, "Display fused off, disabling\n"); goto display_fused_off; } else if (fuse_strap & IVB_PIPE_C_DISABLE) { - drm_info(&i915->drm, "PipeC fused off\n"); + drm_info(display->drm, "PipeC fused off\n"); display_runtime->pipe_mask &= ~BIT(PIPE_C); display_runtime->cpu_transcoder_mask &= ~BIT(TRANSCODER_C); } - } else if (DISPLAY_VER(i915) >= 9) { - u32 dfsm = intel_de_read(i915, SKL_DFSM); + } else if (DISPLAY_VER(display) >= 9) { + u32 dfsm = intel_de_read(display, SKL_DFSM); if (dfsm & SKL_DFSM_PIPE_A_DISABLE) { display_runtime->pipe_mask &= ~BIT(PIPE_A); @@ -1763,7 +1806,7 @@ static void 
__intel_display_device_info_runtime_init(struct drm_i915_private *i9 display_runtime->fbc_mask &= ~BIT(INTEL_FBC_C); } - if (DISPLAY_VER(i915) >= 12 && + if (DISPLAY_VER(display) >= 12 && (dfsm & TGL_DFSM_PIPE_D_DISABLE)) { display_runtime->pipe_mask &= ~BIT(PIPE_D); display_runtime->cpu_transcoder_mask &= ~BIT(TRANSCODER_D); @@ -1776,15 +1819,15 @@ static void __intel_display_device_info_runtime_init(struct drm_i915_private *i9 if (dfsm & SKL_DFSM_DISPLAY_HDCP_DISABLE) display_runtime->has_hdcp = 0; - if (IS_DG2(i915) || DISPLAY_VER(i915) < 13) { + if (display->platform.dg2 || DISPLAY_VER(display) < 13) { if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE) display_runtime->fbc_mask = 0; } - if (DISPLAY_VER(i915) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) + if (DISPLAY_VER(display) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) display_runtime->has_dmc = 0; - if (IS_DISPLAY_VER(i915, 10, 12) && + if (IS_DISPLAY_VER(display, 10, 12) && (dfsm & GLK_DFSM_DISPLAY_DSC_DISABLE)) display_runtime->has_dsc = 0; @@ -1793,8 +1836,8 @@ static void __intel_display_device_info_runtime_init(struct drm_i915_private *i9 display_runtime->has_dbuf_overlap_detection = false; } - if (DISPLAY_VER(i915) >= 20) { - u32 cap = intel_de_read(i915, XE2LPD_DE_CAP); + if (DISPLAY_VER(display) >= 20) { + u32 cap = intel_de_read(display, XE2LPD_DE_CAP); if (REG_FIELD_GET(XE2LPD_DE_CAP_DSC_MASK, cap) == XE2LPD_DE_CAP_DSC_REMOVED) @@ -1802,18 +1845,19 @@ static void __intel_display_device_info_runtime_init(struct drm_i915_private *i9 if (REG_FIELD_GET(XE2LPD_DE_CAP_SCALER_MASK, cap) == XE2LPD_DE_CAP_SCALER_SINGLE) { - for_each_pipe(i915, pipe) + for_each_pipe(display, pipe) if (display_runtime->num_scalers[pipe]) display_runtime->num_scalers[pipe] = 1; } } - if (DISPLAY_VER(i915) >= 30) + if (DISPLAY_VER(display) >= 30) display_runtime->edp_typec_support = intel_de_read(display, PICA_PHY_CONFIG_CONTROL) & EDP_ON_TYPEC; display_runtime->rawclk_freq = intel_read_rawclk(display); - drm_dbg_kms(&i915->drm, "rawclk rate: %d 
kHz\n", display_runtime->rawclk_freq); + drm_dbg_kms(display->drm, "rawclk rate: %d kHz\n", + display_runtime->rawclk_freq); return; @@ -1821,21 +1865,21 @@ display_fused_off: memset(display_runtime, 0, sizeof(*display_runtime)); } -void intel_display_device_info_runtime_init(struct drm_i915_private *i915) +void intel_display_device_info_runtime_init(struct intel_display *display) { - if (HAS_DISPLAY(i915)) - __intel_display_device_info_runtime_init(i915); + if (HAS_DISPLAY(display)) + __intel_display_device_info_runtime_init(display); /* Display may have been disabled by runtime init */ - if (!HAS_DISPLAY(i915)) { - i915->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); - i915->display.info.__device_info = &no_display; + if (!HAS_DISPLAY(display)) { + display->drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + display->info.__device_info = &no_display; } /* Disable nuclear pageflip by default on pre-g4x */ - if (!i915->display.params.nuclear_pageflip && - DISPLAY_VER(i915) < 5 && !IS_G4X(i915)) - i915->drm.driver_features &= ~DRIVER_ATOMIC; + if (!display->params.nuclear_pageflip && + DISPLAY_VER(display) < 5 && !display->platform.g4x) + display->drm->driver_features &= ~DRIVER_ATOMIC; } void intel_display_device_info_print(const struct intel_display_device_info *info, @@ -1872,10 +1916,8 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf * Disabling display means taking over the display hardware, putting it to * sleep, and preventing connectors from being connected via any means. 
*/ -bool intel_display_device_enabled(struct drm_i915_private *i915) +bool intel_display_device_enabled(struct intel_display *display) { - struct intel_display *display = &i915->display; - /* Only valid when HAS_DISPLAY() is true */ drm_WARN_ON(display->drm, !HAS_DISPLAY(display)); diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 43144a037f9f..9a333d9e6601 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -12,8 +12,9 @@ #include "intel_display_conversion.h" #include "intel_display_limits.h" -struct drm_i915_private; struct drm_printer; +struct intel_display; +struct pci_dev; /* * Display platforms and subplatforms. Keep platforms in display version based @@ -21,6 +22,10 @@ struct drm_printer; * platform. */ #define INTEL_DISPLAY_PLATFORMS(func) \ + /* Platform group aliases */ \ + func(g4x) /* g45 and gm45 */ \ + func(mobile) /* mobile platforms */ \ + func(dgfx) /* discrete graphics */ \ /* Display ver 2 */ \ func(i830) \ func(i845g) \ @@ -38,7 +43,6 @@ struct drm_printer; func(i965gm) \ func(g45) \ func(gm45) \ - func(g4x) /* group alias for g45 and gm45 */ \ /* Display ver 5 */ \ func(ironlake) \ /* Display ver 6 */ \ @@ -136,61 +140,64 @@ struct intel_display_platforms { func(overlay_needs_physical); \ func(supports_tv); -#define HAS_4TILE(i915) (IS_DG2(i915) || DISPLAY_VER(i915) >= 14) -#define HAS_ASYNC_FLIPS(i915) (DISPLAY_VER(i915) >= 5) -#define HAS_BIGJOINER(i915) (DISPLAY_VER(i915) >= 11 && HAS_DSC(i915)) -#define HAS_CDCLK_CRAWL(i915) (DISPLAY_INFO(i915)->has_cdclk_crawl) -#define HAS_CDCLK_SQUASH(i915) (DISPLAY_INFO(i915)->has_cdclk_squash) -#define HAS_CUR_FBC(i915) (!HAS_GMCH(i915) && IS_DISPLAY_VER(i915, 7, 13)) -#define HAS_D12_PLANE_MINIMIZATION(i915) (IS_ROCKETLAKE(i915) || IS_ALDERLAKE_S(i915)) -#define HAS_DBUF_OVERLAP_DETECTION(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dbuf_overlap_detection) 
-#define HAS_DDI(i915) (DISPLAY_INFO(i915)->has_ddi) -#define HAS_DISPLAY(i915) (DISPLAY_RUNTIME_INFO(i915)->pipe_mask != 0) -#define HAS_DMC(i915) (DISPLAY_RUNTIME_INFO(i915)->has_dmc) -#define HAS_DOUBLE_BUFFERED_M_N(i915) (DISPLAY_VER(i915) >= 9 || IS_BROADWELL(i915)) -#define HAS_DP_MST(i915) (DISPLAY_INFO(i915)->has_dp_mst) -#define HAS_DP20(i915) (IS_DG2(i915) || DISPLAY_VER(i915) >= 14) -#define HAS_DPT(i915) (DISPLAY_VER(i915) >= 13) -#define HAS_DSB(i915) (DISPLAY_INFO(i915)->has_dsb) -#define HAS_DSC(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dsc) -#define HAS_DSC_MST(__i915) (DISPLAY_VER(__i915) >= 12 && HAS_DSC(__i915)) -#define HAS_FBC(i915) (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0) -#define HAS_FPGA_DBG_UNCLAIMED(i915) (DISPLAY_INFO(i915)->has_fpga_dbg) -#define HAS_FW_BLC(i915) (DISPLAY_VER(i915) >= 3) -#define HAS_GMBUS_IRQ(i915) (DISPLAY_VER(i915) >= 4) -#define HAS_GMBUS_BURST_READ(i915) (DISPLAY_VER(i915) >= 10 || IS_KABYLAKE(i915)) -#define HAS_GMCH(i915) (DISPLAY_INFO(i915)->has_gmch) -#define HAS_HW_SAGV_WM(i915) (DISPLAY_VER(i915) >= 13 && !IS_DGFX(i915)) -#define HAS_IPC(i915) (DISPLAY_INFO(i915)->has_ipc) -#define HAS_IPS(i915) (IS_HASWELL_ULT(i915) || IS_BROADWELL(i915)) -#define HAS_LRR(i915) (DISPLAY_VER(i915) >= 12) -#define HAS_LSPCON(i915) (IS_DISPLAY_VER(i915, 9, 10)) -#define HAS_MBUS_JOINING(i915) (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) -#define HAS_MSO(i915) (DISPLAY_VER(i915) >= 12) -#define HAS_OVERLAY(i915) (DISPLAY_INFO(i915)->has_overlay) -#define HAS_PSR(i915) (DISPLAY_INFO(i915)->has_psr) -#define HAS_PSR_HW_TRACKING(i915) (DISPLAY_INFO(i915)->has_psr_hw_tracking) -#define HAS_PSR2_SEL_FETCH(i915) (DISPLAY_VER(i915) >= 12) -#define HAS_SAGV(i915) (DISPLAY_VER(i915) >= 9 && !IS_BROXTON(i915) && !IS_GEMINILAKE(i915)) -#define HAS_TRANSCODER(i915, trans) ((DISPLAY_RUNTIME_INFO(i915)->cpu_transcoder_mask & \ - BIT(trans)) != 0) -#define HAS_UNCOMPRESSED_JOINER(i915) (DISPLAY_VER(i915) >= 13) -#define 
HAS_ULTRAJOINER(i915) ((DISPLAY_VER(i915) >= 20 || \ - (IS_DGFX(i915) && DISPLAY_VER(i915) == 14)) && \ - HAS_DSC(i915)) -#define HAS_VRR(i915) (DISPLAY_VER(i915) >= 11) -#define HAS_AS_SDP(i915) (DISPLAY_VER(i915) >= 13) -#define HAS_CMRR(i915) (DISPLAY_VER(i915) >= 20) -#define INTEL_NUM_PIPES(i915) (hweight8(DISPLAY_RUNTIME_INFO(i915)->pipe_mask)) -#define I915_HAS_HOTPLUG(i915) (DISPLAY_INFO(i915)->has_hotplug) -#define OVERLAY_NEEDS_PHYSICAL(i915) (DISPLAY_INFO(i915)->overlay_needs_physical) -#define SUPPORTS_TV(i915) (DISPLAY_INFO(i915)->supports_tv) +#define HAS_4TILE(__display) ((__display)->platform.dg2 || DISPLAY_VER(__display) >= 14) +#define HAS_ASYNC_FLIPS(__display) (DISPLAY_VER(__display) >= 5) +#define HAS_BIGJOINER(__display) (DISPLAY_VER(__display) >= 11 && HAS_DSC(__display)) +#define HAS_CDCLK_CRAWL(__display) (DISPLAY_INFO(__display)->has_cdclk_crawl) +#define HAS_CDCLK_SQUASH(__display) (DISPLAY_INFO(__display)->has_cdclk_squash) +#define HAS_CUR_FBC(__display) (!HAS_GMCH(__display) && IS_DISPLAY_VER(__display, 7, 13)) +#define HAS_D12_PLANE_MINIMIZATION(__display) ((__display)->platform.rocketlake || (__display)->platform.alderlake_s) +#define HAS_DBUF_OVERLAP_DETECTION(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dbuf_overlap_detection) +#define HAS_DDI(__display) (DISPLAY_INFO(__display)->has_ddi) +#define HAS_DISPLAY(__display) (DISPLAY_RUNTIME_INFO(__display)->pipe_mask != 0) +#define HAS_DMC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dmc) +#define HAS_DMC_WAKELOCK(__display) (DISPLAY_VER(__display) >= 20) +#define HAS_DOUBLE_BUFFERED_M_N(__display) (DISPLAY_VER(__display) >= 9 || (__display)->platform.broadwell) +#define HAS_DOUBLE_WIDE(__display) (DISPLAY_VER(__display) < 4) +#define HAS_DP_MST(__display) (DISPLAY_INFO(__display)->has_dp_mst) +#define HAS_DP20(__display) ((__display)->platform.dg2 || DISPLAY_VER(__display) >= 14) +#define HAS_DPT(__display) (DISPLAY_VER(__display) >= 13) +#define HAS_DSB(__display) 
(DISPLAY_INFO(__display)->has_dsb) +#define HAS_DSC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dsc) +#define HAS_DSC_MST(__display) (DISPLAY_VER(__display) >= 12 && HAS_DSC(__display)) +#define HAS_FBC(__display) (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0) +#define HAS_FPGA_DBG_UNCLAIMED(__display) (DISPLAY_INFO(__display)->has_fpga_dbg) +#define HAS_FW_BLC(__display) (DISPLAY_VER(__display) >= 3) +#define HAS_GMBUS_IRQ(__display) (DISPLAY_VER(__display) >= 4) +#define HAS_GMBUS_BURST_READ(__display) (DISPLAY_VER(__display) >= 10 || (__display)->platform.kabylake) +#define HAS_GMCH(__display) (DISPLAY_INFO(__display)->has_gmch) +#define HAS_HW_SAGV_WM(__display) (DISPLAY_VER(__display) >= 13 && !(__display)->platform.dgfx) +#define HAS_IPC(__display) (DISPLAY_INFO(__display)->has_ipc) +#define HAS_IPS(__display) ((__display)->platform.haswell_ult || (__display)->platform.broadwell) +#define HAS_LRR(__display) (DISPLAY_VER(__display) >= 12) +#define HAS_LSPCON(__display) (IS_DISPLAY_VER(__display, 9, 10)) +#define HAS_MBUS_JOINING(__display) ((__display)->platform.alderlake_p || DISPLAY_VER(__display) >= 14) +#define HAS_MSO(__display) (DISPLAY_VER(__display) >= 12) +#define HAS_OVERLAY(__display) (DISPLAY_INFO(__display)->has_overlay) +#define HAS_PSR(__display) (DISPLAY_INFO(__display)->has_psr) +#define HAS_PSR_HW_TRACKING(__display) (DISPLAY_INFO(__display)->has_psr_hw_tracking) +#define HAS_PSR2_SEL_FETCH(__display) (DISPLAY_VER(__display) >= 12) +#define HAS_SAGV(__display) (DISPLAY_VER(__display) >= 9 && \ + !(__display)->platform.broxton && !(__display)->platform.geminilake) +#define HAS_TRANSCODER(__display, trans) ((DISPLAY_RUNTIME_INFO(__display)->cpu_transcoder_mask & \ + BIT(trans)) != 0) +#define HAS_UNCOMPRESSED_JOINER(__display) (DISPLAY_VER(__display) >= 13) +#define HAS_ULTRAJOINER(__display) ((DISPLAY_VER(__display) >= 20 || \ + ((__display)->platform.dgfx && DISPLAY_VER(__display) == 14)) && \ + HAS_DSC(__display)) +#define 
HAS_VRR(__display) (DISPLAY_VER(__display) >= 11) +#define HAS_AS_SDP(__display) (DISPLAY_VER(__display) >= 13) +#define HAS_CMRR(__display) (DISPLAY_VER(__display) >= 20) +#define INTEL_NUM_PIPES(__display) (hweight8(DISPLAY_RUNTIME_INFO(__display)->pipe_mask)) +#define I915_HAS_HOTPLUG(__display) (DISPLAY_INFO(__display)->has_hotplug) +#define OVERLAY_NEEDS_PHYSICAL(__display) (DISPLAY_INFO(__display)->overlay_needs_physical) +#define SUPPORTS_TV(__display) (DISPLAY_INFO(__display)->supports_tv) /* Check that device has a display IP version within the specific range. */ -#define IS_DISPLAY_VERx100(__i915, from, until) ( \ +#define IS_DISPLAY_VERx100(__display, from, until) ( \ BUILD_BUG_ON_ZERO((from) < 200) + \ - (DISPLAY_VERx100(__i915) >= (from) && \ - DISPLAY_VERx100(__i915) <= (until))) + (DISPLAY_VERx100(__display) >= (from) && \ + DISPLAY_VERx100(__display) <= (until))) /* * Check if a device has a specific IP version as well as a stepping within the @@ -201,30 +208,30 @@ struct intel_display_platforms { * hardware fix is present and the software workaround is no longer necessary. * E.g., * - * IS_DISPLAY_VERx100_STEP(i915, 1400, STEP_A0, STEP_B2) - * IS_DISPLAY_VERx100_STEP(i915, 1400, STEP_C0, STEP_FOREVER) + * IS_DISPLAY_VERx100_STEP(display, 1400, STEP_A0, STEP_B2) + * IS_DISPLAY_VERx100_STEP(display, 1400, STEP_C0, STEP_FOREVER) * * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper * stepping bound for the specified IP version. 
*/ -#define IS_DISPLAY_VERx100_STEP(__i915, ipver, from, until) \ - (IS_DISPLAY_VERx100((__i915), (ipver), (ipver)) && \ - IS_DISPLAY_STEP((__i915), (from), (until))) +#define IS_DISPLAY_VERx100_STEP(__display, ipver, from, until) \ + (IS_DISPLAY_VERx100((__display), (ipver), (ipver)) && \ + IS_DISPLAY_STEP((__display), (from), (until))) -#define DISPLAY_INFO(i915) (__to_intel_display(i915)->info.__device_info) -#define DISPLAY_RUNTIME_INFO(i915) (&__to_intel_display(i915)->info.__runtime_info) +#define DISPLAY_INFO(__display) (__to_intel_display(__display)->info.__device_info) +#define DISPLAY_RUNTIME_INFO(__display) (&__to_intel_display(__display)->info.__runtime_info) -#define DISPLAY_VER(i915) (DISPLAY_RUNTIME_INFO(i915)->ip.ver) -#define DISPLAY_VERx100(i915) (DISPLAY_RUNTIME_INFO(i915)->ip.ver * 100 + \ - DISPLAY_RUNTIME_INFO(i915)->ip.rel) -#define IS_DISPLAY_VER(i915, from, until) \ - (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) +#define DISPLAY_VER(__display) (DISPLAY_RUNTIME_INFO(__display)->ip.ver) +#define DISPLAY_VERx100(__display) (DISPLAY_RUNTIME_INFO(__display)->ip.ver * 100 + \ + DISPLAY_RUNTIME_INFO(__display)->ip.rel) +#define IS_DISPLAY_VER(__display, from, until) \ + (DISPLAY_VER(__display) >= (from) && DISPLAY_VER(__display) <= (until)) -#define INTEL_DISPLAY_STEP(__i915) (DISPLAY_RUNTIME_INFO(__i915)->step) +#define INTEL_DISPLAY_STEP(__display) (DISPLAY_RUNTIME_INFO(__display)->step) -#define IS_DISPLAY_STEP(__i915, since, until) \ - (drm_WARN_ON(__to_intel_display(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ - INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until)) +#define IS_DISPLAY_STEP(__display, since, until) \ + (drm_WARN_ON(__to_intel_display(__display)->drm, INTEL_DISPLAY_STEP(__display) == STEP_NONE), \ + INTEL_DISPLAY_STEP(__display) >= (since) && INTEL_DISPLAY_STEP(__display) < (until)) struct intel_display_runtime_info { struct intel_display_ip_ver { @@ -283,10 +290,10 @@ 
struct intel_display_device_info { } color; }; -bool intel_display_device_enabled(struct drm_i915_private *i915); -void intel_display_device_probe(struct drm_i915_private *i915); -void intel_display_device_remove(struct drm_i915_private *i915); -void intel_display_device_info_runtime_init(struct drm_i915_private *i915); +bool intel_display_device_enabled(struct intel_display *display); +struct intel_display *intel_display_device_probe(struct pci_dev *pdev); +void intel_display_device_remove(struct intel_display *display); +void intel_display_device_info_runtime_init(struct intel_display *display); void intel_display_device_info_print(const struct intel_display_device_info *info, const struct intel_display_runtime_info *runtime, diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 56b78cf6b854..50ec0c3c7588 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -80,12 +80,12 @@ bool intel_display_driver_probe_defer(struct pci_dev *pdev) return false; } -void intel_display_driver_init_hw(struct drm_i915_private *i915) +void intel_display_driver_init_hw(struct intel_display *display) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_cdclk_state *cdclk_state; - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return; cdclk_state = to_intel_cdclk_state(display->cdclk.obj.state); @@ -112,12 +112,12 @@ static const struct drm_mode_config_helper_funcs intel_mode_config_funcs = { .atomic_commit_setup = drm_dp_mst_atomic_setup_commit, }; -static void intel_mode_config_init(struct drm_i915_private *i915) +static void intel_mode_config_init(struct intel_display *display) { - struct drm_mode_config *mode_config = &i915->drm.mode_config; + struct drm_mode_config *mode_config = &display->drm->mode_config; - drm_mode_config_init(&i915->drm); - 
INIT_LIST_HEAD(&i915->display.global.obj_list); + drm_mode_config_init(display->drm); + INIT_LIST_HEAD(&display->global.obj_list); mode_config->min_width = 0; mode_config->min_height = 0; @@ -128,19 +128,19 @@ static void intel_mode_config_init(struct drm_i915_private *i915) mode_config->funcs = &intel_mode_funcs; mode_config->helper_private = &intel_mode_config_funcs; - mode_config->async_page_flip = HAS_ASYNC_FLIPS(i915); + mode_config->async_page_flip = HAS_ASYNC_FLIPS(display); /* * Maximum framebuffer dimensions, chosen to match * the maximum render engine surface size on gen4+. */ - if (DISPLAY_VER(i915) >= 7) { + if (DISPLAY_VER(display) >= 7) { mode_config->max_width = 16384; mode_config->max_height = 16384; - } else if (DISPLAY_VER(i915) >= 4) { + } else if (DISPLAY_VER(display) >= 4) { mode_config->max_width = 8192; mode_config->max_height = 8192; - } else if (DISPLAY_VER(i915) == 3) { + } else if (DISPLAY_VER(display) == 3) { mode_config->max_width = 4096; mode_config->max_height = 4096; } else { @@ -148,11 +148,11 @@ static void intel_mode_config_init(struct drm_i915_private *i915) mode_config->max_height = 2048; } - if (IS_I845G(i915) || IS_I865G(i915)) { - mode_config->cursor_width = IS_I845G(i915) ? 64 : 512; + if (display->platform.i845g || display->platform.i865g) { + mode_config->cursor_width = display->platform.i845g ? 
64 : 512; mode_config->cursor_height = 1023; - } else if (IS_I830(i915) || IS_I85X(i915) || - IS_I915G(i915) || IS_I915GM(i915)) { + } else if (display->platform.i830 || display->platform.i85x || + display->platform.i915g || display->platform.i915gm) { mode_config->cursor_width = 64; mode_config->cursor_height = 64; } else { @@ -161,18 +161,17 @@ static void intel_mode_config_init(struct drm_i915_private *i915) } } -static void intel_mode_config_cleanup(struct drm_i915_private *i915) +static void intel_mode_config_cleanup(struct intel_display *display) { - intel_atomic_global_obj_cleanup(i915); - drm_mode_config_cleanup(&i915->drm); + intel_atomic_global_obj_cleanup(display); + drm_mode_config_cleanup(display->drm); } -static void intel_plane_possible_crtcs_init(struct drm_i915_private *dev_priv) +static void intel_plane_possible_crtcs_init(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; struct intel_plane *plane; - for_each_intel_plane(&dev_priv->drm, plane) { + for_each_intel_plane(display->drm, plane) { struct intel_crtc *crtc = intel_crtc_for_pipe(display, plane->pipe); @@ -180,41 +179,43 @@ static void intel_plane_possible_crtcs_init(struct drm_i915_private *dev_priv) } } -void intel_display_driver_early_probe(struct drm_i915_private *i915) +void intel_display_driver_early_probe(struct intel_display *display) { - if (!HAS_DISPLAY(i915)) + struct drm_i915_private *i915 = to_i915(display->drm); + + if (!HAS_DISPLAY(display)) return; - spin_lock_init(&i915->display.fb_tracking.lock); - mutex_init(&i915->display.backlight.lock); - mutex_init(&i915->display.audio.mutex); - mutex_init(&i915->display.wm.wm_mutex); - mutex_init(&i915->display.pps.mutex); - mutex_init(&i915->display.hdcp.hdcp_mutex); + spin_lock_init(&display->fb_tracking.lock); + mutex_init(&display->backlight.lock); + mutex_init(&display->audio.mutex); + mutex_init(&display->wm.wm_mutex); + mutex_init(&display->pps.mutex); + 
mutex_init(&display->hdcp.hdcp_mutex); intel_display_irq_init(i915); intel_dkl_phy_init(i915); - intel_color_init_hooks(&i915->display); - intel_init_cdclk_hooks(&i915->display); + intel_color_init_hooks(display); + intel_init_cdclk_hooks(display); intel_audio_hooks_init(i915); intel_dpll_init_clock_hook(i915); intel_init_display_hooks(i915); intel_fdi_init_hook(i915); - intel_dmc_wl_init(&i915->display); + intel_dmc_wl_init(display); } /* part #1: call before irq install */ -int intel_display_driver_probe_noirq(struct drm_i915_private *i915) +int intel_display_driver_probe_noirq(struct intel_display *display) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); int ret; if (i915_inject_probe_failure(i915)) return -ENODEV; - if (HAS_DISPLAY(i915)) { - ret = drm_vblank_init(&i915->drm, - INTEL_NUM_PIPES(i915)); + if (HAS_DISPLAY(display)) { + ret = drm_vblank_init(display->drm, + INTEL_NUM_PIPES(display)); if (ret) return ret; } @@ -226,24 +227,25 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) goto cleanup_bios; /* FIXME: completely on the wrong abstraction layer */ - ret = intel_power_domains_init(i915); + ret = intel_power_domains_init(display); if (ret < 0) goto cleanup_vga; - intel_pmdemand_init_early(i915); + intel_pmdemand_init_early(display); - intel_power_domains_init_hw(i915, false); + intel_power_domains_init_hw(display, false); - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return 0; intel_dmc_init(display); - i915->display.wq.modeset = alloc_ordered_workqueue("i915_modeset", 0); - i915->display.wq.flip = alloc_workqueue("i915_flip", WQ_HIGHPRI | + display->wq.modeset = alloc_ordered_workqueue("i915_modeset", 0); + display->wq.flip = alloc_workqueue("i915_flip", WQ_HIGHPRI | WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); + display->wq.cleanup = alloc_workqueue("i915_cleanup", WQ_HIGHPRI, 0); - intel_mode_config_init(i915); + intel_mode_config_init(display); ret = 
intel_cdclk_init(display); if (ret) @@ -261,7 +263,7 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) if (ret) goto cleanup_vga_client_pw_domain_dmc; - ret = intel_pmdemand_init(i915); + ret = intel_pmdemand_init(display); if (ret) goto cleanup_vga_client_pw_domain_dmc; @@ -273,7 +275,7 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) cleanup_vga_client_pw_domain_dmc: intel_dmc_fini(display); - intel_power_domains_driver_remove(i915); + intel_power_domains_driver_remove(display); cleanup_vga: intel_vga_unregister(display); cleanup_bios: @@ -282,7 +284,7 @@ cleanup_bios: return ret; } -static void set_display_access(struct drm_i915_private *i915, +static void set_display_access(struct intel_display *display, bool any_task_allowed, struct task_struct *allowed_task) { @@ -290,20 +292,20 @@ static void set_display_access(struct drm_i915_private *i915, int err; intel_modeset_lock_ctx_retry(&ctx, NULL, 0, err) { - err = drm_modeset_lock_all_ctx(&i915->drm, &ctx); + err = drm_modeset_lock_all_ctx(display->drm, &ctx); if (err) continue; - i915->display.access.any_task_allowed = any_task_allowed; - i915->display.access.allowed_task = allowed_task; + display->access.any_task_allowed = any_task_allowed; + display->access.allowed_task = allowed_task; } - drm_WARN_ON(&i915->drm, err); + drm_WARN_ON(display->drm, err); } /** * intel_display_driver_enable_user_access - Enable display HW access for all threads - * @i915: i915 device instance + * @display: display device instance * * Enable the display HW access for all threads. Examples for such accesses * are modeset commits and connector probing. @@ -311,16 +313,18 @@ static void set_display_access(struct drm_i915_private *i915, * This function should be called during driver loading and system resume once * all the HW initialization steps are done. 
*/ -void intel_display_driver_enable_user_access(struct drm_i915_private *i915) +void intel_display_driver_enable_user_access(struct intel_display *display) { - set_display_access(i915, true, NULL); + struct drm_i915_private *i915 = to_i915(display->drm); + + set_display_access(display, true, NULL); intel_hpd_enable_detection_work(i915); } /** * intel_display_driver_disable_user_access - Disable display HW access for user threads - * @i915: i915 device instance + * @display: display device instance * * Disable the display HW access for user threads. Examples for such accesses * are modeset commits and connector probing. For the current thread the @@ -335,16 +339,18 @@ void intel_display_driver_enable_user_access(struct drm_i915_private *i915) * This function should be called during driver loading/unloading and system * suspend/shutdown before starting the HW init/deinit programming. */ -void intel_display_driver_disable_user_access(struct drm_i915_private *i915) +void intel_display_driver_disable_user_access(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); + intel_hpd_disable_detection_work(i915); - set_display_access(i915, false, current); + set_display_access(display, false, current); } /** * intel_display_driver_suspend_access - Suspend display HW access for all threads - * @i915: i915 device instance + * @display: display device instance * * Disable the display HW access for all threads. Examples for such accesses * are modeset commits and connector probing. This call should be either @@ -354,14 +360,14 @@ void intel_display_driver_disable_user_access(struct drm_i915_private *i915) * This function should be called during driver unloading and system * suspend/shutdown after completing the HW deinit programming. 
*/ -void intel_display_driver_suspend_access(struct drm_i915_private *i915) +void intel_display_driver_suspend_access(struct intel_display *display) { - set_display_access(i915, false, NULL); + set_display_access(display, false, NULL); } /** * intel_display_driver_resume_access - Resume display HW access for the resume thread - * @i915: i915 device instance + * @display: display device instance * * Enable the display HW access for the current resume thread, keeping the * access disabled for all other (user) threads. Examples for such accesses @@ -373,14 +379,14 @@ void intel_display_driver_suspend_access(struct drm_i915_private *i915) * This function should be called during system resume before starting the HW * init steps. */ -void intel_display_driver_resume_access(struct drm_i915_private *i915) +void intel_display_driver_resume_access(struct intel_display *display) { - set_display_access(i915, false, current); + set_display_access(display, false, current); } /** * intel_display_driver_check_access - Check if the current thread has disaplay HW access - * @i915: i915 device instance + * @display: display device instance * * Check whether the current thread has display HW access, print a debug * message if it doesn't. Such accesses are modeset commits and connector @@ -389,26 +395,24 @@ void intel_display_driver_resume_access(struct drm_i915_private *i915) * Returns %true if the current thread has display HW access, %false * otherwise. 
*/ -bool intel_display_driver_check_access(struct drm_i915_private *i915) +bool intel_display_driver_check_access(struct intel_display *display) { - char comm[TASK_COMM_LEN]; char current_task[TASK_COMM_LEN + 16]; char allowed_task[TASK_COMM_LEN + 16] = "none"; - if (i915->display.access.any_task_allowed || - i915->display.access.allowed_task == current) + if (display->access.any_task_allowed || + display->access.allowed_task == current) return true; snprintf(current_task, sizeof(current_task), "%s[%d]", - get_task_comm(comm, current), - task_pid_vnr(current)); + current->comm, task_pid_vnr(current)); - if (i915->display.access.allowed_task) + if (display->access.allowed_task) snprintf(allowed_task, sizeof(allowed_task), "%s[%d]", - get_task_comm(comm, i915->display.access.allowed_task), - task_pid_vnr(i915->display.access.allowed_task)); + display->access.allowed_task->comm, + task_pid_vnr(display->access.allowed_task)); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Reject display access from task %s (allowed to %s)\n", current_task, allowed_task); @@ -416,14 +420,13 @@ bool intel_display_driver_check_access(struct drm_i915_private *i915) } /* part #2: call after irq install, but before gem init */ -int intel_display_driver_probe_nogem(struct drm_i915_private *i915) +int intel_display_driver_probe_nogem(struct intel_display *display) { - struct intel_display *display = &i915->display; - struct drm_device *dev = display->drm; + struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe; int ret; - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return 0; intel_wm_init(i915); @@ -434,22 +437,22 @@ int intel_display_driver_probe_nogem(struct drm_i915_private *i915) intel_gmbus_setup(display); - drm_dbg_kms(&i915->drm, "%d display pipe%s available.\n", - INTEL_NUM_PIPES(i915), - INTEL_NUM_PIPES(i915) > 1 ? "s" : ""); + drm_dbg_kms(display->drm, "%d display pipe%s available.\n", + INTEL_NUM_PIPES(display), + INTEL_NUM_PIPES(display) > 1 ? 
"s" : ""); - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { ret = intel_crtc_init(i915, pipe); if (ret) goto err_mode_config; } - intel_plane_possible_crtcs_init(i915); + intel_plane_possible_crtcs_init(display); intel_shared_dpll_init(i915); intel_fdi_pll_freq_update(i915); intel_update_czclk(i915); - intel_display_driver_init_hw(i915); + intel_display_driver_init_hw(display); intel_dpll_update_ref_clks(i915); if (display->cdclk.max_cdclk_freq == 0) @@ -465,21 +468,21 @@ int intel_display_driver_probe_nogem(struct drm_i915_private *i915) if (ret) goto err_hdcp; - intel_display_driver_disable_user_access(i915); + intel_display_driver_disable_user_access(display); - drm_modeset_lock_all(dev); - intel_modeset_setup_hw_state(i915, dev->mode_config.acquire_ctx); + drm_modeset_lock_all(display->drm); + intel_modeset_setup_hw_state(i915, display->drm->mode_config.acquire_ctx); intel_acpi_assign_connector_fwnodes(display); - drm_modeset_unlock_all(dev); + drm_modeset_unlock_all(display->drm); - intel_initial_plane_config(i915); + intel_initial_plane_config(display); /* * Make sure hardware watermarks really match the state we read out. * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. 
*/ - if (!HAS_GMCH(i915)) + if (!HAS_GMCH(display)) ilk_wm_sanitize(i915); return 0; @@ -487,18 +490,18 @@ int intel_display_driver_probe_nogem(struct drm_i915_private *i915) err_hdcp: intel_hdcp_component_fini(display); err_mode_config: - intel_mode_config_cleanup(i915); + intel_mode_config_cleanup(display); return ret; } /* part #3: call after gem init */ -int intel_display_driver_probe(struct drm_i915_private *i915) +int intel_display_driver_probe(struct intel_display *display) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); int ret; - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return 0; /* @@ -514,11 +517,11 @@ int intel_display_driver_probe(struct drm_i915_private *i915) * are already calculated and there is no assert_plane warnings * during bootup. */ - ret = intel_initial_commit(&i915->drm); + ret = intel_initial_commit(display->drm); if (ret) - drm_dbg_kms(&i915->drm, "Initial modeset failed, %d\n", ret); + drm_dbg_kms(display->drm, "Initial modeset failed, %d\n", ret); - intel_overlay_setup(i915); + intel_overlay_setup(display); /* Only enable hotplug handling once the fbdev is fully set up. 
*/ intel_hpd_init(i915); @@ -528,13 +531,13 @@ int intel_display_driver_probe(struct drm_i915_private *i915) return 0; } -void intel_display_driver_register(struct drm_i915_private *i915) +void intel_display_driver_register(struct intel_display *display) { - struct intel_display *display = &i915->display; - struct drm_printer p = drm_dbg_printer(&i915->drm, DRM_UT_KMS, + struct drm_i915_private *i915 = to_i915(display->drm); + struct drm_printer p = drm_dbg_printer(display->drm, DRM_UT_KMS, "i915 display info:"); - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return; /* Must be done after probing outputs */ @@ -543,7 +546,7 @@ void intel_display_driver_register(struct drm_i915_private *i915) intel_audio_init(i915); - intel_display_driver_enable_user_access(i915); + intel_display_driver_enable_user_access(display); intel_audio_register(i915); @@ -554,41 +557,42 @@ void intel_display_driver_register(struct drm_i915_private *i915) * fbdev configuration, for which we use the * fbdev->async_cookie. */ - drm_kms_helper_poll_init(&i915->drm); + drm_kms_helper_poll_init(display->drm); intel_hpd_poll_disable(i915); intel_fbdev_setup(i915); - intel_display_device_info_print(DISPLAY_INFO(i915), - DISPLAY_RUNTIME_INFO(i915), &p); + intel_display_device_info_print(DISPLAY_INFO(display), + DISPLAY_RUNTIME_INFO(display), &p); } /* part #1: call before irq uninstall */ -void intel_display_driver_remove(struct drm_i915_private *i915) +void intel_display_driver_remove(struct intel_display *display) { - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return; - flush_workqueue(i915->display.wq.flip); - flush_workqueue(i915->display.wq.modeset); + flush_workqueue(display->wq.flip); + flush_workqueue(display->wq.modeset); + flush_workqueue(display->wq.cleanup); /* * MST topology needs to be suspended so we don't have any calls to * fbdev after it's finalized. 
MST will be destroyed later as part of * drm_mode_config_cleanup() */ - intel_dp_mst_suspend(i915); + intel_dp_mst_suspend(display); } /* part #2: call after irq uninstall */ -void intel_display_driver_remove_noirq(struct drm_i915_private *i915) +void intel_display_driver_remove_noirq(struct intel_display *display) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return; - intel_display_driver_suspend_access(i915); + intel_display_driver_suspend_access(display); /* * Due to the hpd irq storm handling the hotplug work can re-arm the @@ -603,55 +607,54 @@ void intel_display_driver_remove_noirq(struct drm_i915_private *i915) intel_hdcp_component_fini(display); - intel_mode_config_cleanup(i915); + intel_mode_config_cleanup(display); intel_dp_tunnel_mgr_cleanup(display); - intel_overlay_cleanup(i915); + intel_overlay_cleanup(display); intel_gmbus_teardown(display); - destroy_workqueue(i915->display.wq.flip); - destroy_workqueue(i915->display.wq.modeset); + destroy_workqueue(display->wq.flip); + destroy_workqueue(display->wq.modeset); + destroy_workqueue(display->wq.cleanup); - intel_fbc_cleanup(&i915->display); + intel_fbc_cleanup(display); } /* part #3: call after gem init */ -void intel_display_driver_remove_nogem(struct drm_i915_private *i915) +void intel_display_driver_remove_nogem(struct intel_display *display) { - struct intel_display *display = &i915->display; - intel_dmc_fini(display); - intel_power_domains_driver_remove(i915); + intel_power_domains_driver_remove(display); intel_vga_unregister(display); intel_bios_driver_remove(display); } -void intel_display_driver_unregister(struct drm_i915_private *i915) +void intel_display_driver_unregister(struct intel_display *display) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return; - 
drm_client_dev_unregister(&i915->drm); + drm_client_dev_unregister(display->drm); /* * After flushing the fbdev (incl. a late async config which * will have delayed queuing of a hotplug event), then flush * the hotplug events. */ - drm_kms_helper_poll_fini(&i915->drm); + drm_kms_helper_poll_fini(display->drm); - intel_display_driver_disable_user_access(i915); + intel_display_driver_disable_user_access(display); intel_audio_deinit(i915); - drm_atomic_helper_shutdown(&i915->drm); + drm_atomic_helper_shutdown(display->drm); acpi_video_unregister(); intel_opregion_unregister(display); @@ -661,30 +664,36 @@ void intel_display_driver_unregister(struct drm_i915_private *i915) * turn all crtc's off, but do not adjust state * This has to be paired with a call to intel_modeset_setup_hw_state. */ -int intel_display_driver_suspend(struct drm_i915_private *i915) +int intel_display_driver_suspend(struct intel_display *display) { struct drm_atomic_state *state; int ret; - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return 0; - state = drm_atomic_helper_suspend(&i915->drm); + state = drm_atomic_helper_suspend(display->drm); ret = PTR_ERR_OR_ZERO(state); if (ret) - drm_err(&i915->drm, "Suspending crtc's failed with %i\n", + drm_err(display->drm, "Suspending crtc's failed with %i\n", ret); else - i915->display.restore.modeset_state = state; + display->restore.modeset_state = state; + + /* ensure all DPT VMAs have been unpinned for intel_dpt_suspend() */ + flush_workqueue(display->wq.cleanup); + + intel_dp_mst_suspend(display); + return ret; } int -__intel_display_driver_resume(struct drm_i915_private *i915, +__intel_display_driver_resume(struct intel_display *display, struct drm_atomic_state *state, struct drm_modeset_acquire_ctx *ctx) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); struct drm_crtc_state *crtc_state; struct drm_crtc *crtc; int ret, i; @@ -710,33 +719,37 @@ __intel_display_driver_resume(struct 
drm_i915_private *i915, } /* ignore any reset values/BIOS leftovers in the WM registers */ - if (!HAS_GMCH(i915)) + if (!HAS_GMCH(display)) to_intel_atomic_state(state)->skip_intermediate_wm = true; ret = drm_atomic_helper_commit_duplicated_state(state, ctx); - drm_WARN_ON(&i915->drm, ret == -EDEADLK); + drm_WARN_ON(display->drm, ret == -EDEADLK); return ret; } -void intel_display_driver_resume(struct drm_i915_private *i915) +void intel_display_driver_resume(struct intel_display *display) { - struct drm_atomic_state *state = i915->display.restore.modeset_state; + struct drm_i915_private *i915 = to_i915(display->drm); + struct drm_atomic_state *state = display->restore.modeset_state; struct drm_modeset_acquire_ctx ctx; int ret; - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return; - i915->display.restore.modeset_state = NULL; + /* MST sideband requires HPD interrupts enabled */ + intel_dp_mst_resume(display); + + display->restore.modeset_state = NULL; if (state) state->acquire_ctx = &ctx; drm_modeset_acquire_init(&ctx, 0); while (1) { - ret = drm_modeset_lock_all_ctx(&i915->drm, &ctx); + ret = drm_modeset_lock_all_ctx(display->drm, &ctx); if (ret != -EDEADLK) break; @@ -744,14 +757,14 @@ void intel_display_driver_resume(struct drm_i915_private *i915) } if (!ret) - ret = __intel_display_driver_resume(i915, state, &ctx); + ret = __intel_display_driver_resume(display, state, &ctx); skl_watermark_ipc_update(i915); drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); if (ret) - drm_err(&i915->drm, + drm_err(display->drm, "Restoring old state failed with %i\n", ret); if (state) drm_atomic_state_put(state); diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.h b/drivers/gpu/drm/i915/display/intel_display_driver.h index 42cc4af6d3fd..2966ff91b219 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.h +++ b/drivers/gpu/drm/i915/display/intel_display_driver.h @@ -9,34 +9,34 @@ #include <linux/types.h> struct drm_atomic_state; -struct 
drm_i915_private; struct drm_modeset_acquire_ctx; +struct intel_display; struct pci_dev; bool intel_display_driver_probe_defer(struct pci_dev *pdev); -void intel_display_driver_init_hw(struct drm_i915_private *i915); -void intel_display_driver_early_probe(struct drm_i915_private *i915); -int intel_display_driver_probe_noirq(struct drm_i915_private *i915); -int intel_display_driver_probe_nogem(struct drm_i915_private *i915); -int intel_display_driver_probe(struct drm_i915_private *i915); -void intel_display_driver_register(struct drm_i915_private *i915); -void intel_display_driver_remove(struct drm_i915_private *i915); -void intel_display_driver_remove_noirq(struct drm_i915_private *i915); -void intel_display_driver_remove_nogem(struct drm_i915_private *i915); -void intel_display_driver_unregister(struct drm_i915_private *i915); -int intel_display_driver_suspend(struct drm_i915_private *i915); -void intel_display_driver_resume(struct drm_i915_private *i915); +void intel_display_driver_init_hw(struct intel_display *display); +void intel_display_driver_early_probe(struct intel_display *display); +int intel_display_driver_probe_noirq(struct intel_display *display); +int intel_display_driver_probe_nogem(struct intel_display *display); +int intel_display_driver_probe(struct intel_display *display); +void intel_display_driver_register(struct intel_display *display); +void intel_display_driver_remove(struct intel_display *display); +void intel_display_driver_remove_noirq(struct intel_display *display); +void intel_display_driver_remove_nogem(struct intel_display *display); +void intel_display_driver_unregister(struct intel_display *display); +int intel_display_driver_suspend(struct intel_display *display); +void intel_display_driver_resume(struct intel_display *display); /* interface for intel_display_reset.c */ -int __intel_display_driver_resume(struct drm_i915_private *i915, +int __intel_display_driver_resume(struct intel_display *display, struct drm_atomic_state *state, 
struct drm_modeset_acquire_ctx *ctx); -void intel_display_driver_enable_user_access(struct drm_i915_private *i915); -void intel_display_driver_disable_user_access(struct drm_i915_private *i915); -void intel_display_driver_suspend_access(struct drm_i915_private *i915); -void intel_display_driver_resume_access(struct drm_i915_private *i915); -bool intel_display_driver_check_access(struct drm_i915_private *i915); +void intel_display_driver_enable_user_access(struct intel_display *display); +void intel_display_driver_disable_user_access(struct intel_display *display); +void intel_display_driver_suspend_access(struct intel_display *display); +void intel_display_driver_resume_access(struct intel_display *display); +bool intel_display_driver_check_access(struct intel_display *display); #endif /* __INTEL_DISPLAY_DRIVER_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index e1547ebce60e..069043f9d894 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -434,7 +434,8 @@ void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv, spin_lock(&dev_priv->irq_lock); - if (!dev_priv->display.irq.display_irqs_enabled) { + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + !dev_priv->display.irq.vlv_display_irqs_enabled) { spin_unlock(&dev_priv->irq_lock); return; } @@ -843,7 +844,9 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv) { - if (DISPLAY_VER(dev_priv) >= 14) + struct intel_display *display = &dev_priv->display; + + if (DISPLAY_VER(display) >= 14) return MTL_PIPEDMC_ATS_FAULT | MTL_PLANE_ATS_FAULT | GEN12_PIPEDMC_FAULT | @@ -853,7 +856,7 @@ static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv) GEN9_PIPE_PLANE3_FAULT | GEN9_PIPE_PLANE2_FAULT | GEN9_PIPE_PLANE1_FAULT; - if (DISPLAY_VER(dev_priv) >= 13 || 
HAS_D12_PLANE_MINIMIZATION(dev_priv)) + if (DISPLAY_VER(display) >= 13 || HAS_D12_PLANE_MINIMIZATION(display)) return GEN12_PIPEDMC_FAULT | GEN9_PIPE_CURSOR_FAULT | GEN11_PIPE_PLANE5_FAULT | @@ -861,7 +864,7 @@ static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv) GEN9_PIPE_PLANE3_FAULT | GEN9_PIPE_PLANE2_FAULT | GEN9_PIPE_PLANE1_FAULT; - else if (DISPLAY_VER(dev_priv) == 12) + else if (DISPLAY_VER(display) == 12) return GEN12_PIPEDMC_FAULT | GEN9_PIPE_CURSOR_FAULT | GEN11_PIPE_PLANE7_FAULT | @@ -871,7 +874,7 @@ static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv) GEN9_PIPE_PLANE3_FAULT | GEN9_PIPE_PLANE2_FAULT | GEN9_PIPE_PLANE1_FAULT; - else if (DISPLAY_VER(dev_priv) == 11) + else if (DISPLAY_VER(display) == 11) return GEN9_PIPE_CURSOR_FAULT | GEN11_PIPE_PLANE7_FAULT | GEN11_PIPE_PLANE6_FAULT | @@ -880,7 +883,7 @@ static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv) GEN9_PIPE_PLANE3_FAULT | GEN9_PIPE_PLANE2_FAULT | GEN9_PIPE_PLANE1_FAULT; - else if (DISPLAY_VER(dev_priv) >= 9) + else if (DISPLAY_VER(display) >= 9) return GEN9_PIPE_CURSOR_FAULT | GEN9_PIPE_PLANE4_FAULT | GEN9_PIPE_PLANE3_FAULT | @@ -1420,7 +1423,6 @@ static void intel_display_vblank_dc_work(struct work_struct *work) { struct intel_display *display = container_of(work, typeof(*display), irq.vblank_dc_work); - struct drm_i915_private *i915 = to_i915(display->drm); int vblank_wa_num_pipes = READ_ONCE(display->irq.vblank_wa_num_pipes); /* @@ -1429,7 +1431,7 @@ static void intel_display_vblank_dc_work(struct work_struct *work) * PSR code. If DC3CO is taken into use we need take that into account * here as well. */ - intel_display_power_set_target_dc_state(i915, vblank_wa_num_pipes ? DC_STATE_DISABLE : + intel_display_power_set_target_dc_state(display, vblank_wa_num_pipes ? 
DC_STATE_DISABLE : DC_STATE_EN_UPTO_DC6); } @@ -1479,7 +1481,7 @@ void bdw_disable_vblank(struct drm_crtc *_crtc) schedule_work(&display->irq.vblank_dc_work); } -void vlv_display_irq_reset(struct drm_i915_private *dev_priv) +static void _vlv_display_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; @@ -1497,6 +1499,12 @@ void vlv_display_irq_reset(struct drm_i915_private *dev_priv) dev_priv->irq_mask = ~0u; } +void vlv_display_irq_reset(struct drm_i915_private *dev_priv) +{ + if (dev_priv->display.irq.vlv_display_irqs_enabled) + _vlv_display_irq_reset(dev_priv); +} + void i9xx_display_irq_reset(struct drm_i915_private *i915) { if (I915_HAS_HOTPLUG(i915)) { @@ -1516,6 +1524,9 @@ void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) u32 enable_mask; enum pipe pipe; + if (!dev_priv->display.irq.vlv_display_irqs_enabled) + return; + pipestat_mask = PIPE_CRC_DONE_INTERRUPT_STATUS; i915_enable_pipestat(dev_priv, PIPE_A, PIPE_GMBUS_INTERRUPT_STATUS); @@ -1688,13 +1699,13 @@ void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->irq_lock); - if (dev_priv->display.irq.display_irqs_enabled) + if (dev_priv->display.irq.vlv_display_irqs_enabled) return; - dev_priv->display.irq.display_irqs_enabled = true; + dev_priv->display.irq.vlv_display_irqs_enabled = true; if (intel_irqs_enabled(dev_priv)) { - vlv_display_irq_reset(dev_priv); + _vlv_display_irq_reset(dev_priv); vlv_display_irq_postinstall(dev_priv); } } @@ -1703,13 +1714,13 @@ void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->irq_lock); - if (!dev_priv->display.irq.display_irqs_enabled) + if (!dev_priv->display.irq.vlv_display_irqs_enabled) return; - dev_priv->display.irq.display_irqs_enabled = false; + dev_priv->display.irq.vlv_display_irqs_enabled = false; if (intel_irqs_enabled(dev_priv)) - vlv_display_irq_reset(dev_priv); + _vlv_display_irq_reset(dev_priv); 
} void ilk_de_irq_postinstall(struct drm_i915_private *i915) @@ -1902,17 +1913,6 @@ void intel_display_irq_init(struct drm_i915_private *i915) { i915->drm.vblank_disable_immediate = true; - /* - * Most platforms treat the display irq block as an always-on power - * domain. vlv/chv can disable it at runtime and need special care to - * avoid writing any of the display block registers outside of the power - * domain. We defer setting up the display irqs in this case to the - * runtime pm. - */ - i915->display.irq.display_irqs_enabled = true; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) - i915->display.irq.display_irqs_enabled = false; - intel_hotplug_irq_init(i915); INIT_WORK(&i915->display.irq.vblank_dc_work, diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c index 024de8abcb1a..f92e4640a613 100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_params.c @@ -3,8 +3,13 @@ * Copyright © 2023 Intel Corporation */ +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/string_choices.h> + +#include <drm/drm_print.h> + #include "intel_display_params.h" -#include "i915_drv.h" #define intel_display_param_named(name, T, perm, desc) \ module_param_named(name, intel_display_modparams.name, T, perm); \ @@ -123,10 +128,10 @@ intel_display_param_named_unsafe(enable_psr2_sel_fetch, bool, 0400, "(0=disabled, 1=enabled) " "Default: 1"); -intel_display_param_named_unsafe(enable_dmc_wl, bool, 0400, +intel_display_param_named_unsafe(enable_dmc_wl, int, 0400, "Enable DMC wakelock " - "(0=disabled, 1=enabled) " - "Default: 0"); + "(-1=use per-chip default, 0=disabled, 1=enabled) " + "Default: -1"); __maybe_unused static void _param_print_bool(struct drm_printer *p, const char *driver_name, diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h index dcb6face936a..5317138e6044 
100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.h +++ b/drivers/gpu/drm/i915/display/intel_display_params.h @@ -47,7 +47,7 @@ struct drm_printer; param(int, enable_psr, -1, 0600) \ param(bool, psr_safest_params, false, 0400) \ param(bool, enable_psr2_sel_fetch, true, 0400) \ - param(bool, enable_dmc_wl, false, 0400) \ + param(int, enable_dmc_wl, -1, 0400) \ #define MEMBER(T, member, ...) T member; struct intel_display_params { diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 2766fd9208b0..d3b8453a1705 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -28,12 +28,12 @@ #include "skl_watermark_regs.h" #include "vlv_sideband.h" -#define for_each_power_domain_well(__dev_priv, __power_well, __domain) \ - for_each_power_well(__dev_priv, __power_well) \ +#define for_each_power_domain_well(__display, __power_well, __domain) \ + for_each_power_well((__display), __power_well) \ for_each_if(test_bit((__domain), (__power_well)->domains.bits)) -#define for_each_power_domain_well_reverse(__dev_priv, __power_well, __domain) \ - for_each_power_well_reverse(__dev_priv, __power_well) \ +#define for_each_power_domain_well_reverse(__display, __power_well, __domain) \ + for_each_power_well_reverse((__display), __power_well) \ for_each_if(test_bit((__domain), (__power_well)->domains.bits)) static const char * @@ -198,18 +198,18 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) } } -static bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, +static bool __intel_display_power_is_enabled(struct intel_display *display, enum intel_display_power_domain domain) { struct i915_power_well *power_well; bool is_enabled; - if (pm_runtime_suspended(dev_priv->drm.dev)) + if (pm_runtime_suspended(display->drm->dev)) return false; is_enabled = true; - for_each_power_domain_well_reverse(dev_priv, 
power_well, domain) { + for_each_power_domain_well_reverse(display, power_well, domain) { if (intel_power_well_is_always_on(power_well)) continue; @@ -242,23 +242,22 @@ static bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains; + struct intel_display *display = &dev_priv->display; + struct i915_power_domains *power_domains = &display->power.domains; bool ret; - power_domains = &dev_priv->display.power.domains; - mutex_lock(&power_domains->lock); - ret = __intel_display_power_is_enabled(dev_priv, domain); + ret = __intel_display_power_is_enabled(display, domain); mutex_unlock(&power_domains->lock); return ret; } static u32 -sanitize_target_dc_state(struct drm_i915_private *i915, +sanitize_target_dc_state(struct intel_display *display, u32 target_dc_state) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct i915_power_domains *power_domains = &display->power.domains; static const u32 states[] = { DC_STATE_EN_UPTO_DC6, DC_STATE_EN_UPTO_DC5, @@ -282,43 +281,43 @@ sanitize_target_dc_state(struct drm_i915_private *i915, /** * intel_display_power_set_target_dc_state - Set target dc state. - * @dev_priv: i915 device + * @display: display device * @state: state which needs to be set as target_dc_state. * * This function set the "DC off" power well target_dc_state, * based upon this target_dc_stste, "DC off" power well will * enable desired DC state. 
*/ -void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, +void intel_display_power_set_target_dc_state(struct intel_display *display, u32 state) { struct i915_power_well *power_well; bool dc_off_enabled; - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; + struct i915_power_domains *power_domains = &display->power.domains; mutex_lock(&power_domains->lock); - power_well = lookup_power_well(dev_priv, SKL_DISP_DC_OFF); + power_well = lookup_power_well(display, SKL_DISP_DC_OFF); - if (drm_WARN_ON(&dev_priv->drm, !power_well)) + if (drm_WARN_ON(display->drm, !power_well)) goto unlock; - state = sanitize_target_dc_state(dev_priv, state); + state = sanitize_target_dc_state(display, state); if (state == power_domains->target_dc_state) goto unlock; - dc_off_enabled = intel_power_well_is_enabled(dev_priv, power_well); + dc_off_enabled = intel_power_well_is_enabled(display, power_well); /* * If DC off power well is disabled, need to enable and disable the * DC off power well to effect target DC state. 
*/ if (!dc_off_enabled) - intel_power_well_enable(dev_priv, power_well); + intel_power_well_enable(display, power_well); power_domains->target_dc_state = state; if (!dc_off_enabled) - intel_power_well_disable(dev_priv, power_well); + intel_power_well_disable(display, power_well); unlock: mutex_unlock(&power_domains->lock); @@ -338,11 +337,11 @@ static void __async_put_domains_mask(struct i915_power_domains *power_domains, static bool assert_async_put_domain_masks_disjoint(struct i915_power_domains *power_domains) { - struct drm_i915_private *i915 = container_of(power_domains, - struct drm_i915_private, - display.power.domains); + struct intel_display *display = container_of(power_domains, + struct intel_display, + power.domains); - return !drm_WARN_ON(&i915->drm, + return !drm_WARN_ON(display->drm, bitmap_intersects(power_domains->async_put_domains[0].bits, power_domains->async_put_domains[1].bits, POWER_DOMAIN_NUM)); @@ -351,21 +350,21 @@ assert_async_put_domain_masks_disjoint(struct i915_power_domains *power_domains) static bool __async_put_domains_state_ok(struct i915_power_domains *power_domains) { - struct drm_i915_private *i915 = container_of(power_domains, - struct drm_i915_private, - display.power.domains); + struct intel_display *display = container_of(power_domains, + struct intel_display, + power.domains); struct intel_power_domain_mask async_put_mask; enum intel_display_power_domain domain; bool err = false; err |= !assert_async_put_domain_masks_disjoint(power_domains); __async_put_domains_mask(power_domains, &async_put_mask); - err |= drm_WARN_ON(&i915->drm, + err |= drm_WARN_ON(display->drm, !!power_domains->async_put_wakeref != !bitmap_empty(async_put_mask.bits, POWER_DOMAIN_NUM)); for_each_power_domain(domain, &async_put_mask) - err |= drm_WARN_ON(&i915->drm, + err |= drm_WARN_ON(display->drm, power_domains->domain_use_count[domain] != 1); return !err; @@ -374,27 +373,27 @@ __async_put_domains_state_ok(struct i915_power_domains *power_domains) 
static void print_power_domains(struct i915_power_domains *power_domains, const char *prefix, struct intel_power_domain_mask *mask) { - struct drm_i915_private *i915 = container_of(power_domains, - struct drm_i915_private, - display.power.domains); + struct intel_display *display = container_of(power_domains, + struct intel_display, + power.domains); enum intel_display_power_domain domain; - drm_dbg(&i915->drm, "%s (%d):\n", prefix, bitmap_weight(mask->bits, POWER_DOMAIN_NUM)); + drm_dbg_kms(display->drm, "%s (%d):\n", prefix, bitmap_weight(mask->bits, POWER_DOMAIN_NUM)); for_each_power_domain(domain, mask) - drm_dbg(&i915->drm, "%s use_count %d\n", - intel_display_power_domain_str(domain), - power_domains->domain_use_count[domain]); + drm_dbg_kms(display->drm, "%s use_count %d\n", + intel_display_power_domain_str(domain), + power_domains->domain_use_count[domain]); } static void print_async_put_domains_state(struct i915_power_domains *power_domains) { - struct drm_i915_private *i915 = container_of(power_domains, - struct drm_i915_private, - display.power.domains); + struct intel_display *display = container_of(power_domains, + struct intel_display, + power.domains); - drm_dbg(&i915->drm, "async_put_wakeref: %s\n", - str_yes_no(power_domains->async_put_wakeref)); + drm_dbg_kms(display->drm, "async_put_wakeref: %s\n", + str_yes_no(power_domains->async_put_wakeref)); print_power_domains(power_domains, "async_put_domains[0]", &power_domains->async_put_domains[0]); @@ -454,10 +453,11 @@ cancel_async_put_work(struct i915_power_domains *power_domains, bool sync) } static bool -intel_display_power_grab_async_put_ref(struct drm_i915_private *dev_priv, +intel_display_power_grab_async_put_ref(struct intel_display *display, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; + struct drm_i915_private *dev_priv = to_i915(display->drm); + struct i915_power_domains *power_domains = &display->power.domains; 
struct intel_power_domain_mask async_put_mask; bool ret = false; @@ -483,17 +483,17 @@ out_verify: } static void -__intel_display_power_get_domain(struct drm_i915_private *dev_priv, +__intel_display_power_get_domain(struct intel_display *display, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; + struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *power_well; - if (intel_display_power_grab_async_put_ref(dev_priv, domain)) + if (intel_display_power_grab_async_put_ref(display, domain)) return; - for_each_power_domain_well(dev_priv, power_well, domain) - intel_power_well_get(dev_priv, power_well); + for_each_power_domain_well(display, power_well, domain) + intel_power_well_get(display, power_well); power_domains->domain_use_count[domain]++; } @@ -513,11 +513,12 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; + struct intel_display *display = &dev_priv->display; + struct i915_power_domains *power_domains = &display->power.domains; intel_wakeref_t wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&power_domains->lock); - __intel_display_power_get_domain(dev_priv, domain); + __intel_display_power_get_domain(display, domain); mutex_unlock(&power_domains->lock); return wakeref; @@ -539,7 +540,8 @@ intel_wakeref_t intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; + struct intel_display *display = &dev_priv->display; + struct i915_power_domains *power_domains = &display->power.domains; intel_wakeref_t wakeref; bool is_enabled; @@ -549,8 +551,8 @@ intel_display_power_get_if_enabled(struct 
drm_i915_private *dev_priv, mutex_lock(&power_domains->lock); - if (__intel_display_power_is_enabled(dev_priv, domain)) { - __intel_display_power_get_domain(dev_priv, domain); + if (__intel_display_power_is_enabled(display, domain)) { + __intel_display_power_get_domain(display, domain); is_enabled = true; } else { is_enabled = false; @@ -567,38 +569,36 @@ intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, } static void -__intel_display_power_put_domain(struct drm_i915_private *dev_priv, +__intel_display_power_put_domain(struct intel_display *display, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains; + struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *power_well; const char *name = intel_display_power_domain_str(domain); struct intel_power_domain_mask async_put_mask; - power_domains = &dev_priv->display.power.domains; - - drm_WARN(&dev_priv->drm, !power_domains->domain_use_count[domain], + drm_WARN(display->drm, !power_domains->domain_use_count[domain], "Use count on domain %s is already zero\n", name); async_put_domains_mask(power_domains, &async_put_mask); - drm_WARN(&dev_priv->drm, + drm_WARN(display->drm, test_bit(domain, async_put_mask.bits), "Async disabling of domain %s is pending\n", name); power_domains->domain_use_count[domain]--; - for_each_power_domain_well_reverse(dev_priv, power_well, domain) - intel_power_well_put(dev_priv, power_well); + for_each_power_domain_well_reverse(display, power_well, domain) + intel_power_well_put(display, power_well); } -static void __intel_display_power_put(struct drm_i915_private *dev_priv, +static void __intel_display_power_put(struct intel_display *display, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; + struct i915_power_domains *power_domains = &display->power.domains; mutex_lock(&power_domains->lock); - __intel_display_power_put_domain(dev_priv, 
domain); + __intel_display_power_put_domain(display, domain); mutex_unlock(&power_domains->lock); } @@ -607,23 +607,24 @@ queue_async_put_domains_work(struct i915_power_domains *power_domains, intel_wakeref_t wakeref, int delay_ms) { - struct drm_i915_private *i915 = container_of(power_domains, - struct drm_i915_private, - display.power.domains); - drm_WARN_ON(&i915->drm, power_domains->async_put_wakeref); + struct intel_display *display = container_of(power_domains, + struct intel_display, + power.domains); + drm_WARN_ON(display->drm, power_domains->async_put_wakeref); power_domains->async_put_wakeref = wakeref; - drm_WARN_ON(&i915->drm, !queue_delayed_work(system_unbound_wq, - &power_domains->async_put_work, - msecs_to_jiffies(delay_ms))); + drm_WARN_ON(display->drm, !queue_delayed_work(system_unbound_wq, + &power_domains->async_put_work, + msecs_to_jiffies(delay_ms))); } static void release_async_put_domains(struct i915_power_domains *power_domains, struct intel_power_domain_mask *mask) { - struct drm_i915_private *dev_priv = - container_of(power_domains, struct drm_i915_private, - display.power.domains); + struct intel_display *display = container_of(power_domains, + struct intel_display, + power.domains); + struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; enum intel_display_power_domain domain; intel_wakeref_t wakeref; @@ -633,7 +634,7 @@ release_async_put_domains(struct i915_power_domains *power_domains, for_each_power_domain(domain, mask) { /* Clear before put, so put's sanity check is happy. 
*/ async_put_domains_clear_domain(power_domains, domain); - __intel_display_power_put_domain(dev_priv, domain); + __intel_display_power_put_domain(display, domain); } intel_runtime_pm_put(rpm, wakeref); @@ -642,10 +643,10 @@ release_async_put_domains(struct i915_power_domains *power_domains, static void intel_display_power_put_async_work(struct work_struct *work) { - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - display.power.domains.async_put_work.work); - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; + struct intel_display *display = container_of(work, struct intel_display, + power.domains.async_put_work.work); + struct drm_i915_private *dev_priv = to_i915(display->drm); + struct i915_power_domains *power_domains = &display->power.domains; struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; intel_wakeref_t new_work_wakeref = intel_runtime_pm_get_raw(rpm); intel_wakeref_t old_work_wakeref = NULL; @@ -711,7 +712,8 @@ void __intel_display_power_put_async(struct drm_i915_private *i915, intel_wakeref_t wakeref, int delay_ms) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct intel_display *display = &i915->display; + struct i915_power_domains *power_domains = &display->power.domains; struct intel_runtime_pm *rpm = &i915->runtime_pm; intel_wakeref_t work_wakeref = intel_runtime_pm_get_raw(rpm); @@ -720,12 +722,12 @@ void __intel_display_power_put_async(struct drm_i915_private *i915, mutex_lock(&power_domains->lock); if (power_domains->domain_use_count[domain] > 1) { - __intel_display_power_put_domain(i915, domain); + __intel_display_power_put_domain(display, domain); goto out_verify; } - drm_WARN_ON(&i915->drm, power_domains->domain_use_count[domain] != 1); + drm_WARN_ON(display->drm, power_domains->domain_use_count[domain] != 1); /* Let a pending work requeue itself or queue a new one. 
*/ if (power_domains->async_put_wakeref) { @@ -764,7 +766,8 @@ out_verify: */ void intel_display_power_flush_work(struct drm_i915_private *i915) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct intel_display *display = &i915->display; + struct i915_power_domains *power_domains = &display->power.domains; struct intel_power_domain_mask async_put_mask; intel_wakeref_t work_wakeref; @@ -789,22 +792,23 @@ out_verify: /** * intel_display_power_flush_work_sync - flushes and syncs the async display power disabling work - * @i915: i915 device instance + * @display: display device instance * * Like intel_display_power_flush_work(), but also ensure that the work * handler function is not running any more when this function returns. */ static void -intel_display_power_flush_work_sync(struct drm_i915_private *i915) +intel_display_power_flush_work_sync(struct intel_display *display) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct drm_i915_private *i915 = to_i915(display->drm); + struct i915_power_domains *power_domains = &display->power.domains; intel_display_power_flush_work(i915); cancel_async_put_work(power_domains, true); verify_async_put_domains_state(power_domains); - drm_WARN_ON(&i915->drm, power_domains->async_put_wakeref); + drm_WARN_ON(display->drm, power_domains->async_put_wakeref); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) @@ -822,7 +826,9 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain, intel_wakeref_t wakeref) { - __intel_display_power_put(dev_priv, domain); + struct intel_display *display = &dev_priv->display; + + __intel_display_power_put(display, domain); intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); } #else @@ -842,7 +848,9 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - 
__intel_display_power_put(dev_priv, domain); + struct intel_display *display = &dev_priv->display; + + __intel_display_power_put(display, domain); intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm); } #endif @@ -852,9 +860,10 @@ intel_display_power_get_in_set(struct drm_i915_private *i915, struct intel_display_power_domain_set *power_domain_set, enum intel_display_power_domain domain) { + struct intel_display *display = &i915->display; intel_wakeref_t __maybe_unused wf; - drm_WARN_ON(&i915->drm, test_bit(domain, power_domain_set->mask.bits)); + drm_WARN_ON(display->drm, test_bit(domain, power_domain_set->mask.bits)); wf = intel_display_power_get(i915, domain); #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) @@ -868,9 +877,10 @@ intel_display_power_get_in_set_if_enabled(struct drm_i915_private *i915, struct intel_display_power_domain_set *power_domain_set, enum intel_display_power_domain domain) { + struct intel_display *display = &i915->display; intel_wakeref_t wf; - drm_WARN_ON(&i915->drm, test_bit(domain, power_domain_set->mask.bits)); + drm_WARN_ON(display->drm, test_bit(domain, power_domain_set->mask.bits)); wf = intel_display_power_get_if_enabled(i915, domain); if (!wf) @@ -889,9 +899,10 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, struct intel_display_power_domain_set *power_domain_set, struct intel_power_domain_mask *mask) { + struct intel_display *display = &i915->display; enum intel_display_power_domain domain; - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, !bitmap_subset(mask->bits, power_domain_set->mask.bits, POWER_DOMAIN_NUM)); for_each_power_domain(domain, mask) { @@ -906,8 +917,7 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, } static int -sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv, - int disable_power_well) +sanitize_disable_power_well_option(int disable_power_well) { if (disable_power_well >= 0) return !!disable_power_well; @@ -915,27 +925,26 @@ 
sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv, return 1; } -static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, - int enable_dc) +static u32 get_allowed_dc_mask(struct intel_display *display, int enable_dc) { u32 mask; int requested_dc; int max_dc; - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(display)) return 0; - if (DISPLAY_VER(dev_priv) >= 20) + if (DISPLAY_VER(display) >= 20) max_dc = 2; - else if (IS_DG2(dev_priv)) + else if (display->platform.dg2) max_dc = 1; - else if (IS_DG1(dev_priv)) + else if (display->platform.dg1) max_dc = 3; - else if (DISPLAY_VER(dev_priv) >= 12) + else if (DISPLAY_VER(display) >= 12) max_dc = 4; - else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) + else if (display->platform.geminilake || display->platform.broxton) max_dc = 1; - else if (DISPLAY_VER(dev_priv) >= 9) + else if (DISPLAY_VER(display) >= 9) max_dc = 2; else max_dc = 0; @@ -945,11 +954,10 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, * not depending on the DMC firmware. It's needed by system * suspend/resume, so allow it unconditionally. */ - mask = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) || - DISPLAY_VER(dev_priv) >= 11 ? - DC_STATE_EN_DC9 : 0; + mask = display->platform.geminilake || display->platform.broxton || + DISPLAY_VER(display) >= 11 ? 
DC_STATE_EN_DC9 : 0; - if (!dev_priv->display.params.disable_power_well) + if (!display->params.disable_power_well) max_dc = 0; if (enable_dc >= 0 && enable_dc <= max_dc) { @@ -957,12 +965,12 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, } else if (enable_dc == -1) { requested_dc = max_dc; } else if (enable_dc > max_dc && enable_dc <= 4) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Adjusting requested max DC state (%d->%d)\n", enable_dc, max_dc); requested_dc = max_dc; } else { - drm_err(&dev_priv->drm, + drm_err(display->drm, "Unexpected value for enable_dc (%d)\n", enable_dc); requested_dc = max_dc; } @@ -982,30 +990,29 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, break; } - drm_dbg_kms(&dev_priv->drm, "Allowed DC state mask %02x\n", mask); + drm_dbg_kms(display->drm, "Allowed DC state mask %02x\n", mask); return mask; } /** * intel_power_domains_init - initializes the power domain structures - * @dev_priv: i915 device instance + * @display: display device instance * * Initializes the power domain structures for @dev_priv depending upon the * supported platform. 
*/ -int intel_power_domains_init(struct drm_i915_private *dev_priv) +int intel_power_domains_init(struct intel_display *display) { - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; + struct i915_power_domains *power_domains = &display->power.domains; - dev_priv->display.params.disable_power_well = - sanitize_disable_power_well_option(dev_priv, - dev_priv->display.params.disable_power_well); + display->params.disable_power_well = + sanitize_disable_power_well_option(display->params.disable_power_well); power_domains->allowed_dc_mask = - get_allowed_dc_mask(dev_priv, dev_priv->display.params.enable_dc); + get_allowed_dc_mask(display, display->params.enable_dc); power_domains->target_dc_state = - sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + sanitize_target_dc_state(display, DC_STATE_EN_UPTO_DC6); mutex_init(&power_domains->lock); @@ -1017,39 +1024,39 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) /** * intel_power_domains_cleanup - clean up power domains resources - * @dev_priv: i915 device instance + * @display: display device instance * * Release any resources acquired by intel_power_domains_init() */ -void intel_power_domains_cleanup(struct drm_i915_private *dev_priv) +void intel_power_domains_cleanup(struct intel_display *display) { - intel_display_power_map_cleanup(&dev_priv->display.power.domains); + intel_display_power_map_cleanup(&display->power.domains); } -static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) +static void intel_power_domains_sync_hw(struct intel_display *display) { - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; + struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *power_well; mutex_lock(&power_domains->lock); - for_each_power_well(dev_priv, power_well) - intel_power_well_sync_hw(dev_priv, power_well); + for_each_power_well(display, power_well) + intel_power_well_sync_hw(display, 
power_well); mutex_unlock(&power_domains->lock); } -static void gen9_dbuf_slice_set(struct drm_i915_private *dev_priv, +static void gen9_dbuf_slice_set(struct intel_display *display, enum dbuf_slice slice, bool enable) { i915_reg_t reg = DBUF_CTL_S(slice); bool state; - intel_de_rmw(dev_priv, reg, DBUF_POWER_REQUEST, + intel_de_rmw(display, reg, DBUF_POWER_REQUEST, enable ? DBUF_POWER_REQUEST : 0); - intel_de_posting_read(dev_priv, reg); + intel_de_posting_read(display, reg); udelay(10); - state = intel_de_read(dev_priv, reg) & DBUF_POWER_STATE; - drm_WARN(&dev_priv->drm, enable != state, + state = intel_de_read(display, reg) & DBUF_POWER_STATE; + drm_WARN(display->drm, enable != state, "DBuf slice %d power %s timeout!\n", slice, str_enable_disable(enable)); } @@ -1057,15 +1064,16 @@ static void gen9_dbuf_slice_set(struct drm_i915_private *dev_priv, void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices) { - struct i915_power_domains *power_domains = &dev_priv->display.power.domains; - u8 slice_mask = DISPLAY_INFO(dev_priv)->dbuf.slice_mask; + struct intel_display *display = &dev_priv->display; + struct i915_power_domains *power_domains = &display->power.domains; + u8 slice_mask = DISPLAY_INFO(display)->dbuf.slice_mask; enum dbuf_slice slice; - drm_WARN(&dev_priv->drm, req_slices & ~slice_mask, + drm_WARN(display->drm, req_slices & ~slice_mask, "Invalid set of dbuf slices (0x%x) requested (total dbuf slices 0x%x)\n", req_slices, slice_mask); - drm_dbg_kms(&dev_priv->drm, "Updating dbuf slices to 0x%x\n", + drm_dbg_kms(display->drm, "Updating dbuf slices to 0x%x\n", req_slices); /* @@ -1077,25 +1085,25 @@ void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, */ mutex_lock(&power_domains->lock); - for_each_dbuf_slice(dev_priv, slice) - gen9_dbuf_slice_set(dev_priv, slice, req_slices & BIT(slice)); + for_each_dbuf_slice(display, slice) + gen9_dbuf_slice_set(display, slice, req_slices & BIT(slice)); - 
dev_priv->display.dbuf.enabled_slices = req_slices; + display->dbuf.enabled_slices = req_slices; mutex_unlock(&power_domains->lock); } -static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) +static void gen9_dbuf_enable(struct intel_display *display) { + struct drm_i915_private *dev_priv = to_i915(display->drm); u8 slices_mask; - dev_priv->display.dbuf.enabled_slices = - intel_enabled_dbuf_slices_mask(dev_priv); + display->dbuf.enabled_slices = intel_enabled_dbuf_slices_mask(dev_priv); - slices_mask = BIT(DBUF_S1) | dev_priv->display.dbuf.enabled_slices; + slices_mask = BIT(DBUF_S1) | display->dbuf.enabled_slices; - if (DISPLAY_VER(dev_priv) >= 14) - intel_pmdemand_program_dbuf(dev_priv, slices_mask); + if (DISPLAY_VER(display) >= 14) + intel_pmdemand_program_dbuf(display, slices_mask); /* * Just power up at least 1 slice, we will @@ -1104,33 +1112,35 @@ static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) gen9_dbuf_slices_update(dev_priv, slices_mask); } -static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) +static void gen9_dbuf_disable(struct intel_display *display) { + struct drm_i915_private *dev_priv = to_i915(display->drm); + gen9_dbuf_slices_update(dev_priv, 0); - if (DISPLAY_VER(dev_priv) >= 14) - intel_pmdemand_program_dbuf(dev_priv, 0); + if (DISPLAY_VER(display) >= 14) + intel_pmdemand_program_dbuf(display, 0); } -static void gen12_dbuf_slices_config(struct drm_i915_private *dev_priv) +static void gen12_dbuf_slices_config(struct intel_display *display) { enum dbuf_slice slice; - if (IS_ALDERLAKE_P(dev_priv)) + if (display->platform.alderlake_p) return; - for_each_dbuf_slice(dev_priv, slice) - intel_de_rmw(dev_priv, DBUF_CTL_S(slice), + for_each_dbuf_slice(display, slice) + intel_de_rmw(display, DBUF_CTL_S(slice), DBUF_TRACKER_STATE_SERVICE_MASK, DBUF_TRACKER_STATE_SERVICE(8)); } -static void icl_mbus_init(struct drm_i915_private *dev_priv) +static void icl_mbus_init(struct intel_display *display) { - unsigned long 
abox_regs = DISPLAY_INFO(dev_priv)->abox_mask; + unsigned long abox_regs = DISPLAY_INFO(display)->abox_mask; u32 mask, val, i; - if (IS_ALDERLAKE_P(dev_priv) || DISPLAY_VER(dev_priv) >= 14) + if (display->platform.alderlake_p || DISPLAY_VER(display) >= 14) return; mask = MBUS_ABOX_BT_CREDIT_POOL1_MASK | @@ -1147,16 +1157,16 @@ static void icl_mbus_init(struct drm_i915_private *dev_priv) * expect us to program the abox_ctl0 register as well, even though * we don't have to program other instance-0 registers like BW_BUDDY. */ - if (DISPLAY_VER(dev_priv) == 12) + if (DISPLAY_VER(display) == 12) abox_regs |= BIT(0); for_each_set_bit(i, &abox_regs, sizeof(abox_regs)) - intel_de_rmw(dev_priv, MBUS_ABOX_CTL(i), mask, val); + intel_de_rmw(display, MBUS_ABOX_CTL(i), mask, val); } -static void hsw_assert_cdclk(struct drm_i915_private *dev_priv) +static void hsw_assert_cdclk(struct intel_display *display) { - u32 val = intel_de_read(dev_priv, LCPLL_CTL); + u32 val = intel_de_read(display, LCPLL_CTL); /* * The LCPLL register should be turned on by the BIOS. 
For now @@ -1165,18 +1175,18 @@ static void hsw_assert_cdclk(struct drm_i915_private *dev_priv) */ if (val & LCPLL_CD_SOURCE_FCLK) - drm_err(&dev_priv->drm, "CDCLK source is not LCPLL\n"); + drm_err(display->drm, "CDCLK source is not LCPLL\n"); if (val & LCPLL_PLL_DISABLE) - drm_err(&dev_priv->drm, "LCPLL is disabled\n"); + drm_err(display->drm, "LCPLL is disabled\n"); if ((val & LCPLL_REF_MASK) != LCPLL_REF_NON_SSC) - drm_err(&dev_priv->drm, "LCPLL not using non-SSC reference\n"); + drm_err(display->drm, "LCPLL not using non-SSC reference\n"); } -static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) +static void assert_can_disable_lcpll(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc; for_each_intel_crtc(display->drm, crtc) @@ -1201,7 +1211,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) INTEL_DISPLAY_STATE_WARN(display, intel_de_read(display, BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE, "CPU PWM1 enabled\n"); - if (IS_HASWELL(dev_priv)) + if (display->platform.haswell) INTEL_DISPLAY_STATE_WARN(display, intel_de_read(display, HSW_BLC_PWM2_CTL) & BLM_PWM_ENABLE, "CPU PWM2 enabled\n"); @@ -1225,23 +1235,24 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) "IRQs enabled\n"); } -static u32 hsw_read_dcomp(struct drm_i915_private *dev_priv) +static u32 hsw_read_dcomp(struct intel_display *display) { - if (IS_HASWELL(dev_priv)) - return intel_de_read(dev_priv, D_COMP_HSW); + if (display->platform.haswell) + return intel_de_read(display, D_COMP_HSW); else - return intel_de_read(dev_priv, D_COMP_BDW); + return intel_de_read(display, D_COMP_BDW); } -static void hsw_write_dcomp(struct drm_i915_private *dev_priv, u32 val) +static void hsw_write_dcomp(struct intel_display *display, u32 val) { - if (IS_HASWELL(dev_priv)) { + struct drm_i915_private *dev_priv = to_i915(display->drm); + + if 
(display->platform.haswell) { if (snb_pcode_write(&dev_priv->uncore, GEN6_PCODE_WRITE_D_COMP, val)) - drm_dbg_kms(&dev_priv->drm, - "Failed to write to D_COMP\n"); + drm_dbg_kms(display->drm, "Failed to write to D_COMP\n"); } else { - intel_de_write(dev_priv, D_COMP_BDW, val); - intel_de_posting_read(dev_priv, D_COMP_BDW); + intel_de_write(display, D_COMP_BDW, val); + intel_de_posting_read(display, D_COMP_BDW); } } @@ -1253,45 +1264,45 @@ static void hsw_write_dcomp(struct drm_i915_private *dev_priv, u32 val) * register. Callers should take care of disabling all the display engine * functions, doing the mode unset, fixing interrupts, etc. */ -static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, +static void hsw_disable_lcpll(struct intel_display *display, bool switch_to_fclk, bool allow_power_down) { u32 val; - assert_can_disable_lcpll(dev_priv); + assert_can_disable_lcpll(display); - val = intel_de_read(dev_priv, LCPLL_CTL); + val = intel_de_read(display, LCPLL_CTL); if (switch_to_fclk) { val |= LCPLL_CD_SOURCE_FCLK; - intel_de_write(dev_priv, LCPLL_CTL, val); + intel_de_write(display, LCPLL_CTL, val); - if (wait_for_us(intel_de_read(dev_priv, LCPLL_CTL) & + if (wait_for_us(intel_de_read(display, LCPLL_CTL) & LCPLL_CD_SOURCE_FCLK_DONE, 1)) - drm_err(&dev_priv->drm, "Switching to FCLK failed\n"); + drm_err(display->drm, "Switching to FCLK failed\n"); - val = intel_de_read(dev_priv, LCPLL_CTL); + val = intel_de_read(display, LCPLL_CTL); } val |= LCPLL_PLL_DISABLE; - intel_de_write(dev_priv, LCPLL_CTL, val); - intel_de_posting_read(dev_priv, LCPLL_CTL); + intel_de_write(display, LCPLL_CTL, val); + intel_de_posting_read(display, LCPLL_CTL); - if (intel_de_wait_for_clear(dev_priv, LCPLL_CTL, LCPLL_PLL_LOCK, 1)) - drm_err(&dev_priv->drm, "LCPLL still locked\n"); + if (intel_de_wait_for_clear(display, LCPLL_CTL, LCPLL_PLL_LOCK, 1)) + drm_err(display->drm, "LCPLL still locked\n"); - val = hsw_read_dcomp(dev_priv); + val = hsw_read_dcomp(display); val |= 
D_COMP_COMP_DISABLE; - hsw_write_dcomp(dev_priv, val); + hsw_write_dcomp(display, val); ndelay(100); - if (wait_for((hsw_read_dcomp(dev_priv) & + if (wait_for((hsw_read_dcomp(display) & D_COMP_RCOMP_IN_PROGRESS) == 0, 1)) - drm_err(&dev_priv->drm, "D_COMP RCOMP still in progress\n"); + drm_err(display->drm, "D_COMP RCOMP still in progress\n"); if (allow_power_down) { - intel_de_rmw(dev_priv, LCPLL_CTL, 0, LCPLL_POWER_DOWN_ALLOW); - intel_de_posting_read(dev_priv, LCPLL_CTL); + intel_de_rmw(display, LCPLL_CTL, 0, LCPLL_POWER_DOWN_ALLOW); + intel_de_posting_read(display, LCPLL_CTL); } } @@ -1299,12 +1310,12 @@ static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, * Fully restores LCPLL, disallowing power down and switching back to LCPLL * source. */ -static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) +static void hsw_restore_lcpll(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private __maybe_unused *dev_priv = to_i915(display->drm); u32 val; - val = intel_de_read(dev_priv, LCPLL_CTL); + val = intel_de_read(display, LCPLL_CTL); if ((val & (LCPLL_PLL_LOCK | LCPLL_PLL_DISABLE | LCPLL_CD_SOURCE_FCLK | LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK) @@ -1318,28 +1329,28 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) if (val & LCPLL_POWER_DOWN_ALLOW) { val &= ~LCPLL_POWER_DOWN_ALLOW; - intel_de_write(dev_priv, LCPLL_CTL, val); - intel_de_posting_read(dev_priv, LCPLL_CTL); + intel_de_write(display, LCPLL_CTL, val); + intel_de_posting_read(display, LCPLL_CTL); } - val = hsw_read_dcomp(dev_priv); + val = hsw_read_dcomp(display); val |= D_COMP_COMP_FORCE; val &= ~D_COMP_COMP_DISABLE; - hsw_write_dcomp(dev_priv, val); + hsw_write_dcomp(display, val); - val = intel_de_read(dev_priv, LCPLL_CTL); + val = intel_de_read(display, LCPLL_CTL); val &= ~LCPLL_PLL_DISABLE; - intel_de_write(dev_priv, LCPLL_CTL, val); + intel_de_write(display, LCPLL_CTL, val); - if 
(intel_de_wait_for_set(dev_priv, LCPLL_CTL, LCPLL_PLL_LOCK, 5)) - drm_err(&dev_priv->drm, "LCPLL not locked yet\n"); + if (intel_de_wait_for_set(display, LCPLL_CTL, LCPLL_PLL_LOCK, 5)) + drm_err(display->drm, "LCPLL not locked yet\n"); if (val & LCPLL_CD_SOURCE_FCLK) { - intel_de_rmw(dev_priv, LCPLL_CTL, LCPLL_CD_SOURCE_FCLK, 0); + intel_de_rmw(display, LCPLL_CTL, LCPLL_CD_SOURCE_FCLK, 0); - if (wait_for_us((intel_de_read(dev_priv, LCPLL_CTL) & + if (wait_for_us((intel_de_read(display, LCPLL_CTL) & LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) - drm_err(&dev_priv->drm, + drm_err(display->drm, "Switching back to LCPLL failed\n"); } @@ -1372,36 +1383,42 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) * For more, read "Display Sequences for Package C8" on the hardware * documentation. */ -static void hsw_enable_pc8(struct drm_i915_private *dev_priv) +static void hsw_enable_pc8(struct intel_display *display) { - drm_dbg_kms(&dev_priv->drm, "Enabling package C8+\n"); + struct drm_i915_private *dev_priv = to_i915(display->drm); + + drm_dbg_kms(display->drm, "Enabling package C8+\n"); if (HAS_PCH_LPT_LP(dev_priv)) - intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + intel_de_rmw(display, SOUTH_DSPCLK_GATE_D, PCH_LP_PARTITION_LEVEL_DISABLE, 0); lpt_disable_clkout_dp(dev_priv); - hsw_disable_lcpll(dev_priv, true, true); + hsw_disable_lcpll(display, true, true); } -static void hsw_disable_pc8(struct drm_i915_private *dev_priv) +static void hsw_disable_pc8(struct intel_display *display) { - drm_dbg_kms(&dev_priv->drm, "Disabling package C8+\n"); + struct drm_i915_private *dev_priv = to_i915(display->drm); + + drm_dbg_kms(display->drm, "Disabling package C8+\n"); - hsw_restore_lcpll(dev_priv); + hsw_restore_lcpll(display); intel_init_pch_refclk(dev_priv); /* Many display registers don't survive PC8+ */ +#ifdef I915 /* FIXME */ intel_clock_gating_init(dev_priv); +#endif } -static void intel_pch_reset_handshake(struct drm_i915_private *dev_priv, +static void 
intel_pch_reset_handshake(struct intel_display *display, bool enable) { i915_reg_t reg; u32 reset_bits; - if (IS_IVYBRIDGE(dev_priv)) { + if (display->platform.ivybridge) { reg = GEN7_MSG_CTL; reset_bits = WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK; } else { @@ -1409,59 +1426,58 @@ static void intel_pch_reset_handshake(struct drm_i915_private *dev_priv, reset_bits = RESET_PCH_HANDSHAKE_ENABLE; } - if (DISPLAY_VER(dev_priv) >= 14) + if (DISPLAY_VER(display) >= 14) reset_bits |= MTL_RESET_PICA_HANDSHAKE_EN; - intel_de_rmw(dev_priv, reg, reset_bits, enable ? reset_bits : 0); + intel_de_rmw(display, reg, reset_bits, enable ? reset_bits : 0); } -static void skl_display_core_init(struct drm_i915_private *dev_priv, +static void skl_display_core_init(struct intel_display *display, bool resume) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; gen9_set_dc_state(display, DC_STATE_DISABLE); /* enable PCH reset handshake */ - intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(dev_priv)); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(display)) return; /* enable PG1 and Misc I/O */ mutex_lock(&power_domains->lock); - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_enable(dev_priv, well); + well = lookup_power_well(display, SKL_DISP_PW_1); + intel_power_well_enable(display, well); - well = lookup_power_well(dev_priv, SKL_DISP_PW_MISC_IO); - intel_power_well_enable(dev_priv, well); + well = lookup_power_well(display, SKL_DISP_PW_MISC_IO); + intel_power_well_enable(display, well); mutex_unlock(&power_domains->lock); intel_cdclk_init_hw(display); - gen9_dbuf_enable(dev_priv); + gen9_dbuf_enable(display); if (resume) intel_dmc_load_program(display); } -static void skl_display_core_uninit(struct drm_i915_private *dev_priv) +static void 
skl_display_core_uninit(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(display)) return; gen9_disable_dc_states(display); /* TODO: disable DMC program */ - gen9_dbuf_disable(dev_priv); + gen9_dbuf_disable(display); intel_cdclk_uninit_hw(display); @@ -1476,17 +1492,16 @@ static void skl_display_core_uninit(struct drm_i915_private *dev_priv) * Note that even though the driver's request is removed power well 1 * may stay enabled after this due to DMC's own request on it. */ - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_disable(dev_priv, well); + well = lookup_power_well(display, SKL_DISP_PW_1); + intel_power_well_disable(display, well); mutex_unlock(&power_domains->lock); usleep_range(10, 30); /* 10 us delay per Bspec */ } -static void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume) +static void bxt_display_core_init(struct intel_display *display, bool resume) { - struct intel_display *display = &dev_priv->display; struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; @@ -1498,40 +1513,39 @@ static void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume * Move the handshake programming to initialization sequence. * Previously was left up to BIOS. 
*/ - intel_pch_reset_handshake(dev_priv, false); + intel_pch_reset_handshake(display, false); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(display)) return; /* Enable PG1 */ mutex_lock(&power_domains->lock); - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_enable(dev_priv, well); + well = lookup_power_well(display, SKL_DISP_PW_1); + intel_power_well_enable(display, well); mutex_unlock(&power_domains->lock); intel_cdclk_init_hw(display); - gen9_dbuf_enable(dev_priv); + gen9_dbuf_enable(display); if (resume) intel_dmc_load_program(display); } -static void bxt_display_core_uninit(struct drm_i915_private *dev_priv) +static void bxt_display_core_uninit(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(display)) return; gen9_disable_dc_states(display); /* TODO: disable DMC program */ - gen9_dbuf_disable(dev_priv); + gen9_dbuf_disable(display); intel_cdclk_uninit_hw(display); @@ -1544,8 +1558,8 @@ static void bxt_display_core_uninit(struct drm_i915_private *dev_priv) */ mutex_lock(&power_domains->lock); - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_disable(dev_priv, well); + well = lookup_power_well(display, SKL_DISP_PW_1); + intel_power_well_disable(display, well); mutex_unlock(&power_domains->lock); @@ -1582,20 +1596,21 @@ static const struct buddy_page_mask wa_1409767108_buddy_page_masks[] = { {} }; -static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) +static void tgl_bw_buddy_init(struct intel_display *display) { + struct drm_i915_private *dev_priv = to_i915(display->drm); enum intel_dram_type type = dev_priv->dram_info.type; u8 num_channels = dev_priv->dram_info.num_channels; const struct buddy_page_mask *table; - unsigned long abox_mask = DISPLAY_INFO(dev_priv)->abox_mask; + unsigned long abox_mask = 
DISPLAY_INFO(display)->abox_mask; int config, i; /* BW_BUDDY registers are not used on dgpu's beyond DG1 */ - if (IS_DGFX(dev_priv) && !IS_DG1(dev_priv)) + if (display->platform.dgfx && !display->platform.dg1) return; - if (IS_ALDERLAKE_S(dev_priv) || - (IS_ROCKETLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0))) + if (display->platform.alderlake_s || + (display->platform.rocketlake && IS_DISPLAY_STEP(display, STEP_A0, STEP_B0))) /* Wa_1409767108 */ table = wa_1409767108_buddy_page_masks; else @@ -1607,29 +1622,29 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) break; if (table[config].page_mask == 0) { - drm_dbg(&dev_priv->drm, - "Unknown memory configuration; disabling address buddy logic.\n"); + drm_dbg_kms(display->drm, + "Unknown memory configuration; disabling address buddy logic.\n"); for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) - intel_de_write(dev_priv, BW_BUDDY_CTL(i), + intel_de_write(display, BW_BUDDY_CTL(i), BW_BUDDY_DISABLE); } else { for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) { - intel_de_write(dev_priv, BW_BUDDY_PAGE_MASK(i), + intel_de_write(display, BW_BUDDY_PAGE_MASK(i), table[config].page_mask); /* Wa_22010178259:tgl,dg1,rkl,adl-s */ - if (DISPLAY_VER(dev_priv) == 12) - intel_de_rmw(dev_priv, BW_BUDDY_CTL(i), + if (DISPLAY_VER(display) == 12) + intel_de_rmw(display, BW_BUDDY_CTL(i), BW_BUDDY_TLB_REQ_TIMER_MASK, BW_BUDDY_TLB_REQ_TIMER(0x8)); } } } -static void icl_display_core_init(struct drm_i915_private *dev_priv, +static void icl_display_core_init(struct intel_display *display, bool resume) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; @@ -1638,13 +1653,13 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, /* Wa_14011294188:ehl,jsl,tgl,rkl,adl-s */ if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && 
INTEL_PCH_TYPE(dev_priv) < PCH_DG1) - intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, 0, + intel_de_rmw(display, SOUTH_DSPCLK_GATE_D, 0, PCH_DPMGUNIT_CLOCK_GATE_DISABLE); /* 1. Enable PCH reset handshake. */ - intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(dev_priv)); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(display)) return; /* 2. Initialize all combo phys */ @@ -1655,67 +1670,67 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, * The AUX IO power wells will be enabled on demand. */ mutex_lock(&power_domains->lock); - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_enable(dev_priv, well); + well = lookup_power_well(display, SKL_DISP_PW_1); + intel_power_well_enable(display, well); mutex_unlock(&power_domains->lock); - if (DISPLAY_VER(dev_priv) == 14) - intel_de_rmw(dev_priv, DC_STATE_EN, + if (DISPLAY_VER(display) == 14) + intel_de_rmw(display, DC_STATE_EN, HOLD_PHY_PG1_LATCH | HOLD_PHY_CLKREQ_PG1_LATCH, 0); /* 4. Enable CDCLK. */ intel_cdclk_init_hw(display); - if (DISPLAY_VER(dev_priv) >= 12) - gen12_dbuf_slices_config(dev_priv); + if (DISPLAY_VER(display) >= 12) + gen12_dbuf_slices_config(display); /* 5. Enable DBUF. */ - gen9_dbuf_enable(dev_priv); + gen9_dbuf_enable(display); /* 6. Setup MBUS. */ - icl_mbus_init(dev_priv); + icl_mbus_init(display); /* 7. Program arbiter BW_BUDDY registers */ - if (DISPLAY_VER(dev_priv) >= 12) - tgl_bw_buddy_init(dev_priv); + if (DISPLAY_VER(display) >= 12) + tgl_bw_buddy_init(display); /* 8. Ensure PHYs have completed calibration and adaptation */ - if (IS_DG2(dev_priv)) + if (display->platform.dg2) intel_snps_phy_wait_for_calibration(dev_priv); /* 9. 
XE2_HPD: Program CHICKEN_MISC_2 before any cursor or planes are enabled */ - if (DISPLAY_VERx100(dev_priv) == 1401) - intel_de_rmw(dev_priv, CHICKEN_MISC_2, BMG_DARB_HALF_BLK_END_BURST, 1); + if (DISPLAY_VERx100(display) == 1401) + intel_de_rmw(display, CHICKEN_MISC_2, BMG_DARB_HALF_BLK_END_BURST, 1); if (resume) intel_dmc_load_program(display); /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p,dg2 */ - if (IS_DISPLAY_VERx100(dev_priv, 1200, 1300)) - intel_de_rmw(dev_priv, GEN11_CHICKEN_DCPR_2, 0, + if (IS_DISPLAY_VERx100(display, 1200, 1300)) + intel_de_rmw(display, GEN11_CHICKEN_DCPR_2, 0, DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM | DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR); /* Wa_14011503030:xelpd */ - if (DISPLAY_VER(dev_priv) == 13) - intel_de_write(dev_priv, XELPD_DISPLAY_ERR_FATAL_MASK, ~0); + if (DISPLAY_VER(display) == 13) + intel_de_write(display, XELPD_DISPLAY_ERR_FATAL_MASK, ~0); /* Wa_15013987218 */ - if (DISPLAY_VER(dev_priv) == 20) { - intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + if (DISPLAY_VER(display) == 20) { + intel_de_rmw(display, SOUTH_DSPCLK_GATE_D, 0, PCH_GMBUSUNIT_CLOCK_GATE_DISABLE); - intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + intel_de_rmw(display, SOUTH_DSPCLK_GATE_D, PCH_GMBUSUNIT_CLOCK_GATE_DISABLE, 0); } } -static void icl_display_core_uninit(struct drm_i915_private *dev_priv) +static void icl_display_core_uninit(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(display)) return; gen9_disable_dc_states(display); @@ -1724,13 +1739,13 @@ static void icl_display_core_uninit(struct drm_i915_private *dev_priv) /* 1. Disable all display engine functions -> aready done */ /* 2. Disable DBUF */ - gen9_dbuf_disable(dev_priv); + gen9_dbuf_disable(display); /* 3. 
Disable CD clock */ intel_cdclk_uninit_hw(display); - if (DISPLAY_VER(dev_priv) == 14) - intel_de_rmw(dev_priv, DC_STATE_EN, 0, + if (DISPLAY_VER(display) == 14) + intel_de_rmw(display, DC_STATE_EN, 0, HOLD_PHY_PG1_LATCH | HOLD_PHY_CLKREQ_PG1_LATCH); /* @@ -1739,20 +1754,20 @@ static void icl_display_core_uninit(struct drm_i915_private *dev_priv) * disabled at this point. */ mutex_lock(&power_domains->lock); - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_disable(dev_priv, well); + well = lookup_power_well(display, SKL_DISP_PW_1); + intel_power_well_disable(display, well); mutex_unlock(&power_domains->lock); /* 5. */ intel_combo_phy_uninit(dev_priv); } -static void chv_phy_control_init(struct drm_i915_private *dev_priv) +static void chv_phy_control_init(struct intel_display *display) { struct i915_power_well *cmn_bc = - lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC); + lookup_power_well(display, VLV_DISP_PW_DPIO_CMN_BC); struct i915_power_well *cmn_d = - lookup_power_well(dev_priv, CHV_DISP_PW_DPIO_CMN_D); + lookup_power_well(display, CHV_DISP_PW_DPIO_CMN_D); /* * DISPLAY_PHY_CONTROL can get corrupted if read. As a @@ -1761,7 +1776,7 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv) * power well state and lane status to reconstruct the * expected initial value. */ - dev_priv->display.power.chv_phy_control = + display->power.chv_phy_control = PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY0) | PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY1) | PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY0, DPIO_CH0) | @@ -1775,39 +1790,39 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv) * override and set the lane powerdown bits accding to the * current lane status. 
*/ - if (intel_power_well_is_enabled(dev_priv, cmn_bc)) { - u32 status = intel_de_read(dev_priv, DPLL(dev_priv, PIPE_A)); + if (intel_power_well_is_enabled(display, cmn_bc)) { + u32 status = intel_de_read(display, DPLL(display, PIPE_A)); unsigned int mask; mask = status & DPLL_PORTB_READY_MASK; if (mask == 0xf) mask = 0x0; else - dev_priv->display.power.chv_phy_control |= + display->power.chv_phy_control |= PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH0); - dev_priv->display.power.chv_phy_control |= + display->power.chv_phy_control |= PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY0, DPIO_CH0); mask = (status & DPLL_PORTC_READY_MASK) >> 4; if (mask == 0xf) mask = 0x0; else - dev_priv->display.power.chv_phy_control |= + display->power.chv_phy_control |= PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH1); - dev_priv->display.power.chv_phy_control |= + display->power.chv_phy_control |= PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY0, DPIO_CH1); - dev_priv->display.power.chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY0); + display->power.chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY0); - dev_priv->display.power.chv_phy_assert[DPIO_PHY0] = false; + display->power.chv_phy_assert[DPIO_PHY0] = false; } else { - dev_priv->display.power.chv_phy_assert[DPIO_PHY0] = true; + display->power.chv_phy_assert[DPIO_PHY0] = true; } - if (intel_power_well_is_enabled(dev_priv, cmn_d)) { - u32 status = intel_de_read(dev_priv, DPIO_PHY_STATUS); + if (intel_power_well_is_enabled(display, cmn_d)) { + u32 status = intel_de_read(display, DPIO_PHY_STATUS); unsigned int mask; mask = status & DPLL_PORTD_READY_MASK; @@ -1815,42 +1830,42 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv) if (mask == 0xf) mask = 0x0; else - dev_priv->display.power.chv_phy_control |= + display->power.chv_phy_control |= PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY1, DPIO_CH0); - dev_priv->display.power.chv_phy_control |= + display->power.chv_phy_control |= PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY1, DPIO_CH0); - 
dev_priv->display.power.chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY1); + display->power.chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY1); - dev_priv->display.power.chv_phy_assert[DPIO_PHY1] = false; + display->power.chv_phy_assert[DPIO_PHY1] = false; } else { - dev_priv->display.power.chv_phy_assert[DPIO_PHY1] = true; + display->power.chv_phy_assert[DPIO_PHY1] = true; } - drm_dbg_kms(&dev_priv->drm, "Initial PHY_CONTROL=0x%08x\n", - dev_priv->display.power.chv_phy_control); + drm_dbg_kms(display->drm, "Initial PHY_CONTROL=0x%08x\n", + display->power.chv_phy_control); /* Defer application of initial phy_control to enabling the powerwell */ } -static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) +static void vlv_cmnlane_wa(struct intel_display *display) { struct i915_power_well *cmn = - lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC); + lookup_power_well(display, VLV_DISP_PW_DPIO_CMN_BC); struct i915_power_well *disp2d = - lookup_power_well(dev_priv, VLV_DISP_PW_DISP2D); + lookup_power_well(display, VLV_DISP_PW_DISP2D); /* If the display might be already active skip this */ - if (intel_power_well_is_enabled(dev_priv, cmn) && - intel_power_well_is_enabled(dev_priv, disp2d) && - intel_de_read(dev_priv, DPIO_CTL) & DPIO_CMNRST) + if (intel_power_well_is_enabled(display, cmn) && + intel_power_well_is_enabled(display, disp2d) && + intel_de_read(display, DPIO_CTL) & DPIO_CMNRST) return; - drm_dbg_kms(&dev_priv->drm, "toggling display PHY side reset\n"); + drm_dbg_kms(display->drm, "toggling display PHY side reset\n"); /* cmnlane needs DPLL registers */ - intel_power_well_enable(dev_priv, disp2d); + intel_power_well_enable(display, disp2d); /* * From VLV2A0_DP_eDP_HDMI_DPIO_driver_vbios_notes_11.docx: @@ -1859,11 +1874,12 @@ static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) * Simply ungating isn't enough to reset the PHY enough to get * ports and lanes running. 
*/ - intel_power_well_disable(dev_priv, cmn); + intel_power_well_disable(display, cmn); } -static bool vlv_punit_is_power_gated(struct drm_i915_private *dev_priv, u32 reg0) +static bool vlv_punit_is_power_gated(struct intel_display *display, u32 reg0) { + struct drm_i915_private *dev_priv = to_i915(display->drm); bool ret; vlv_punit_get(dev_priv); @@ -1873,14 +1889,14 @@ static bool vlv_punit_is_power_gated(struct drm_i915_private *dev_priv, u32 reg0 return ret; } -static void assert_ved_power_gated(struct drm_i915_private *dev_priv) +static void assert_ved_power_gated(struct intel_display *display) { - drm_WARN(&dev_priv->drm, - !vlv_punit_is_power_gated(dev_priv, PUNIT_REG_VEDSSPM0), + drm_WARN(display->drm, + !vlv_punit_is_power_gated(display, PUNIT_REG_VEDSSPM0), "VED not power gated\n"); } -static void assert_isp_power_gated(struct drm_i915_private *dev_priv) +static void assert_isp_power_gated(struct intel_display *display) { static const struct pci_device_id isp_ids[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0f38)}, @@ -1888,16 +1904,16 @@ static void assert_isp_power_gated(struct drm_i915_private *dev_priv) {} }; - drm_WARN(&dev_priv->drm, !pci_dev_present(isp_ids) && - !vlv_punit_is_power_gated(dev_priv, PUNIT_REG_ISPSSPM0), + drm_WARN(display->drm, !pci_dev_present(isp_ids) && + !vlv_punit_is_power_gated(display, PUNIT_REG_ISPSSPM0), "ISP not power gated\n"); } -static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); +static void intel_power_domains_verify_state(struct intel_display *display); /** * intel_power_domains_init_hw - initialize hardware power domain state - * @i915: i915 device instance + * @display: display device instance * @resume: Called from resume code paths or not * * This function initializes the hardware power domain state and enables all @@ -1911,34 +1927,35 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); * intel_power_domains_enable()) and must be paired with * 
intel_power_domains_driver_remove(). */ -void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) +void intel_power_domains_init_hw(struct intel_display *display, bool resume) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct drm_i915_private *i915 = to_i915(display->drm); + struct i915_power_domains *power_domains = &display->power.domains; power_domains->initializing = true; - if (DISPLAY_VER(i915) >= 11) { - icl_display_core_init(i915, resume); - } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { - bxt_display_core_init(i915, resume); - } else if (DISPLAY_VER(i915) == 9) { - skl_display_core_init(i915, resume); - } else if (IS_CHERRYVIEW(i915)) { + if (DISPLAY_VER(display) >= 11) { + icl_display_core_init(display, resume); + } else if (display->platform.geminilake || display->platform.broxton) { + bxt_display_core_init(display, resume); + } else if (DISPLAY_VER(display) == 9) { + skl_display_core_init(display, resume); + } else if (display->platform.cherryview) { mutex_lock(&power_domains->lock); - chv_phy_control_init(i915); + chv_phy_control_init(display); mutex_unlock(&power_domains->lock); - assert_isp_power_gated(i915); - } else if (IS_VALLEYVIEW(i915)) { + assert_isp_power_gated(display); + } else if (display->platform.valleyview) { mutex_lock(&power_domains->lock); - vlv_cmnlane_wa(i915); + vlv_cmnlane_wa(display); mutex_unlock(&power_domains->lock); - assert_ved_power_gated(i915); - assert_isp_power_gated(i915); - } else if (IS_BROADWELL(i915) || IS_HASWELL(i915)) { - hsw_assert_cdclk(i915); - intel_pch_reset_handshake(i915, !HAS_PCH_NOP(i915)); - } else if (IS_IVYBRIDGE(i915)) { - intel_pch_reset_handshake(i915, !HAS_PCH_NOP(i915)); + assert_ved_power_gated(display); + assert_isp_power_gated(display); + } else if (display->platform.broadwell || display->platform.haswell) { + hsw_assert_cdclk(display); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(i915)); + } else if 
(display->platform.ivybridge) { + intel_pch_reset_handshake(display, !HAS_PCH_NOP(i915)); } /* @@ -1947,24 +1964,24 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) * resources powered until display HW readout is complete. We drop * this reference in intel_power_domains_enable(). */ - drm_WARN_ON(&i915->drm, power_domains->init_wakeref); + drm_WARN_ON(display->drm, power_domains->init_wakeref); power_domains->init_wakeref = intel_display_power_get(i915, POWER_DOMAIN_INIT); /* Disable power support if the user asked so. */ - if (!i915->display.params.disable_power_well) { - drm_WARN_ON(&i915->drm, power_domains->disable_wakeref); - i915->display.power.domains.disable_wakeref = intel_display_power_get(i915, - POWER_DOMAIN_INIT); + if (!display->params.disable_power_well) { + drm_WARN_ON(display->drm, power_domains->disable_wakeref); + display->power.domains.disable_wakeref = intel_display_power_get(i915, + POWER_DOMAIN_INIT); } - intel_power_domains_sync_hw(i915); + intel_power_domains_sync_hw(display); power_domains->initializing = false; } /** * intel_power_domains_driver_remove - deinitialize hw power domain state - * @i915: i915 device instance + * @display: display device instance * * De-initializes the display power domain HW state. It also ensures that the * device stays powered up so that the driver can be reloaded. @@ -1973,19 +1990,20 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) * intel_power_domains_disable()) and must be paired with * intel_power_domains_init_hw(). */ -void intel_power_domains_driver_remove(struct drm_i915_private *i915) +void intel_power_domains_driver_remove(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); intel_wakeref_t wakeref __maybe_unused = - fetch_and_zero(&i915->display.power.domains.init_wakeref); + fetch_and_zero(&display->power.domains.init_wakeref); /* Remove the refcount we took to keep power well support disabled. 
*/ - if (!i915->display.params.disable_power_well) + if (!display->params.disable_power_well) intel_display_power_put(i915, POWER_DOMAIN_INIT, - fetch_and_zero(&i915->display.power.domains.disable_wakeref)); + fetch_and_zero(&display->power.domains.disable_wakeref)); - intel_display_power_flush_work_sync(i915); + intel_display_power_flush_work_sync(display); - intel_power_domains_verify_state(i915); + intel_power_domains_verify_state(display); /* Keep the power well enabled, but cancel its rpm wakeref. */ intel_runtime_pm_put(&i915->runtime_pm, wakeref); @@ -1993,7 +2011,7 @@ void intel_power_domains_driver_remove(struct drm_i915_private *i915) /** * intel_power_domains_sanitize_state - sanitize power domains state - * @i915: i915 device instance + * @display: display device instance * * Sanitize the power domains state during driver loading and system resume. * The function will disable all display power wells that BIOS has enabled @@ -2001,22 +2019,22 @@ void intel_power_domains_driver_remove(struct drm_i915_private *i915) * on it by the time this function is called, after the state of all the * pipe, encoder, etc. HW resources have been sanitized). 
*/ -void intel_power_domains_sanitize_state(struct drm_i915_private *i915) +void intel_power_domains_sanitize_state(struct intel_display *display) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *power_well; mutex_lock(&power_domains->lock); - for_each_power_well_reverse(i915, power_well) { + for_each_power_well_reverse(display, power_well) { if (power_well->desc->always_on || power_well->count || - !intel_power_well_is_enabled(i915, power_well)) + !intel_power_well_is_enabled(display, power_well)) continue; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "BIOS left unused %s power well enabled, disabling it\n", intel_power_well_name(power_well)); - intel_power_well_disable(i915, power_well); + intel_power_well_disable(display, power_well); } mutex_unlock(&power_domains->lock); @@ -2024,7 +2042,7 @@ void intel_power_domains_sanitize_state(struct drm_i915_private *i915) /** * intel_power_domains_enable - enable toggling of display power wells - * @i915: i915 device instance + * @display: display device instance * * Enable the ondemand enabling/disabling of the display power wells. Note that * power wells not belonging to POWER_DOMAIN_INIT are allowed to be toggled @@ -2034,36 +2052,38 @@ void intel_power_domains_sanitize_state(struct drm_i915_private *i915) * of display HW readout (which will acquire the power references reflecting * the current HW state). 
*/ -void intel_power_domains_enable(struct drm_i915_private *i915) +void intel_power_domains_enable(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); intel_wakeref_t wakeref __maybe_unused = - fetch_and_zero(&i915->display.power.domains.init_wakeref); + fetch_and_zero(&display->power.domains.init_wakeref); intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref); - intel_power_domains_verify_state(i915); + intel_power_domains_verify_state(display); } /** * intel_power_domains_disable - disable toggling of display power wells - * @i915: i915 device instance + * @display: display device instance * * Disable the ondemand enabling/disabling of the display power wells. See * intel_power_domains_enable() for which power wells this call controls. */ -void intel_power_domains_disable(struct drm_i915_private *i915) +void intel_power_domains_disable(struct intel_display *display) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct drm_i915_private *i915 = to_i915(display->drm); + struct i915_power_domains *power_domains = &display->power.domains; - drm_WARN_ON(&i915->drm, power_domains->init_wakeref); + drm_WARN_ON(display->drm, power_domains->init_wakeref); power_domains->init_wakeref = intel_display_power_get(i915, POWER_DOMAIN_INIT); - intel_power_domains_verify_state(i915); + intel_power_domains_verify_state(display); } /** * intel_power_domains_suspend - suspend power domain state - * @i915: i915 device instance + * @display: display device instance * @s2idle: specifies whether we go to idle, or deeper sleep * * This function prepares the hardware power domain state before entering @@ -2072,9 +2092,9 @@ void intel_power_domains_disable(struct drm_i915_private *i915) * It must be called with power domains already disabled (after a call to * intel_power_domains_disable()) and paired with intel_power_domains_resume(). 
*/ -void intel_power_domains_suspend(struct drm_i915_private *i915, bool s2idle) +void intel_power_domains_suspend(struct intel_display *display, bool s2idle) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; intel_wakeref_t wakeref __maybe_unused = fetch_and_zero(&power_domains->init_wakeref); @@ -2091,7 +2111,7 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, bool s2idle) if (!(power_domains->allowed_dc_mask & DC_STATE_EN_DC9) && s2idle && intel_dmc_has_payload(display)) { intel_display_power_flush_work(i915); - intel_power_domains_verify_state(i915); + intel_power_domains_verify_state(display); return; } @@ -2099,26 +2119,26 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, bool s2idle) * Even if power well support was disabled we still want to disable * power wells if power domains must be deinitialized for suspend. */ - if (!i915->display.params.disable_power_well) + if (!display->params.disable_power_well) intel_display_power_put(i915, POWER_DOMAIN_INIT, - fetch_and_zero(&i915->display.power.domains.disable_wakeref)); + fetch_and_zero(&display->power.domains.disable_wakeref)); intel_display_power_flush_work(i915); - intel_power_domains_verify_state(i915); + intel_power_domains_verify_state(display); - if (DISPLAY_VER(i915) >= 11) - icl_display_core_uninit(i915); - else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) - bxt_display_core_uninit(i915); - else if (DISPLAY_VER(i915) == 9) - skl_display_core_uninit(i915); + if (DISPLAY_VER(display) >= 11) + icl_display_core_uninit(display); + else if (display->platform.geminilake || display->platform.broxton) + bxt_display_core_uninit(display); + else if (DISPLAY_VER(display) == 9) + skl_display_core_uninit(display); power_domains->display_core_suspended = true; } /** * intel_power_domains_resume - resume power domain state - * @i915: i915 device instance + * 
@display: display device instance * * This function resume the hardware power domain state during system resume. * @@ -2126,45 +2146,46 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, bool s2idle) * intel_power_domains_enable()) and must be paired with * intel_power_domains_suspend(). */ -void intel_power_domains_resume(struct drm_i915_private *i915) +void intel_power_domains_resume(struct intel_display *display) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct drm_i915_private *i915 = to_i915(display->drm); + struct i915_power_domains *power_domains = &display->power.domains; if (power_domains->display_core_suspended) { - intel_power_domains_init_hw(i915, true); + intel_power_domains_init_hw(display, true); power_domains->display_core_suspended = false; } else { - drm_WARN_ON(&i915->drm, power_domains->init_wakeref); + drm_WARN_ON(display->drm, power_domains->init_wakeref); power_domains->init_wakeref = intel_display_power_get(i915, POWER_DOMAIN_INIT); } - intel_power_domains_verify_state(i915); + intel_power_domains_verify_state(display); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) -static void intel_power_domains_dump_info(struct drm_i915_private *i915) +static void intel_power_domains_dump_info(struct intel_display *display) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *power_well; - for_each_power_well(i915, power_well) { + for_each_power_well(display, power_well) { enum intel_display_power_domain domain; - drm_dbg(&i915->drm, "%-25s %d\n", - intel_power_well_name(power_well), intel_power_well_refcount(power_well)); + drm_dbg_kms(display->drm, "%-25s %d\n", + intel_power_well_name(power_well), intel_power_well_refcount(power_well)); for_each_power_domain(domain, intel_power_well_domains(power_well)) - drm_dbg(&i915->drm, " %-23s %d\n", - 
intel_display_power_domain_str(domain), - power_domains->domain_use_count[domain]); + drm_dbg_kms(display->drm, " %-23s %d\n", + intel_display_power_domain_str(domain), + power_domains->domain_use_count[domain]); } } /** * intel_power_domains_verify_state - verify the HW/SW state for all power wells - * @i915: i915 device instance + * @display: display device instance * * Verify if the reference count of each power well matches its HW enabled * state and the total refcount of the domains it belongs to. This must be @@ -2172,9 +2193,9 @@ static void intel_power_domains_dump_info(struct drm_i915_private *i915) * acquiring reference counts for any power wells in use and disabling the * ones left on by BIOS but not required by any active output. */ -static void intel_power_domains_verify_state(struct drm_i915_private *i915) +static void intel_power_domains_verify_state(struct intel_display *display) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *power_well; bool dump_domain_info; @@ -2183,16 +2204,16 @@ static void intel_power_domains_verify_state(struct drm_i915_private *i915) verify_async_put_domains_state(power_domains); dump_domain_info = false; - for_each_power_well(i915, power_well) { + for_each_power_well(display, power_well) { enum intel_display_power_domain domain; int domains_count; bool enabled; - enabled = intel_power_well_is_enabled(i915, power_well); + enabled = intel_power_well_is_enabled(display, power_well); if ((intel_power_well_refcount(power_well) || intel_power_well_is_always_on(power_well)) != enabled) - drm_err(&i915->drm, + drm_err(display->drm, "power well %s state mismatch (refcount %d/enabled %d)", intel_power_well_name(power_well), intel_power_well_refcount(power_well), enabled); @@ -2202,7 +2223,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *i915) domains_count += 
power_domains->domain_use_count[domain]; if (intel_power_well_refcount(power_well) != domains_count) { - drm_err(&i915->drm, + drm_err(display->drm, "power well %s refcount/domain refcount mismatch " "(refcount %d/domains refcount %d)\n", intel_power_well_name(power_well), @@ -2216,7 +2237,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *i915) static bool dumped; if (!dumped) { - intel_power_domains_dump_info(i915); + intel_power_domains_dump_info(display); dumped = true; } } @@ -2226,21 +2247,23 @@ static void intel_power_domains_verify_state(struct drm_i915_private *i915) #else -static void intel_power_domains_verify_state(struct drm_i915_private *i915) +static void intel_power_domains_verify_state(struct intel_display *display) { } #endif -void intel_display_power_suspend_late(struct drm_i915_private *i915) +void intel_display_power_suspend_late(struct intel_display *display, bool s2idle) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); + + intel_power_domains_suspend(display, s2idle); - if (DISPLAY_VER(i915) >= 11 || IS_GEMINILAKE(i915) || - IS_BROXTON(i915)) { + if (DISPLAY_VER(display) >= 11 || display->platform.geminilake || + display->platform.broxton) { bxt_enable_dc9(display); - } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { - hsw_enable_pc8(i915); + } else if (display->platform.haswell || display->platform.broadwell) { + hsw_enable_pc8(display); } /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */ @@ -2248,66 +2271,66 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915) intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); } -void intel_display_power_resume_early(struct drm_i915_private *i915) +void intel_display_power_resume_early(struct intel_display *display) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(i915) >= 11 || 
IS_GEMINILAKE(i915) || - IS_BROXTON(i915)) { + if (DISPLAY_VER(display) >= 11 || display->platform.geminilake || + display->platform.broxton) { gen9_sanitize_dc_state(display); bxt_disable_dc9(display); - } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { - hsw_disable_pc8(i915); + } else if (display->platform.haswell || display->platform.broadwell) { + hsw_disable_pc8(display); } /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */ if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1) intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0); + + intel_power_domains_resume(display); } -void intel_display_power_suspend(struct drm_i915_private *i915) +void intel_display_power_suspend(struct intel_display *display) { - struct intel_display *display = &i915->display; - - if (DISPLAY_VER(i915) >= 11) { - icl_display_core_uninit(i915); + if (DISPLAY_VER(display) >= 11) { + icl_display_core_uninit(display); bxt_enable_dc9(display); - } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { - bxt_display_core_uninit(i915); + } else if (display->platform.geminilake || display->platform.broxton) { + bxt_display_core_uninit(display); bxt_enable_dc9(display); - } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { - hsw_enable_pc8(i915); + } else if (display->platform.haswell || display->platform.broadwell) { + hsw_enable_pc8(display); } } -void intel_display_power_resume(struct drm_i915_private *i915) +void intel_display_power_resume(struct intel_display *display) { - struct intel_display *display = &i915->display; struct i915_power_domains *power_domains = &display->power.domains; - if (DISPLAY_VER(i915) >= 11) { + if (DISPLAY_VER(display) >= 11) { bxt_disable_dc9(display); - icl_display_core_init(i915, true); + icl_display_core_init(display, true); if (intel_dmc_has_payload(display)) { if (power_domains->allowed_dc_mask & DC_STATE_EN_UPTO_DC6) skl_enable_dc6(display); else if (power_domains->allowed_dc_mask & DC_STATE_EN_UPTO_DC5) gen9_enable_dc5(display); 
} - } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { + } else if (display->platform.geminilake || display->platform.broxton) { bxt_disable_dc9(display); - bxt_display_core_init(i915, true); + bxt_display_core_init(display, true); if (intel_dmc_has_payload(display) && (power_domains->allowed_dc_mask & DC_STATE_EN_UPTO_DC5)) gen9_enable_dc5(display); - } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { - hsw_disable_pc8(i915); + } else if (display->platform.haswell || display->platform.broadwell) { + hsw_disable_pc8(display); } } void intel_display_power_debug(struct drm_i915_private *i915, struct seq_file *m) { - struct i915_power_domains *power_domains = &i915->display.power.domains; + struct intel_display *display = &i915->display; + struct i915_power_domains *power_domains = &display->power.domains; int i; mutex_lock(&power_domains->lock); @@ -2452,17 +2475,17 @@ d13_port_domains[] = { }; static void -intel_port_domains_for_platform(struct drm_i915_private *i915, +intel_port_domains_for_platform(struct intel_display *display, const struct intel_ddi_port_domains **domains, int *domains_size) { - if (DISPLAY_VER(i915) >= 13) { + if (DISPLAY_VER(display) >= 13) { *domains = d13_port_domains; *domains_size = ARRAY_SIZE(d13_port_domains); - } else if (DISPLAY_VER(i915) >= 12) { + } else if (DISPLAY_VER(display) >= 12) { *domains = d12_port_domains; *domains_size = ARRAY_SIZE(d12_port_domains); - } else if (DISPLAY_VER(i915) >= 11) { + } else if (DISPLAY_VER(display) >= 11) { *domains = d11_port_domains; *domains_size = ARRAY_SIZE(d11_port_domains); } else { @@ -2472,13 +2495,13 @@ intel_port_domains_for_platform(struct drm_i915_private *i915, } static const struct intel_ddi_port_domains * -intel_port_domains_for_port(struct drm_i915_private *i915, enum port port) +intel_port_domains_for_port(struct intel_display *display, enum port port) { const struct intel_ddi_port_domains *domains; int domains_size; int i; - intel_port_domains_for_platform(i915, &domains, 
&domains_size); + intel_port_domains_for_platform(display, &domains, &domains_size); for (i = 0; i < domains_size; i++) if (port >= domains[i].port_start && port <= domains[i].port_end) return &domains[i]; @@ -2489,9 +2512,10 @@ intel_port_domains_for_port(struct drm_i915_private *i915, enum port port) enum intel_display_power_domain intel_display_power_ddi_io_domain(struct drm_i915_private *i915, enum port port) { - const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); + struct intel_display *display = &i915->display; + const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(display, port); - if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_io == POWER_DOMAIN_INVALID)) + if (drm_WARN_ON(display->drm, !domains || domains->ddi_io == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_IO_A; return domains->ddi_io + (int)(port - domains->port_start); @@ -2500,22 +2524,23 @@ intel_display_power_ddi_io_domain(struct drm_i915_private *i915, enum port port) enum intel_display_power_domain intel_display_power_ddi_lanes_domain(struct drm_i915_private *i915, enum port port) { - const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); + struct intel_display *display = &i915->display; + const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(display, port); - if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_lanes == POWER_DOMAIN_INVALID)) + if (drm_WARN_ON(display->drm, !domains || domains->ddi_lanes == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_LANES_A; return domains->ddi_lanes + (int)(port - domains->port_start); } static const struct intel_ddi_port_domains * -intel_port_domains_for_aux_ch(struct drm_i915_private *i915, enum aux_ch aux_ch) +intel_port_domains_for_aux_ch(struct intel_display *display, enum aux_ch aux_ch) { const struct intel_ddi_port_domains *domains; int domains_size; int i; - intel_port_domains_for_platform(i915, &domains, &domains_size); 
+ intel_port_domains_for_platform(display, &domains, &domains_size); for (i = 0; i < domains_size; i++) if (aux_ch >= domains[i].aux_ch_start && aux_ch <= domains[i].aux_ch_end) return &domains[i]; @@ -2526,9 +2551,10 @@ intel_port_domains_for_aux_ch(struct drm_i915_private *i915, enum aux_ch aux_ch) enum intel_display_power_domain intel_display_power_aux_io_domain(struct drm_i915_private *i915, enum aux_ch aux_ch) { - const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); + struct intel_display *display = &i915->display; + const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(display, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains || domains->aux_io == POWER_DOMAIN_INVALID)) + if (drm_WARN_ON(display->drm, !domains || domains->aux_io == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_IO_A; return domains->aux_io + (int)(aux_ch - domains->aux_ch_start); @@ -2537,9 +2563,10 @@ intel_display_power_aux_io_domain(struct drm_i915_private *i915, enum aux_ch aux enum intel_display_power_domain intel_display_power_legacy_aux_domain(struct drm_i915_private *i915, enum aux_ch aux_ch) { - const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); + struct intel_display *display = &i915->display; + const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(display, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID)) + if (drm_WARN_ON(display->drm, !domains || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_A; return domains->aux_legacy_usbc + (int)(aux_ch - domains->aux_ch_start); @@ -2548,9 +2575,10 @@ intel_display_power_legacy_aux_domain(struct drm_i915_private *i915, enum aux_ch enum intel_display_power_domain intel_display_power_tbt_aux_domain(struct drm_i915_private *i915, enum aux_ch aux_ch) { - const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); 
+ struct intel_display *display = &i915->display; + const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(display, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains || domains->aux_tbt == POWER_DOMAIN_INVALID)) + if (drm_WARN_ON(display->drm, !domains || domains->aux_tbt == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_TBT1; return domains->aux_tbt + (int)(aux_ch - domains->aux_ch_start); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index 3f8f84df4733..7b294eec4431 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -15,6 +15,7 @@ enum aux_ch; enum port; struct drm_i915_private; struct i915_power_well; +struct intel_display; struct intel_encoder; struct seq_file; @@ -166,21 +167,21 @@ struct intel_display_power_domain_set { for ((__domain) = 0; (__domain) < POWER_DOMAIN_NUM; (__domain)++) \ for_each_if(test_bit((__domain), (__mask)->bits)) -int intel_power_domains_init(struct drm_i915_private *dev_priv); -void intel_power_domains_cleanup(struct drm_i915_private *dev_priv); -void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); -void intel_power_domains_driver_remove(struct drm_i915_private *dev_priv); -void intel_power_domains_enable(struct drm_i915_private *dev_priv); -void intel_power_domains_disable(struct drm_i915_private *dev_priv); -void intel_power_domains_suspend(struct drm_i915_private *dev_priv, bool s2idle); -void intel_power_domains_resume(struct drm_i915_private *dev_priv); -void intel_power_domains_sanitize_state(struct drm_i915_private *dev_priv); - -void intel_display_power_suspend_late(struct drm_i915_private *i915); -void intel_display_power_resume_early(struct drm_i915_private *i915); -void intel_display_power_suspend(struct drm_i915_private *i915); -void intel_display_power_resume(struct drm_i915_private *i915); -void 
intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, +int intel_power_domains_init(struct intel_display *display); +void intel_power_domains_cleanup(struct intel_display *display); +void intel_power_domains_init_hw(struct intel_display *display, bool resume); +void intel_power_domains_driver_remove(struct intel_display *display); +void intel_power_domains_enable(struct intel_display *display); +void intel_power_domains_disable(struct intel_display *display); +void intel_power_domains_suspend(struct intel_display *display, bool s2idle); +void intel_power_domains_resume(struct intel_display *display); +void intel_power_domains_sanitize_state(struct intel_display *display); + +void intel_display_power_suspend_late(struct intel_display *display, bool s2idle); +void intel_display_power_resume_early(struct intel_display *display); +void intel_display_power_suspend(struct intel_display *display); +void intel_display_power_resume(struct intel_display *display); +void intel_display_power_set_target_dc_state(struct intel_display *display, u32 state); bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_display_power_map.c b/drivers/gpu/drm/i915/display/intel_display_power_map.c index 5575aa0d6689..0c8ac1af6db7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_map.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_map.c @@ -3,14 +3,12 @@ * Copyright © 2022 Intel Corporation */ -#include "i915_drv.h" #include "i915_reg.h" - -#include "vlv_sideband_reg.h" - +#include "intel_display_core.h" #include "intel_display_power_map.h" #include "intel_display_power_well.h" #include "intel_display_types.h" +#include "vlv_sideband_reg.h" #define __LIST_INLINE_ELEMS(__elem_type, ...) 
\ ((__elem_type[]) { __VA_ARGS__ }) @@ -1752,9 +1750,9 @@ __set_power_wells(struct i915_power_domains *power_domains, const struct i915_power_well_desc_list *power_well_descs, int power_well_descs_sz) { - struct drm_i915_private *i915 = container_of(power_domains, - struct drm_i915_private, - display.power.domains); + struct intel_display *display = container_of(power_domains, + struct intel_display, + power.domains); u64 power_well_ids = 0; const struct i915_power_well_desc_list *desc_list; const struct i915_power_well_desc *desc; @@ -1778,7 +1776,7 @@ __set_power_wells(struct i915_power_domains *power_domains, enum i915_power_well_id id = inst->id; pw->desc = desc; - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, overflows_type(inst - desc->instances->list, pw->instance_idx)); pw->instance_idx = inst - desc->instances->list; @@ -1789,8 +1787,8 @@ __set_power_wells(struct i915_power_domains *power_domains, if (id == DISP_PW_ID_NONE) continue; - drm_WARN_ON(&i915->drm, id >= sizeof(power_well_ids) * 8); - drm_WARN_ON(&i915->drm, power_well_ids & BIT_ULL(id)); + drm_WARN_ON(display->drm, id >= sizeof(power_well_ids) * 8); + drm_WARN_ON(display->drm, power_well_ids & BIT_ULL(id)); power_well_ids |= BIT_ULL(id); } @@ -1811,53 +1809,53 @@ __set_power_wells(struct i915_power_domains *power_domains, */ int intel_display_power_map_init(struct i915_power_domains *power_domains) { - struct drm_i915_private *i915 = container_of(power_domains, - struct drm_i915_private, - display.power.domains); + struct intel_display *display = container_of(power_domains, + struct intel_display, + power.domains); /* * The enabling order will be from lower to higher indexed wells, * the disabling order is reversed. 
*/ - if (!HAS_DISPLAY(i915)) { + if (!HAS_DISPLAY(display)) { power_domains->power_well_count = 0; return 0; } - if (DISPLAY_VER(i915) >= 30) + if (DISPLAY_VER(display) >= 30) return set_power_wells(power_domains, xe3lpd_power_wells); - else if (DISPLAY_VER(i915) >= 20) + else if (DISPLAY_VER(display) >= 20) return set_power_wells(power_domains, xe2lpd_power_wells); - else if (DISPLAY_VER(i915) >= 14) + else if (DISPLAY_VER(display) >= 14) return set_power_wells(power_domains, xelpdp_power_wells); - else if (IS_DG2(i915)) + else if (display->platform.dg2) return set_power_wells(power_domains, xehpd_power_wells); - else if (DISPLAY_VER(i915) >= 13) + else if (DISPLAY_VER(display) >= 13) return set_power_wells(power_domains, xelpd_power_wells); - else if (IS_DG1(i915)) + else if (display->platform.dg1) return set_power_wells(power_domains, dg1_power_wells); - else if (IS_ALDERLAKE_S(i915)) + else if (display->platform.alderlake_s) return set_power_wells(power_domains, adls_power_wells); - else if (IS_ROCKETLAKE(i915)) + else if (display->platform.rocketlake) return set_power_wells(power_domains, rkl_power_wells); - else if (DISPLAY_VER(i915) == 12) + else if (DISPLAY_VER(display) == 12) return set_power_wells(power_domains, tgl_power_wells); - else if (DISPLAY_VER(i915) == 11) + else if (DISPLAY_VER(display) == 11) return set_power_wells(power_domains, icl_power_wells); - else if (IS_GEMINILAKE(i915)) + else if (display->platform.geminilake) return set_power_wells(power_domains, glk_power_wells); - else if (IS_BROXTON(i915)) + else if (display->platform.broxton) return set_power_wells(power_domains, bxt_power_wells); - else if (DISPLAY_VER(i915) == 9) + else if (DISPLAY_VER(display) == 9) return set_power_wells(power_domains, skl_power_wells); - else if (IS_CHERRYVIEW(i915)) + else if (display->platform.cherryview) return set_power_wells(power_domains, chv_power_wells); - else if (IS_BROADWELL(i915)) + else if (display->platform.broadwell) return 
set_power_wells(power_domains, bdw_power_wells); - else if (IS_HASWELL(i915)) + else if (display->platform.haswell) return set_power_wells(power_domains, hsw_power_wells); - else if (IS_VALLEYVIEW(i915)) + else if (display->platform.valleyview) return set_power_wells(power_domains, vlv_power_wells); - else if (IS_I830(i915)) + else if (display->platform.i830) return set_power_wells(power_domains, i830_power_wells); else return set_power_wells(power_domains, i9xx_power_wells); diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index f0131dd853de..f45a4f9ba23c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -46,23 +46,23 @@ struct i915_power_well_ops { * during driver init and resume time, possibly after first calling * the enable/disable handlers. */ - void (*sync_hw)(struct drm_i915_private *i915, + void (*sync_hw)(struct intel_display *display, struct i915_power_well *power_well); /* * Enable the well and resources that depend on it (for example * interrupts located on the well). Called after the 0->1 refcount * transition. */ - void (*enable)(struct drm_i915_private *i915, + void (*enable)(struct intel_display *display, struct i915_power_well *power_well); /* * Disable the well and resources that depend on it. Called after * the 1->0 refcount transition. */ - void (*disable)(struct drm_i915_private *i915, + void (*disable)(struct intel_display *display, struct i915_power_well *power_well); /* Returns the hw enabled state. 
*/ - bool (*is_enabled)(struct drm_i915_private *i915, + bool (*is_enabled)(struct intel_display *display, struct i915_power_well *power_well); }; @@ -73,12 +73,12 @@ i915_power_well_instance(const struct i915_power_well *power_well) } struct i915_power_well * -lookup_power_well(struct drm_i915_private *i915, +lookup_power_well(struct intel_display *display, enum i915_power_well_id power_well_id) { struct i915_power_well *power_well; - for_each_power_well(i915, power_well) + for_each_power_well(display, power_well) if (i915_power_well_instance(power_well)->id == power_well_id) return power_well; @@ -89,58 +89,57 @@ lookup_power_well(struct drm_i915_private *i915, * the first power well and hope the WARN gets reported so we can fix * our driver. */ - drm_WARN(&i915->drm, 1, + drm_WARN(display->drm, 1, "Power well %d not defined for this platform\n", power_well_id); - return &i915->display.power.domains.power_wells[0]; + return &display->power.domains.power_wells[0]; } -void intel_power_well_enable(struct drm_i915_private *i915, +void intel_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - drm_dbg_kms(&i915->drm, "enabling %s\n", intel_power_well_name(power_well)); - power_well->desc->ops->enable(i915, power_well); + drm_dbg_kms(display->drm, "enabling %s\n", intel_power_well_name(power_well)); + power_well->desc->ops->enable(display, power_well); power_well->hw_enabled = true; } -void intel_power_well_disable(struct drm_i915_private *i915, +void intel_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - drm_dbg_kms(&i915->drm, "disabling %s\n", intel_power_well_name(power_well)); + drm_dbg_kms(display->drm, "disabling %s\n", intel_power_well_name(power_well)); power_well->hw_enabled = false; - power_well->desc->ops->disable(i915, power_well); + power_well->desc->ops->disable(display, power_well); } -void intel_power_well_sync_hw(struct drm_i915_private *i915, +void 
intel_power_well_sync_hw(struct intel_display *display, struct i915_power_well *power_well) { - power_well->desc->ops->sync_hw(i915, power_well); - power_well->hw_enabled = - power_well->desc->ops->is_enabled(i915, power_well); + power_well->desc->ops->sync_hw(display, power_well); + power_well->hw_enabled = power_well->desc->ops->is_enabled(display, power_well); } -void intel_power_well_get(struct drm_i915_private *i915, +void intel_power_well_get(struct intel_display *display, struct i915_power_well *power_well) { if (!power_well->count++) - intel_power_well_enable(i915, power_well); + intel_power_well_enable(display, power_well); } -void intel_power_well_put(struct drm_i915_private *i915, +void intel_power_well_put(struct intel_display *display, struct i915_power_well *power_well) { - drm_WARN(&i915->drm, !power_well->count, + drm_WARN(display->drm, !power_well->count, "Use count on power well %s is already zero", i915_power_well_instance(power_well)->name); if (!--power_well->count) - intel_power_well_disable(i915, power_well); + intel_power_well_disable(display, power_well); } -bool intel_power_well_is_enabled(struct drm_i915_private *i915, +bool intel_power_well_is_enabled(struct intel_display *display, struct i915_power_well *power_well) { - return power_well->desc->ops->is_enabled(i915, power_well); + return power_well->desc->ops->is_enabled(display, power_well); } bool intel_power_well_is_enabled_cached(struct i915_power_well *power_well) @@ -148,14 +147,14 @@ bool intel_power_well_is_enabled_cached(struct i915_power_well *power_well) return power_well->hw_enabled; } -bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, +bool intel_display_power_well_is_enabled(struct intel_display *display, enum i915_power_well_id power_well_id) { struct i915_power_well *power_well; - power_well = lookup_power_well(dev_priv, power_well_id); + power_well = lookup_power_well(display, power_well_id); - return intel_power_well_is_enabled(dev_priv, 
power_well); + return intel_power_well_is_enabled(display, power_well); } bool intel_power_well_is_always_on(struct i915_power_well *power_well) @@ -184,10 +183,10 @@ int intel_power_well_refcount(struct i915_power_well *power_well) * to be enabled, and it will only be disabled if none of the registers is * requesting it to be enabled. */ -static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv, +static void hsw_power_well_post_enable(struct intel_display *display, u8 irq_pipe_mask, bool has_vga) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); if (has_vga) intel_vga_reset_io_mem(display); @@ -196,9 +195,11 @@ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv, gen8_irq_power_well_post_enable(dev_priv, irq_pipe_mask); } -static void hsw_power_well_pre_disable(struct drm_i915_private *dev_priv, +static void hsw_power_well_pre_disable(struct intel_display *display, u8 irq_pipe_mask) { + struct drm_i915_private *dev_priv = to_i915(display->drm); + if (irq_pipe_mask) gen8_irq_power_well_pre_disable(dev_priv, irq_pipe_mask); } @@ -221,12 +222,12 @@ static enum aux_ch icl_aux_pw_to_ch(const struct i915_power_well *power_well) } static struct intel_digital_port * -aux_ch_to_digital_port(struct drm_i915_private *dev_priv, +aux_ch_to_digital_port(struct intel_display *display, enum aux_ch aux_ch) { struct intel_encoder *encoder; - for_each_intel_encoder(&dev_priv->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { struct intel_digital_port *dig_port; /* We'll check the MST primary port */ @@ -242,11 +243,11 @@ aux_ch_to_digital_port(struct drm_i915_private *dev_priv, return NULL; } -static enum phy icl_aux_pw_to_phy(struct drm_i915_private *i915, +static enum phy icl_aux_pw_to_phy(struct intel_display *display, const struct i915_power_well *power_well) { enum aux_ch aux_ch = icl_aux_pw_to_ch(power_well); - struct intel_digital_port *dig_port = 
aux_ch_to_digital_port(i915, aux_ch); + struct intel_digital_port *dig_port = aux_ch_to_digital_port(display, aux_ch); /* * FIXME should we care about the (VBT defined) dig_port->aux_ch @@ -258,7 +259,7 @@ static enum phy icl_aux_pw_to_phy(struct drm_i915_private *i915, return dig_port ? intel_encoder_to_phy(&dig_port->base) : PHY_NONE; } -static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv, +static void hsw_wait_for_power_well_enable(struct intel_display *display, struct i915_power_well *power_well, bool timeout_expected) { @@ -271,39 +272,39 @@ static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv, * an ack, but rather just wait a fixed amount of time and then * proceed. This is only used on DG2. */ - if (IS_DG2(dev_priv) && power_well->desc->fixed_enable_delay) { + if (display->platform.dg2 && power_well->desc->fixed_enable_delay) { usleep_range(600, 1200); return; } /* Timeout for PW1:10 us, AUX:not specified, other PWs:20 us. */ - if (intel_de_wait_for_set(dev_priv, regs->driver, + if (intel_de_wait_for_set(display, regs->driver, HSW_PWR_WELL_CTL_STATE(pw_idx), timeout)) { - drm_dbg_kms(&dev_priv->drm, "%s power well enable timeout\n", + drm_dbg_kms(display->drm, "%s power well enable timeout\n", intel_power_well_name(power_well)); - drm_WARN_ON(&dev_priv->drm, !timeout_expected); + drm_WARN_ON(display->drm, !timeout_expected); } } -static u32 hsw_power_well_requesters(struct drm_i915_private *dev_priv, +static u32 hsw_power_well_requesters(struct intel_display *display, const struct i915_power_well_regs *regs, int pw_idx) { u32 req_mask = HSW_PWR_WELL_CTL_REQ(pw_idx); u32 ret; - ret = intel_de_read(dev_priv, regs->bios) & req_mask ? 1 : 0; - ret |= intel_de_read(dev_priv, regs->driver) & req_mask ? 2 : 0; + ret = intel_de_read(display, regs->bios) & req_mask ? 1 : 0; + ret |= intel_de_read(display, regs->driver) & req_mask ? 2 : 0; if (regs->kvmr.reg) - ret |= intel_de_read(dev_priv, regs->kvmr) & req_mask ? 
4 : 0; - ret |= intel_de_read(dev_priv, regs->debug) & req_mask ? 8 : 0; + ret |= intel_de_read(display, regs->kvmr) & req_mask ? 4 : 0; + ret |= intel_de_read(display, regs->debug) & req_mask ? 8 : 0; return ret; } -static void hsw_wait_for_power_well_disable(struct drm_i915_private *dev_priv, +static void hsw_wait_for_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { const struct i915_power_well_regs *regs = power_well->desc->ops->regs; @@ -320,28 +321,28 @@ static void hsw_wait_for_power_well_disable(struct drm_i915_private *dev_priv, * Skip the wait in case any of the request bits are set and print a * diagnostic message. */ - wait_for((disabled = !(intel_de_read(dev_priv, regs->driver) & + wait_for((disabled = !(intel_de_read(display, regs->driver) & HSW_PWR_WELL_CTL_STATE(pw_idx))) || - (reqs = hsw_power_well_requesters(dev_priv, regs, pw_idx)), 1); + (reqs = hsw_power_well_requesters(display, regs, pw_idx)), 1); if (disabled) return; - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "%s forced on (bios:%d driver:%d kvmr:%d debug:%d)\n", intel_power_well_name(power_well), !!(reqs & 1), !!(reqs & 2), !!(reqs & 4), !!(reqs & 8)); } -static void gen9_wait_for_power_well_fuses(struct drm_i915_private *dev_priv, +static void gen9_wait_for_power_well_fuses(struct intel_display *display, enum skl_power_gate pg) { /* Timeout 5us for PG#0, for other PGs 1us */ - drm_WARN_ON(&dev_priv->drm, - intel_de_wait_for_set(dev_priv, SKL_FUSE_STATUS, + drm_WARN_ON(display->drm, + intel_de_wait_for_set(display, SKL_FUSE_STATUS, SKL_FUSE_PG_DIST_STATUS(pg), 1)); } -static void hsw_power_well_enable(struct drm_i915_private *dev_priv, +static void hsw_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { const struct i915_power_well_regs *regs = power_well->desc->ops->regs; @@ -350,12 +351,12 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, if (power_well->desc->has_fuses) { enum 
skl_power_gate pg; - pg = DISPLAY_VER(dev_priv) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) : + pg = DISPLAY_VER(display) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) : SKL_PW_CTL_IDX_TO_PG(pw_idx); /* Wa_16013190616:adlp */ - if (IS_ALDERLAKE_P(dev_priv) && pg == SKL_PG1) - intel_de_rmw(dev_priv, GEN8_CHICKEN_DCPR_1, 0, DISABLE_FLR_SRC); + if (display->platform.alderlake_p && pg == SKL_PG1) + intel_de_rmw(display, GEN8_CHICKEN_DCPR_1, 0, DISABLE_FLR_SRC); /* * For PW1 we have to wait both for the PW0/PG0 fuse state @@ -365,112 +366,112 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, * after the enabling. */ if (pg == SKL_PG1) - gen9_wait_for_power_well_fuses(dev_priv, SKL_PG0); + gen9_wait_for_power_well_fuses(display, SKL_PG0); } - intel_de_rmw(dev_priv, regs->driver, 0, HSW_PWR_WELL_CTL_REQ(pw_idx)); + intel_de_rmw(display, regs->driver, 0, HSW_PWR_WELL_CTL_REQ(pw_idx)); - hsw_wait_for_power_well_enable(dev_priv, power_well, false); + hsw_wait_for_power_well_enable(display, power_well, false); if (power_well->desc->has_fuses) { enum skl_power_gate pg; - pg = DISPLAY_VER(dev_priv) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) : + pg = DISPLAY_VER(display) >= 11 ? 
ICL_PW_CTL_IDX_TO_PG(pw_idx) : SKL_PW_CTL_IDX_TO_PG(pw_idx); - gen9_wait_for_power_well_fuses(dev_priv, pg); + gen9_wait_for_power_well_fuses(display, pg); } - hsw_power_well_post_enable(dev_priv, + hsw_power_well_post_enable(display, power_well->desc->irq_pipe_mask, power_well->desc->has_vga); } -static void hsw_power_well_disable(struct drm_i915_private *dev_priv, +static void hsw_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { const struct i915_power_well_regs *regs = power_well->desc->ops->regs; int pw_idx = i915_power_well_instance(power_well)->hsw.idx; - hsw_power_well_pre_disable(dev_priv, + hsw_power_well_pre_disable(display, power_well->desc->irq_pipe_mask); - intel_de_rmw(dev_priv, regs->driver, HSW_PWR_WELL_CTL_REQ(pw_idx), 0); - hsw_wait_for_power_well_disable(dev_priv, power_well); + intel_de_rmw(display, regs->driver, HSW_PWR_WELL_CTL_REQ(pw_idx), 0); + hsw_wait_for_power_well_disable(display, power_well); } -static bool intel_aux_ch_is_edp(struct drm_i915_private *i915, enum aux_ch aux_ch) +static bool intel_aux_ch_is_edp(struct intel_display *display, enum aux_ch aux_ch) { - struct intel_digital_port *dig_port = aux_ch_to_digital_port(i915, aux_ch); + struct intel_digital_port *dig_port = aux_ch_to_digital_port(display, aux_ch); return dig_port && dig_port->base.type == INTEL_OUTPUT_EDP; } static void -icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, +icl_combo_phy_aux_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { const struct i915_power_well_regs *regs = power_well->desc->ops->regs; int pw_idx = i915_power_well_instance(power_well)->hsw.idx; - drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv)); + drm_WARN_ON(display->drm, !display->platform.icelake); - intel_de_rmw(dev_priv, regs->driver, 0, HSW_PWR_WELL_CTL_REQ(pw_idx)); + intel_de_rmw(display, regs->driver, 0, HSW_PWR_WELL_CTL_REQ(pw_idx)); /* * FIXME not sure if we should derive the PHY from 
the pw_idx, or * from the VBT defined AUX_CH->DDI->PHY mapping. */ - intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(ICL_AUX_PW_TO_PHY(pw_idx)), + intel_de_rmw(display, ICL_PORT_CL_DW12(ICL_AUX_PW_TO_PHY(pw_idx)), 0, ICL_LANE_ENABLE_AUX); - hsw_wait_for_power_well_enable(dev_priv, power_well, false); + hsw_wait_for_power_well_enable(display, power_well, false); /* Display WA #1178: icl */ if (pw_idx >= ICL_PW_CTL_IDX_AUX_A && pw_idx <= ICL_PW_CTL_IDX_AUX_B && - !intel_aux_ch_is_edp(dev_priv, ICL_AUX_PW_TO_CH(pw_idx))) - intel_de_rmw(dev_priv, ICL_PORT_TX_DW6_AUX(ICL_AUX_PW_TO_PHY(pw_idx)), + !intel_aux_ch_is_edp(display, ICL_AUX_PW_TO_CH(pw_idx))) + intel_de_rmw(display, ICL_PORT_TX_DW6_AUX(ICL_AUX_PW_TO_PHY(pw_idx)), 0, O_FUNC_OVRD_EN | O_LDO_BYPASS_CRI); } static void -icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, +icl_combo_phy_aux_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { const struct i915_power_well_regs *regs = power_well->desc->ops->regs; int pw_idx = i915_power_well_instance(power_well)->hsw.idx; - drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv)); + drm_WARN_ON(display->drm, !display->platform.icelake); /* * FIXME not sure if we should derive the PHY from the pw_idx, or * from the VBT defined AUX_CH->DDI->PHY mapping. 
*/ - intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(ICL_AUX_PW_TO_PHY(pw_idx)), + intel_de_rmw(display, ICL_PORT_CL_DW12(ICL_AUX_PW_TO_PHY(pw_idx)), ICL_LANE_ENABLE_AUX, 0); - intel_de_rmw(dev_priv, regs->driver, HSW_PWR_WELL_CTL_REQ(pw_idx), 0); + intel_de_rmw(display, regs->driver, HSW_PWR_WELL_CTL_REQ(pw_idx), 0); - hsw_wait_for_power_well_disable(dev_priv, power_well); + hsw_wait_for_power_well_disable(display, power_well); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) -static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, +static void icl_tc_port_assert_ref_held(struct intel_display *display, struct i915_power_well *power_well, struct intel_digital_port *dig_port) { - if (drm_WARN_ON(&dev_priv->drm, !dig_port)) + if (drm_WARN_ON(display->drm, !dig_port)) return; - if (DISPLAY_VER(dev_priv) == 11 && intel_tc_cold_requires_aux_pw(dig_port)) + if (DISPLAY_VER(display) == 11 && intel_tc_cold_requires_aux_pw(dig_port)) return; - drm_WARN_ON(&dev_priv->drm, !intel_tc_port_ref_held(dig_port)); + drm_WARN_ON(display->drm, !intel_tc_port_ref_held(dig_port)); } #else -static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, +static void icl_tc_port_assert_ref_held(struct intel_display *display, struct i915_power_well *power_well, struct intel_digital_port *dig_port) { @@ -480,8 +481,9 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, #define TGL_AUX_PW_TO_TC_PORT(pw_idx) ((pw_idx) - TGL_PW_CTL_IDX_AUX_TC1) -static void icl_tc_cold_exit(struct drm_i915_private *i915) +static void icl_tc_cold_exit(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); int ret, tries = 0; while (1) { @@ -502,21 +504,22 @@ static void icl_tc_cold_exit(struct drm_i915_private *i915) } static void -icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, +icl_tc_phy_aux_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { + struct drm_i915_private 
*dev_priv = to_i915(display->drm); enum aux_ch aux_ch = icl_aux_pw_to_ch(power_well); - struct intel_digital_port *dig_port = aux_ch_to_digital_port(dev_priv, aux_ch); + struct intel_digital_port *dig_port = aux_ch_to_digital_port(display, aux_ch); const struct i915_power_well_regs *regs = power_well->desc->ops->regs; bool is_tbt = power_well->desc->is_tc_tbt; bool timeout_expected; - icl_tc_port_assert_ref_held(dev_priv, power_well, dig_port); + icl_tc_port_assert_ref_held(display, power_well, dig_port); - intel_de_rmw(dev_priv, DP_AUX_CH_CTL(aux_ch), + intel_de_rmw(display, DP_AUX_CH_CTL(aux_ch), DP_AUX_CH_CTL_TBT_IO, is_tbt ? DP_AUX_CH_CTL_TBT_IO : 0); - intel_de_rmw(dev_priv, regs->driver, + intel_de_rmw(display, regs->driver, 0, HSW_PWR_WELL_CTL_REQ(i915_power_well_instance(power_well)->hsw.idx)); @@ -526,51 +529,53 @@ icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, * exit sequence. */ timeout_expected = is_tbt || intel_tc_cold_requires_aux_pw(dig_port); - if (DISPLAY_VER(dev_priv) == 11 && intel_tc_cold_requires_aux_pw(dig_port)) - icl_tc_cold_exit(dev_priv); + if (DISPLAY_VER(display) == 11 && intel_tc_cold_requires_aux_pw(dig_port)) + icl_tc_cold_exit(display); - hsw_wait_for_power_well_enable(dev_priv, power_well, timeout_expected); + hsw_wait_for_power_well_enable(display, power_well, timeout_expected); - if (DISPLAY_VER(dev_priv) >= 12 && !is_tbt) { + if (DISPLAY_VER(display) >= 12 && !is_tbt) { enum tc_port tc_port; tc_port = TGL_AUX_PW_TO_TC_PORT(i915_power_well_instance(power_well)->hsw.idx); if (wait_for(intel_dkl_phy_read(dev_priv, DKL_CMN_UC_DW_27(tc_port)) & DKL_CMN_UC_DW27_UC_HEALTH, 1)) - drm_warn(&dev_priv->drm, + drm_warn(display->drm, "Timeout waiting TC uC health\n"); } } static void -icl_aux_power_well_enable(struct drm_i915_private *dev_priv, +icl_aux_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - enum phy phy = icl_aux_pw_to_phy(dev_priv, power_well); + struct drm_i915_private 
*dev_priv = to_i915(display->drm); + enum phy phy = icl_aux_pw_to_phy(display, power_well); if (intel_phy_is_tc(dev_priv, phy)) - return icl_tc_phy_aux_power_well_enable(dev_priv, power_well); - else if (IS_ICELAKE(dev_priv)) - return icl_combo_phy_aux_power_well_enable(dev_priv, + return icl_tc_phy_aux_power_well_enable(display, power_well); + else if (display->platform.icelake) + return icl_combo_phy_aux_power_well_enable(display, power_well); else - return hsw_power_well_enable(dev_priv, power_well); + return hsw_power_well_enable(display, power_well); } static void -icl_aux_power_well_disable(struct drm_i915_private *dev_priv, +icl_aux_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - enum phy phy = icl_aux_pw_to_phy(dev_priv, power_well); + struct drm_i915_private *dev_priv = to_i915(display->drm); + enum phy phy = icl_aux_pw_to_phy(display, power_well); if (intel_phy_is_tc(dev_priv, phy)) - return hsw_power_well_disable(dev_priv, power_well); - else if (IS_ICELAKE(dev_priv)) - return icl_combo_phy_aux_power_well_disable(dev_priv, + return hsw_power_well_disable(display, power_well); + else if (display->platform.icelake) + return icl_combo_phy_aux_power_well_disable(display, power_well); else - return hsw_power_well_disable(dev_priv, power_well); + return hsw_power_well_disable(display, power_well); } /* @@ -578,7 +583,7 @@ icl_aux_power_well_disable(struct drm_i915_private *dev_priv, * enable it, so check if it's enabled and also check if we've requested it to * be enabled. 
*/ -static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, +static bool hsw_power_well_enabled(struct intel_display *display, struct i915_power_well *power_well) { const struct i915_power_well_regs *regs = power_well->desc->ops->regs; @@ -588,7 +593,7 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, HSW_PWR_WELL_CTL_STATE(pw_idx); u32 val; - val = intel_de_read(dev_priv, regs->driver); + val = intel_de_read(display, regs->driver); /* * On GEN9 big core due to a DMC bug the driver's request bits for PW1 @@ -596,9 +601,9 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, * BIOS's own request bits, which are forced-on for these power wells * when exiting DC5/6. */ - if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv) && + if (DISPLAY_VER(display) == 9 && !display->platform.broxton && (id == SKL_DISP_PW_1 || id == SKL_DISP_PW_MISC_IO)) - val |= intel_de_read(dev_priv, regs->bios); + val |= intel_de_read(display, regs->bios); return (val & mask) == mask; } @@ -691,7 +696,6 @@ static void gen9_write_dc_state(struct intel_display *display, static u32 gen9_dc_mask(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 mask; mask = DC_STATE_EN_UPTO_DC5; @@ -701,7 +705,7 @@ static u32 gen9_dc_mask(struct intel_display *display) | DC_STATE_EN_DC9; else if (DISPLAY_VER(display) == 11) mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9; - else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) + else if (display->platform.geminilake || display->platform.broxton) mask |= DC_STATE_EN_DC9; else mask |= DC_STATE_EN_UPTO_DC6; @@ -798,7 +802,7 @@ static void tgl_disable_dc3co(struct intel_display *display) static void assert_can_enable_dc5(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); + struct drm_i915_private __maybe_unused *dev_priv = to_i915(display->drm); enum i915_power_well_id high_pg; /* Power wells at this level and above 
must be disabled for DC5 entry */ @@ -808,7 +812,7 @@ static void assert_can_enable_dc5(struct intel_display *display) high_pg = SKL_DISP_PW_2; drm_WARN_ONCE(display->drm, - intel_display_power_well_is_enabled(dev_priv, high_pg), + intel_display_power_well_is_enabled(display, high_pg), "Power wells above platform's DC5 limit still enabled.\n"); drm_WARN_ONCE(display->drm, @@ -822,18 +826,16 @@ static void assert_can_enable_dc5(struct intel_display *display) void gen9_enable_dc5(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - assert_can_enable_dc5(display); drm_dbg_kms(display->drm, "Enabling DC5\n"); /* Wa Display #1183: skl,kbl,cfl */ - if (DISPLAY_VER(display) == 9 && !IS_BROXTON(dev_priv)) + if (DISPLAY_VER(display) == 9 && !display->platform.broxton) intel_de_rmw(display, GEN8_CHICKEN_DCPR_1, 0, SKL_SELECT_ALTERNATE_DC_EXIT); - intel_dmc_wl_enable(display); + intel_dmc_wl_enable(display, DC_STATE_EN_UPTO_DC5); gen9_set_dc_state(display, DC_STATE_EN_UPTO_DC5); } @@ -855,26 +857,22 @@ static void assert_can_enable_dc6(struct intel_display *display) void skl_enable_dc6(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - assert_can_enable_dc6(display); drm_dbg_kms(display->drm, "Enabling DC6\n"); /* Wa Display #1183: skl,kbl,cfl */ - if (DISPLAY_VER(display) == 9 && !IS_BROXTON(dev_priv)) + if (DISPLAY_VER(display) == 9 && !display->platform.broxton) intel_de_rmw(display, GEN8_CHICKEN_DCPR_1, 0, SKL_SELECT_ALTERNATE_DC_EXIT); - intel_dmc_wl_enable(display); + intel_dmc_wl_enable(display, DC_STATE_EN_UPTO_DC6); gen9_set_dc_state(display, DC_STATE_EN_UPTO_DC6); } void bxt_enable_dc9(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - assert_can_enable_dc9(display); drm_dbg_kms(display->drm, "Enabling DC9\n"); @@ -882,7 +880,7 @@ void bxt_enable_dc9(struct intel_display *display) * Power sequencer reset is needed on BXT/GLK, because 
the PPS registers * aren't always on, unlike with South Display Engine on PCH. */ - if (IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) + if (display->platform.broxton || display->platform.geminilake) bxt_pps_reset_all(display); gen9_set_dc_state(display, DC_STATE_EN_DC9); } @@ -898,63 +896,56 @@ void bxt_disable_dc9(struct intel_display *display) intel_pps_unlock_regs_wa(display); } -static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, +static void hsw_power_well_sync_hw(struct intel_display *display, struct i915_power_well *power_well) { const struct i915_power_well_regs *regs = power_well->desc->ops->regs; int pw_idx = i915_power_well_instance(power_well)->hsw.idx; u32 mask = HSW_PWR_WELL_CTL_REQ(pw_idx); - u32 bios_req = intel_de_read(dev_priv, regs->bios); + u32 bios_req = intel_de_read(display, regs->bios); /* Take over the request bit if set by BIOS. */ if (bios_req & mask) { - u32 drv_req = intel_de_read(dev_priv, regs->driver); + u32 drv_req = intel_de_read(display, regs->driver); if (!(drv_req & mask)) - intel_de_write(dev_priv, regs->driver, drv_req | mask); - intel_de_write(dev_priv, regs->bios, bios_req & ~mask); + intel_de_write(display, regs->driver, drv_req | mask); + intel_de_write(display, regs->bios, bios_req & ~mask); } } -static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, +static void bxt_dpio_cmn_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; - bxt_dpio_phy_init(display, i915_power_well_instance(power_well)->bxt.phy); } -static void bxt_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, +static void bxt_dpio_cmn_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; - bxt_dpio_phy_uninit(display, i915_power_well_instance(power_well)->bxt.phy); } -static bool bxt_dpio_cmn_power_well_enabled(struct 
drm_i915_private *dev_priv, +static bool bxt_dpio_cmn_power_well_enabled(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; - return bxt_dpio_phy_is_enabled(display, i915_power_well_instance(power_well)->bxt.phy); } -static void bxt_verify_dpio_phy_power_wells(struct drm_i915_private *dev_priv) +static void bxt_verify_dpio_phy_power_wells(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; struct i915_power_well *power_well; - power_well = lookup_power_well(dev_priv, BXT_DISP_PW_DPIO_CMN_A); + power_well = lookup_power_well(display, BXT_DISP_PW_DPIO_CMN_A); if (intel_power_well_refcount(power_well) > 0) bxt_dpio_phy_verify_state(display, i915_power_well_instance(power_well)->bxt.phy); - power_well = lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC); + power_well = lookup_power_well(display, VLV_DISP_PW_DPIO_CMN_BC); if (intel_power_well_refcount(power_well) > 0) bxt_dpio_phy_verify_state(display, i915_power_well_instance(power_well)->bxt.phy); - if (IS_GEMINILAKE(dev_priv)) { - power_well = lookup_power_well(dev_priv, + if (display->platform.geminilake) { + power_well = lookup_power_well(display, GLK_DISP_PW_DPIO_CMN_C); if (intel_power_well_refcount(power_well) > 0) bxt_dpio_phy_verify_state(display, @@ -962,21 +953,20 @@ static void bxt_verify_dpio_phy_power_wells(struct drm_i915_private *dev_priv) } } -static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv, +static bool gen9_dc_off_power_well_enabled(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; - return ((intel_de_read(display, DC_STATE_EN) & DC_STATE_EN_DC3CO) == 0 && (intel_de_read(display, DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0); } -static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) +static void gen9_assert_dbuf_enabled(struct intel_display *display) { + struct 
drm_i915_private *dev_priv = to_i915(display->drm); u8 hw_enabled_dbuf_slices = intel_enabled_dbuf_slices_mask(dev_priv); - u8 enabled_dbuf_slices = dev_priv->display.dbuf.enabled_slices; + u8 enabled_dbuf_slices = display->dbuf.enabled_slices; - drm_WARN(&dev_priv->drm, + drm_WARN(display->drm, hw_enabled_dbuf_slices != enabled_dbuf_slices, "Unexpected DBuf power power state (0x%08x, expected 0x%08x)\n", hw_enabled_dbuf_slices, @@ -988,18 +978,25 @@ void gen9_disable_dc_states(struct intel_display *display) struct drm_i915_private *dev_priv = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct intel_cdclk_config cdclk_config = {}; + u32 old_state = power_domains->dc_state; if (power_domains->target_dc_state == DC_STATE_EN_DC3CO) { tgl_disable_dc3co(display); return; } - gen9_set_dc_state(display, DC_STATE_DISABLE); - - if (!HAS_DISPLAY(display)) + if (HAS_DISPLAY(display)) { + intel_dmc_wl_get_noreg(display); + gen9_set_dc_state(display, DC_STATE_DISABLE); + intel_dmc_wl_put_noreg(display); + } else { + gen9_set_dc_state(display, DC_STATE_DISABLE); return; + } - intel_dmc_wl_disable(display); + if (old_state == DC_STATE_EN_UPTO_DC5 || + old_state == DC_STATE_EN_UPTO_DC6) + intel_dmc_wl_disable(display); intel_cdclk_get_cdclk(display, &cdclk_config); /* Can't read out voltage_level so can't use intel_cdclk_changed() */ @@ -1007,10 +1004,10 @@ void gen9_disable_dc_states(struct intel_display *display) intel_cdclk_clock_changed(&display->cdclk.hw, &cdclk_config)); - gen9_assert_dbuf_enabled(dev_priv); + gen9_assert_dbuf_enabled(display); - if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) - bxt_verify_dpio_phy_power_wells(dev_priv); + if (display->platform.geminilake || display->platform.broxton) + bxt_verify_dpio_phy_power_wells(display); if (DISPLAY_VER(display) >= 11) /* @@ -1021,18 +1018,15 @@ void gen9_disable_dc_states(struct intel_display *display) intel_combo_phy_init(dev_priv); } -static void 
gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, +static void gen9_dc_off_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; - gen9_disable_dc_states(display); } -static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, +static void gen9_dc_off_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; struct i915_power_domains *power_domains = &display->power.domains; if (!intel_dmc_has_payload(display)) @@ -1051,63 +1045,58 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, } } -static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, +static void i9xx_power_well_sync_hw_noop(struct intel_display *display, struct i915_power_well *power_well) { } -static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv, +static void i9xx_always_on_power_well_noop(struct intel_display *display, struct i915_power_well *power_well) { } -static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static bool i9xx_always_on_power_well_enabled(struct intel_display *display, + struct i915_power_well *power_well) { return true; } -static void i830_pipes_power_well_enable(struct drm_i915_private *dev_priv, +static void i830_pipes_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; - - if ((intel_de_read(display, TRANSCONF(dev_priv, PIPE_A)) & TRANSCONF_ENABLE) == 0) + if ((intel_de_read(display, TRANSCONF(display, PIPE_A)) & TRANSCONF_ENABLE) == 0) i830_enable_pipe(display, PIPE_A); - if ((intel_de_read(display, TRANSCONF(dev_priv, PIPE_B)) & TRANSCONF_ENABLE) == 0) + if ((intel_de_read(display, TRANSCONF(display, PIPE_B)) & TRANSCONF_ENABLE) == 0) 
i830_enable_pipe(display, PIPE_B); } -static void i830_pipes_power_well_disable(struct drm_i915_private *dev_priv, +static void i830_pipes_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; - i830_disable_pipe(display, PIPE_B); i830_disable_pipe(display, PIPE_A); } -static bool i830_pipes_power_well_enabled(struct drm_i915_private *dev_priv, +static bool i830_pipes_power_well_enabled(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; - - return intel_de_read(display, TRANSCONF(dev_priv, PIPE_A)) & TRANSCONF_ENABLE && - intel_de_read(display, TRANSCONF(dev_priv, PIPE_B)) & TRANSCONF_ENABLE; + return intel_de_read(display, TRANSCONF(display, PIPE_A)) & TRANSCONF_ENABLE && + intel_de_read(display, TRANSCONF(display, PIPE_B)) & TRANSCONF_ENABLE; } -static void i830_pipes_power_well_sync_hw(struct drm_i915_private *dev_priv, +static void i830_pipes_power_well_sync_hw(struct intel_display *display, struct i915_power_well *power_well) { if (intel_power_well_refcount(power_well) > 0) - i830_pipes_power_well_enable(dev_priv, power_well); + i830_pipes_power_well_enable(display, power_well); else - i830_pipes_power_well_disable(dev_priv, power_well); + i830_pipes_power_well_disable(display, power_well); } -static void vlv_set_power_well(struct drm_i915_private *dev_priv, +static void vlv_set_power_well(struct intel_display *display, struct i915_power_well *power_well, bool enable) { + struct drm_i915_private *dev_priv = to_i915(display->drm); int pw_idx = i915_power_well_instance(power_well)->vlv.idx; u32 mask; u32 state; @@ -1131,7 +1120,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl); if (wait_for(COND, 100)) - drm_err(&dev_priv->drm, + drm_err(display->drm, "timeout setting power well state %08x (%08x)\n", state, vlv_punit_read(dev_priv, 
PUNIT_REG_PWRGT_CTRL)); @@ -1142,21 +1131,22 @@ out: vlv_punit_put(dev_priv); } -static void vlv_power_well_enable(struct drm_i915_private *dev_priv, +static void vlv_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - vlv_set_power_well(dev_priv, power_well, true); + vlv_set_power_well(display, power_well, true); } -static void vlv_power_well_disable(struct drm_i915_private *dev_priv, +static void vlv_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - vlv_set_power_well(dev_priv, power_well, false); + vlv_set_power_well(display, power_well, false); } -static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, +static bool vlv_power_well_enabled(struct intel_display *display, struct i915_power_well *power_well) { + struct drm_i915_private *dev_priv = to_i915(display->drm); int pw_idx = i915_power_well_instance(power_well)->vlv.idx; bool enabled = false; u32 mask; @@ -1173,7 +1163,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, * We only ever set the power-on and power-gate states, anything * else is unexpected. */ - drm_WARN_ON(&dev_priv->drm, state != PUNIT_PWRGT_PWR_ON(pw_idx) && + drm_WARN_ON(display->drm, state != PUNIT_PWRGT_PWR_ON(pw_idx) && state != PUNIT_PWRGT_PWR_GATE(pw_idx)); if (state == ctrl) enabled = true; @@ -1183,14 +1173,14 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, * is poking at the power controls too. */ ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; - drm_WARN_ON(&dev_priv->drm, ctrl != state); + drm_WARN_ON(display->drm, ctrl != state); vlv_punit_put(dev_priv); return enabled; } -static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) +static void vlv_init_display_clock_gating(struct intel_display *display) { /* * On driver load, a pipe may be active and driving a DSI display. 
@@ -1198,25 +1188,25 @@ static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) * (and never recovering) in this case. intel_dsi_post_disable() will * clear it when we turn off the display. */ - intel_de_rmw(dev_priv, DSPCLK_GATE_D(dev_priv), + intel_de_rmw(display, DSPCLK_GATE_D(display), ~DPOUNIT_CLOCK_GATE_DISABLE, VRHUNIT_CLOCK_GATE_DISABLE); /* * Disable trickle feed and enable pnd deadline calculation */ - intel_de_write(dev_priv, MI_ARB_VLV, + intel_de_write(display, MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); - intel_de_write(dev_priv, CBR1_VLV, 0); + intel_de_write(display, CBR1_VLV, 0); - drm_WARN_ON(&dev_priv->drm, DISPLAY_RUNTIME_INFO(dev_priv)->rawclk_freq == 0); - intel_de_write(dev_priv, RAWCLK_FREQ_VLV, - DIV_ROUND_CLOSEST(DISPLAY_RUNTIME_INFO(dev_priv)->rawclk_freq, + drm_WARN_ON(display->drm, DISPLAY_RUNTIME_INFO(display)->rawclk_freq == 0); + intel_de_write(display, RAWCLK_FREQ_VLV, + DIV_ROUND_CLOSEST(DISPLAY_RUNTIME_INFO(display)->rawclk_freq, 1000)); } -static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) +static void vlv_display_power_well_init(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder; enum pipe pipe; @@ -1228,17 +1218,17 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) * * CHV DPLL B/C have some issues if VGA mode is enabled. 
*/ - for_each_pipe(dev_priv, pipe) { - u32 val = intel_de_read(dev_priv, DPLL(dev_priv, pipe)); + for_each_pipe(display, pipe) { + u32 val = intel_de_read(display, DPLL(display, pipe)); val |= DPLL_REF_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS; if (pipe != PIPE_A) val |= DPLL_INTEGRATED_CRI_CLK_VLV; - intel_de_write(dev_priv, DPLL(dev_priv, pipe), val); + intel_de_write(display, DPLL(display, pipe), val); } - vlv_init_display_clock_gating(dev_priv); + vlv_init_display_clock_gating(display); spin_lock_irq(&dev_priv->irq_lock); valleyview_enable_display_irqs(dev_priv); @@ -1248,14 +1238,14 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) * During driver initialization/resume we can avoid restoring the * part of the HW/SW state that will be inited anyway explicitly. */ - if (dev_priv->display.power.domains.initializing) + if (display->power.domains.initializing) return; intel_hpd_init(dev_priv); intel_hpd_poll_disable(dev_priv); /* Re-enable the ADPA, if we have one */ - for_each_intel_encoder(&dev_priv->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { if (encoder->type == INTEL_OUTPUT_ANALOG) intel_crt_reset(&encoder->base); } @@ -1265,9 +1255,9 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) intel_pps_unlock_regs_wa(display); } -static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) +static void vlv_display_power_well_deinit(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); spin_lock_irq(&dev_priv->irq_lock); valleyview_disable_display_irqs(dev_priv); @@ -1279,33 +1269,33 @@ static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) vlv_pps_reset_all(display); /* Prevent us from re-enabling polling on accident in late suspend */ - if (!dev_priv->drm.dev->power.is_suspended) + if (!display->drm->dev->power.is_suspended) intel_hpd_poll_enable(dev_priv); } -static 
void vlv_display_power_well_enable(struct drm_i915_private *dev_priv, +static void vlv_display_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - vlv_set_power_well(dev_priv, power_well, true); + vlv_set_power_well(display, power_well, true); - vlv_display_power_well_init(dev_priv); + vlv_display_power_well_init(display); } -static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv, +static void vlv_display_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - vlv_display_power_well_deinit(dev_priv); + vlv_display_power_well_deinit(display); - vlv_set_power_well(dev_priv, power_well, false); + vlv_set_power_well(display, power_well, false); } -static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, +static void vlv_dpio_cmn_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { /* since ref/cri clock was enabled */ udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ - vlv_set_power_well(dev_priv, power_well, true); + vlv_set_power_well(display, power_well, true); /* * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx - @@ -1318,32 +1308,32 @@ static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, * both PLLs disabled, or we risk losing DPIO and PLL * synchronization. 
*/ - intel_de_rmw(dev_priv, DPIO_CTL, 0, DPIO_CMNRST); + intel_de_rmw(display, DPIO_CTL, 0, DPIO_CMNRST); } -static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, +static void vlv_dpio_cmn_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { + struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe; - for_each_pipe(dev_priv, pipe) + for_each_pipe(display, pipe) assert_pll_disabled(dev_priv, pipe); /* Assert common reset */ - intel_de_rmw(dev_priv, DPIO_CTL, DPIO_CMNRST, 0); + intel_de_rmw(display, DPIO_CTL, DPIO_CMNRST, 0); - vlv_set_power_well(dev_priv, power_well, false); + vlv_set_power_well(display, power_well, false); } #define BITS_SET(val, bits) (((val) & (bits)) == (bits)) static void assert_chv_phy_status(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct i915_power_well *cmn_bc = - lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC); + lookup_power_well(display, VLV_DISP_PW_DPIO_CMN_BC); struct i915_power_well *cmn_d = - lookup_power_well(dev_priv, CHV_DISP_PW_DPIO_CMN_D); + lookup_power_well(display, CHV_DISP_PW_DPIO_CMN_D); u32 phy_control = display->power.chv_phy_control; u32 phy_status = 0; u32 phy_status_mask = 0xffffffff; @@ -1368,7 +1358,7 @@ static void assert_chv_phy_status(struct intel_display *display) PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 0) | PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 1)); - if (intel_power_well_is_enabled(dev_priv, cmn_bc)) { + if (intel_power_well_is_enabled(display, cmn_bc)) { phy_status |= PHY_POWERGOOD(DPIO_PHY0); /* this assumes override is only used to enable lanes */ @@ -1409,7 +1399,7 @@ static void assert_chv_phy_status(struct intel_display *display) phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 1); } - if (intel_power_well_is_enabled(dev_priv, cmn_d)) { + if (intel_power_well_is_enabled(display, cmn_d)) { phy_status |= PHY_POWERGOOD(DPIO_PHY1); /* this assumes override is 
only used to enable lanes */ @@ -1444,10 +1434,10 @@ static void assert_chv_phy_status(struct intel_display *display) #undef BITS_SET -static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, +static void chv_dpio_cmn_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); enum i915_power_well_id id = i915_power_well_instance(power_well)->id; enum dpio_phy phy; u32 tmp; @@ -1463,7 +1453,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, /* since ref/cri clock was enabled */ udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ - vlv_set_power_well(dev_priv, power_well, true); + vlv_set_power_well(display, power_well, true); /* Poll for phypwrgood signal */ if (intel_de_wait_for_set(display, DISPLAY_PHY_STATUS, @@ -1507,10 +1497,10 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, assert_chv_phy_status(display); } -static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, +static void chv_dpio_cmn_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - struct intel_display *display = &dev_priv->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); enum i915_power_well_id id = i915_power_well_instance(power_well)->id; enum dpio_phy phy; @@ -1531,7 +1521,7 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, intel_de_write(display, DISPLAY_PHY_CONTROL, display->power.chv_phy_control); - vlv_set_power_well(dev_priv, power_well, false); + vlv_set_power_well(display, power_well, false); drm_dbg_kms(display->drm, "Disabled DPIO PHY%d (PHY_CONTROL=0x%08x)\n", @@ -1543,9 +1533,10 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, assert_chv_phy_status(display); } -static void assert_chv_phy_powergate(struct 
drm_i915_private *dev_priv, enum dpio_phy phy, +static void assert_chv_phy_powergate(struct intel_display *display, enum dpio_phy phy, enum dpio_channel ch, bool override, unsigned int mask) { + struct drm_i915_private *dev_priv = to_i915(display->drm); u32 reg, val, expected, actual; /* @@ -1555,7 +1546,7 @@ static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpi * reset (ie. the power well has been disabled at * least once). */ - if (!dev_priv->display.power.chv_phy_assert[phy]) + if (!display->power.chv_phy_assert[phy]) return; if (ch == DPIO_CH0) @@ -1598,7 +1589,7 @@ static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpi actual = REG_FIELD_GET(DPIO_ANYDL_POWERDOWN_CH1 | DPIO_ALLDL_POWERDOWN_CH1, val); - drm_WARN(&dev_priv->drm, actual != expected, + drm_WARN(display->drm, actual != expected, "Unexpected DPIO lane power down: all %d, any %d. Expected: all %d, any %d. (0x%x = 0x%08x)\n", !!(actual & DPIO_ALLDL_POWERDOWN), !!(actual & DPIO_ANYDL_POWERDOWN), @@ -1607,10 +1598,9 @@ static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpi reg, val); } -bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, +bool chv_phy_powergate_ch(struct intel_display *display, enum dpio_phy phy, enum dpio_channel ch, bool override) { - struct intel_display *display = &dev_priv->display; struct i915_power_domains *power_domains = &display->power.domains; bool was_override; @@ -1645,7 +1635,6 @@ void chv_phy_powergate_lanes(struct intel_encoder *encoder, bool override, unsigned int mask) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_power_domains *power_domains = &display->power.domains; enum dpio_phy phy = vlv_dig_port_to_phy(enc_to_dig_port(encoder)); enum dpio_channel ch = vlv_dig_port_to_channel(enc_to_dig_port(encoder)); @@ -1669,14 +1658,15 @@ void chv_phy_powergate_lanes(struct 
intel_encoder *encoder, assert_chv_phy_status(display); - assert_chv_phy_powergate(dev_priv, phy, ch, override, mask); + assert_chv_phy_powergate(display, phy, ch, override, mask); mutex_unlock(&power_domains->lock); } -static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, +static bool chv_pipe_power_well_enabled(struct intel_display *display, struct i915_power_well *power_well) { + struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe = PIPE_A; bool enabled; u32 state, ctrl; @@ -1688,7 +1678,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, * We only ever set the power-on and power-gate states, anything * else is unexpected. */ - drm_WARN_ON(&dev_priv->drm, state != DP_SSS_PWR_ON(pipe) && + drm_WARN_ON(display->drm, state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe)); enabled = state == DP_SSS_PWR_ON(pipe); @@ -1697,17 +1687,18 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, * is poking at the power controls too. 
*/ ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSC_MASK(pipe); - drm_WARN_ON(&dev_priv->drm, ctrl << 16 != state); + drm_WARN_ON(display->drm, ctrl << 16 != state); vlv_punit_put(dev_priv); return enabled; } -static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, +static void chv_set_pipe_power_well(struct intel_display *display, struct i915_power_well *power_well, bool enable) { + struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe = PIPE_A; u32 state; u32 ctrl; @@ -1728,7 +1719,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, ctrl); if (wait_for(COND, 100)) - drm_err(&dev_priv->drm, + drm_err(display->drm, "timeout setting power well state %08x (%08x)\n", state, vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM)); @@ -1739,32 +1730,33 @@ out: vlv_punit_put(dev_priv); } -static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, +static void chv_pipe_power_well_sync_hw(struct intel_display *display, struct i915_power_well *power_well) { - intel_de_write(dev_priv, DISPLAY_PHY_CONTROL, - dev_priv->display.power.chv_phy_control); + intel_de_write(display, DISPLAY_PHY_CONTROL, + display->power.chv_phy_control); } -static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, +static void chv_pipe_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - chv_set_pipe_power_well(dev_priv, power_well, true); + chv_set_pipe_power_well(display, power_well, true); - vlv_display_power_well_init(dev_priv); + vlv_display_power_well_init(display); } -static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, +static void chv_pipe_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - vlv_display_power_well_deinit(dev_priv); + vlv_display_power_well_deinit(display); - chv_set_pipe_power_well(dev_priv, power_well, false); + chv_set_pipe_power_well(display, 
power_well, false); } static void -tgl_tc_cold_request(struct drm_i915_private *i915, bool block) +tgl_tc_cold_request(struct intel_display *display, bool block) { + struct drm_i915_private *i915 = to_i915(display->drm); u8 tries = 0; int ret; @@ -1805,31 +1797,31 @@ tgl_tc_cold_request(struct drm_i915_private *i915, bool block) } static void -tgl_tc_cold_off_power_well_enable(struct drm_i915_private *i915, +tgl_tc_cold_off_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - tgl_tc_cold_request(i915, true); + tgl_tc_cold_request(display, true); } static void -tgl_tc_cold_off_power_well_disable(struct drm_i915_private *i915, +tgl_tc_cold_off_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - tgl_tc_cold_request(i915, false); + tgl_tc_cold_request(display, false); } static void -tgl_tc_cold_off_power_well_sync_hw(struct drm_i915_private *i915, +tgl_tc_cold_off_power_well_sync_hw(struct intel_display *display, struct i915_power_well *power_well) { if (intel_power_well_refcount(power_well) > 0) - tgl_tc_cold_off_power_well_enable(i915, power_well); + tgl_tc_cold_off_power_well_enable(display, power_well); else - tgl_tc_cold_off_power_well_disable(i915, power_well); + tgl_tc_cold_off_power_well_disable(display, power_well); } static bool -tgl_tc_cold_off_power_well_is_enabled(struct drm_i915_private *dev_priv, +tgl_tc_cold_off_power_well_is_enabled(struct intel_display *display, struct i915_power_well *power_well) { /* @@ -1839,17 +1831,18 @@ tgl_tc_cold_off_power_well_is_enabled(struct drm_i915_private *dev_priv, return intel_power_well_refcount(power_well); } -static void xelpdp_aux_power_well_enable(struct drm_i915_private *dev_priv, +static void xelpdp_aux_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { + struct drm_i915_private *dev_priv = to_i915(display->drm); enum aux_ch aux_ch = i915_power_well_instance(power_well)->xelpdp.aux_ch; - enum phy phy = 
icl_aux_pw_to_phy(dev_priv, power_well); + enum phy phy = icl_aux_pw_to_phy(display, power_well); if (intel_phy_is_tc(dev_priv, phy)) - icl_tc_port_assert_ref_held(dev_priv, power_well, - aux_ch_to_digital_port(dev_priv, aux_ch)); + icl_tc_port_assert_ref_held(display, power_well, + aux_ch_to_digital_port(display, aux_ch)); - intel_de_rmw(dev_priv, XELPDP_DP_AUX_CH_CTL(dev_priv, aux_ch), + intel_de_rmw(display, XELPDP_DP_AUX_CH_CTL(display, aux_ch), XELPDP_DP_AUX_CH_CTL_POWER_REQUEST, XELPDP_DP_AUX_CH_CTL_POWER_REQUEST); @@ -1862,57 +1855,57 @@ static void xelpdp_aux_power_well_enable(struct drm_i915_private *dev_priv, usleep_range(600, 1200); } -static void xelpdp_aux_power_well_disable(struct drm_i915_private *dev_priv, +static void xelpdp_aux_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { enum aux_ch aux_ch = i915_power_well_instance(power_well)->xelpdp.aux_ch; - intel_de_rmw(dev_priv, XELPDP_DP_AUX_CH_CTL(dev_priv, aux_ch), + intel_de_rmw(display, XELPDP_DP_AUX_CH_CTL(display, aux_ch), XELPDP_DP_AUX_CH_CTL_POWER_REQUEST, 0); usleep_range(10, 30); } -static bool xelpdp_aux_power_well_enabled(struct drm_i915_private *dev_priv, +static bool xelpdp_aux_power_well_enabled(struct intel_display *display, struct i915_power_well *power_well) { enum aux_ch aux_ch = i915_power_well_instance(power_well)->xelpdp.aux_ch; - return intel_de_read(dev_priv, XELPDP_DP_AUX_CH_CTL(dev_priv, aux_ch)) & + return intel_de_read(display, XELPDP_DP_AUX_CH_CTL(display, aux_ch)) & XELPDP_DP_AUX_CH_CTL_POWER_STATUS; } -static void xe2lpd_pica_power_well_enable(struct drm_i915_private *dev_priv, +static void xe2lpd_pica_power_well_enable(struct intel_display *display, struct i915_power_well *power_well) { - intel_de_write(dev_priv, XE2LPD_PICA_PW_CTL, + intel_de_write(display, XE2LPD_PICA_PW_CTL, XE2LPD_PICA_CTL_POWER_REQUEST); - if (intel_de_wait_for_set(dev_priv, XE2LPD_PICA_PW_CTL, + if (intel_de_wait_for_set(display, XE2LPD_PICA_PW_CTL, 
XE2LPD_PICA_CTL_POWER_STATUS, 1)) { - drm_dbg_kms(&dev_priv->drm, "pica power well enable timeout\n"); + drm_dbg_kms(display->drm, "pica power well enable timeout\n"); - drm_WARN(&dev_priv->drm, 1, "Power well PICA timeout when enabled"); + drm_WARN(display->drm, 1, "Power well PICA timeout when enabled"); } } -static void xe2lpd_pica_power_well_disable(struct drm_i915_private *dev_priv, +static void xe2lpd_pica_power_well_disable(struct intel_display *display, struct i915_power_well *power_well) { - intel_de_write(dev_priv, XE2LPD_PICA_PW_CTL, 0); + intel_de_write(display, XE2LPD_PICA_PW_CTL, 0); - if (intel_de_wait_for_clear(dev_priv, XE2LPD_PICA_PW_CTL, + if (intel_de_wait_for_clear(display, XE2LPD_PICA_PW_CTL, XE2LPD_PICA_CTL_POWER_STATUS, 1)) { - drm_dbg_kms(&dev_priv->drm, "pica power well disable timeout\n"); + drm_dbg_kms(display->drm, "pica power well disable timeout\n"); - drm_WARN(&dev_priv->drm, 1, "Power well PICA timeout when disabled"); + drm_WARN(display->drm, 1, "Power well PICA timeout when disabled"); } } -static bool xe2lpd_pica_power_well_enabled(struct drm_i915_private *dev_priv, +static bool xe2lpd_pica_power_well_enabled(struct intel_display *display, struct i915_power_well *power_well) { - return intel_de_read(dev_priv, XE2LPD_PICA_PW_CTL) & + return intel_de_read(display, XE2LPD_PICA_PW_CTL) & XE2LPD_PICA_CTL_POWER_STATUS; } diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.h b/drivers/gpu/drm/i915/display/intel_display_power_well.h index 93559f7c6100..338379dae44c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.h +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.h @@ -10,21 +10,20 @@ #include "intel_display_power.h" #include "intel_dpio_phy.h" -struct drm_i915_private; struct i915_power_well_ops; struct intel_display; struct intel_encoder; -#define for_each_power_well(__dev_priv, __power_well) \ - for ((__power_well) = (__dev_priv)->display.power.domains.power_wells; \ - (__power_well) - 
(__dev_priv)->display.power.domains.power_wells < \ - (__dev_priv)->display.power.domains.power_well_count; \ +#define for_each_power_well(___display, __power_well) \ + for ((__power_well) = (___display)->power.domains.power_wells; \ + (__power_well) - (___display)->power.domains.power_wells < \ + (___display)->power.domains.power_well_count; \ (__power_well)++) -#define for_each_power_well_reverse(__dev_priv, __power_well) \ - for ((__power_well) = (__dev_priv)->display.power.domains.power_wells + \ - (__dev_priv)->display.power.domains.power_well_count - 1; \ - (__power_well) - (__dev_priv)->display.power.domains.power_wells >= 0; \ +#define for_each_power_well_reverse(___display, __power_well) \ + for ((__power_well) = (___display)->power.domains.power_wells + \ + (___display)->power.domains.power_well_count - 1; \ + (__power_well) - (___display)->power.domains.power_wells >= 0; \ (__power_well)--) /* @@ -127,23 +126,23 @@ struct i915_power_well { u8 instance_idx; }; -struct i915_power_well *lookup_power_well(struct drm_i915_private *i915, +struct i915_power_well *lookup_power_well(struct intel_display *display, enum i915_power_well_id id); -void intel_power_well_enable(struct drm_i915_private *i915, +void intel_power_well_enable(struct intel_display *display, struct i915_power_well *power_well); -void intel_power_well_disable(struct drm_i915_private *i915, +void intel_power_well_disable(struct intel_display *display, struct i915_power_well *power_well); -void intel_power_well_sync_hw(struct drm_i915_private *i915, +void intel_power_well_sync_hw(struct intel_display *display, struct i915_power_well *power_well); -void intel_power_well_get(struct drm_i915_private *i915, +void intel_power_well_get(struct intel_display *display, struct i915_power_well *power_well); -void intel_power_well_put(struct drm_i915_private *i915, +void intel_power_well_put(struct intel_display *display, struct i915_power_well *power_well); -bool intel_power_well_is_enabled(struct 
drm_i915_private *i915, +bool intel_power_well_is_enabled(struct intel_display *display, struct i915_power_well *power_well); bool intel_power_well_is_enabled_cached(struct i915_power_well *power_well); -bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, +bool intel_display_power_well_is_enabled(struct intel_display *display, enum i915_power_well_id power_well_id); bool intel_power_well_is_always_on(struct i915_power_well *power_well); const char *intel_power_well_name(struct i915_power_well *power_well); @@ -152,7 +151,7 @@ int intel_power_well_refcount(struct i915_power_well *power_well); void chv_phy_powergate_lanes(struct intel_encoder *encoder, bool override, unsigned int mask); -bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, +bool chv_phy_powergate_ch(struct intel_display *display, enum dpio_phy phy, enum dpio_channel ch, bool override); void gen9_enable_dc5(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/display/intel_display_reset.c b/drivers/gpu/drm/i915/display/intel_display_reset.c index 49e2e650ebcd..093b386c95e8 100644 --- a/drivers/gpu/drm/i915/display/intel_display_reset.c +++ b/drivers/gpu/drm/i915/display/intel_display_reset.c @@ -114,11 +114,11 @@ void intel_display_reset_finish(struct drm_i915_private *i915) * so need a full re-initialization. 
*/ intel_pps_unlock_regs_wa(display); - intel_display_driver_init_hw(i915); + intel_display_driver_init_hw(display); intel_clock_gating_init(i915); intel_hpd_init(i915); - ret = __intel_display_driver_resume(i915, state, ctx); + ret = __intel_display_driver_resume(display, state, ctx); if (ret) drm_err(&i915->drm, "Restoring old state failed with %i\n", ret); diff --git a/drivers/gpu/drm/i915/display/intel_display_snapshot.c b/drivers/gpu/drm/i915/display/intel_display_snapshot.c index 030c4f873da1..25ba043cbb65 100644 --- a/drivers/gpu/drm/i915/display/intel_display_snapshot.c +++ b/drivers/gpu/drm/i915/display/intel_display_snapshot.c @@ -3,7 +3,9 @@ #include <linux/slab.h> -#include "i915_drv.h" +#include <drm/drm_drv.h> + +#include "intel_display_core.h" #include "intel_display_device.h" #include "intel_display_params.h" #include "intel_display_snapshot.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_trace.h b/drivers/gpu/drm/i915/display/intel_display_trace.h index 9bd8f1e505b0..338b9f7b20b8 100644 --- a/drivers/gpu/drm/i915/display/intel_display_trace.h +++ b/drivers/gpu/drm/i915/display/intel_display_trace.h @@ -14,8 +14,8 @@ #include <linux/types.h> #include <linux/tracepoint.h> -#include "i915_drv.h" #include "intel_crtc.h" +#include "intel_display_core.h" #include "intel_display_limits.h" #include "intel_display_types.h" #include "intel_vblank.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ff6eb93337e0..8271e50e3644 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -45,6 +45,7 @@ #include "i915_vma_types.h" #include "intel_bios.h" #include "intel_display.h" +#include "intel_display_conversion.h" #include "intel_display_limits.h" #include "intel_display_power.h" #include "intel_dpll_mgr.h" @@ -301,6 +302,15 @@ struct intel_panel_bl_funcs { u32 (*hz_to_pwm)(struct intel_connector *connector, u32 
hz); }; +/* in 100us units */ +struct intel_pps_delays { + u16 power_up; /* eDP: T1+T3, LVDS: T1+T2 */ + u16 backlight_on; /* eDP: T8, LVDS: T5 */ + u16 backlight_off; /* eDP: T9, LVDS: T6/TX */ + u16 power_down; /* eDP: T10, LVDS: T3 */ + u16 power_cycle; /* eDP: T11+T12, LVDS: T7+T4 */ +}; + enum drrs_type { DRRS_TYPE_NONE, DRRS_TYPE_STATIC, @@ -328,7 +338,7 @@ struct intel_vbt_panel_data { int preemphasis; int vswing; int bpp; - struct edp_power_seq pps; + struct intel_pps_delays pps; u8 drrs_msa_timing_delay; bool low_vswing; bool hobl; @@ -587,6 +597,8 @@ struct intel_atomic_state { bool skip_intermediate_wm; bool rps_interactive; + + struct work_struct cleanup_work; }; struct intel_plane_state { @@ -697,8 +709,8 @@ struct intel_initial_plane_config { }; struct intel_scaler { - int in_use; u32 mode; + bool in_use; }; struct intel_crtc_scaler_state { @@ -769,6 +781,7 @@ struct skl_wm_level { u8 lines; bool enable; bool ignore_lines; + bool auto_min_alloc_wm_enable; bool can_sagv; }; @@ -863,6 +876,13 @@ struct intel_crtc_wm_state { struct skl_ddb_entry plane_ddb[I915_MAX_PLANES]; /* pre-icl: for planar Y */ struct skl_ddb_entry plane_ddb_y[I915_MAX_PLANES]; + + /* + * xe3: Minimum amount of display blocks and minimum + * sagv allocation required for async flip + */ + u16 plane_min_ddb[I915_MAX_PLANES]; + u16 plane_interim_ddb[I915_MAX_PLANES]; } skl; struct { @@ -1140,8 +1160,6 @@ struct intel_crtc_state { bool double_wide; - int pbn; - struct intel_crtc_scaler_state scaler_state; /* w/a for waiting 2 vblanks during crtc enable */ @@ -1235,7 +1253,7 @@ struct intel_crtc_state { /* Display Stream compression state */ struct { bool compression_enable; - bool dsc_split; + int num_streams; /* Compressed Bpp in U6.4 format (first 4 bits for fractional part) */ u16 compressed_bpp_x16; u8 slice_count; @@ -1568,8 +1586,8 @@ struct intel_pps { * requiring a reinitialization. Only relevant on BXT+. 
*/ bool bxt_pps_reset; - struct edp_power_seq pps_delays; - struct edp_power_seq bios_pps_delays; + struct intel_pps_delays pps_delays; + struct intel_pps_delays bios_pps_delays; }; struct intel_psr { @@ -1803,11 +1821,13 @@ struct intel_lspcon { struct intel_digital_port { struct intel_encoder base; - u32 saved_port_bits; struct intel_dp dp; struct intel_hdmi hdmi; struct intel_lspcon lspcon; enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool); + + bool lane_reversal; + bool ddi_a_4_lanes; bool release_cl2_override; u8 max_lanes; /* Used for DP and ICL+ TypeC/DP and TypeC/HDMI ports. */ @@ -1946,6 +1966,19 @@ static inline bool intel_encoder_is_dp(struct intel_encoder *encoder) } } +static inline bool intel_encoder_is_hdmi(struct intel_encoder *encoder) +{ + switch (encoder->type) { + case INTEL_OUTPUT_HDMI: + return true; + case INTEL_OUTPUT_DDI: + /* See if the HDMI encoder is valid. */ + return i915_mmio_reg_valid(enc_to_intel_hdmi(encoder)->hdmi_reg); + default: + return false; + } +} + static inline struct intel_lspcon * enc_to_intel_lspcon(struct intel_encoder *encoder) { @@ -2086,7 +2119,7 @@ to_intel_frontbuffer(struct drm_framebuffer *fb) * intel_display pointer. */ #define __drm_device_to_intel_display(p) \ - ((p) ? &to_i915(p)->display : NULL) + ((p) ? 
__drm_to_display(p) : NULL) #define __device_to_intel_display(p) \ __drm_device_to_intel_display(dev_get_drvdata(p)) #define __pci_dev_to_intel_display(p) \ diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 87bdacfd9edf..221d3abda791 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -638,8 +638,6 @@ void intel_dmc_disable_program(struct intel_display *display) pipedmc_clock_gating_wa(display, true); disable_all_event_handlers(display); pipedmc_clock_gating_wa(display, false); - - intel_dmc_wl_disable(display); } void assert_dmc_loaded(struct intel_display *display) @@ -1146,8 +1144,6 @@ void intel_dmc_suspend(struct intel_display *display) if (dmc) flush_work(&dmc->work); - intel_dmc_wl_disable(display); - /* Drop the reference held in case DMC isn't loaded. */ if (!intel_dmc_has_payload(display)) intel_dmc_runtime_pm_put(display); diff --git a/drivers/gpu/drm/i915/display/intel_dmc_wl.c b/drivers/gpu/drm/i915/display/intel_dmc_wl.c index 5634ff07269d..02de3ae15074 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc_wl.c +++ b/drivers/gpu/drm/i915/display/intel_dmc_wl.c @@ -5,6 +5,10 @@ #include <linux/kernel.h> +#include <drm/drm_print.h> + +#include "i915_drv.h" +#include "i915_reg.h" #include "intel_de.h" #include "intel_dmc.h" #include "intel_dmc_regs.h" @@ -39,7 +43,11 @@ * potential future use. */ -#define DMC_WAKELOCK_CTL_TIMEOUT 5 +/* + * Define DMC_WAKELOCK_CTL_TIMEOUT_US in microseconds because we use the + * atomic variant of waiting MMIO. 
+ */ +#define DMC_WAKELOCK_CTL_TIMEOUT_US 5000 #define DMC_WAKELOCK_HOLD_TIME 50 struct intel_dmc_wl_range { @@ -47,8 +55,90 @@ struct intel_dmc_wl_range { u32 end; }; -static struct intel_dmc_wl_range lnl_wl_range[] = { +static const struct intel_dmc_wl_range powered_off_ranges[] = { { .start = 0x60000, .end = 0x7ffff }, + {}, +}; + +static const struct intel_dmc_wl_range xe3lpd_dc5_dc6_dmc_ranges[] = { + { .start = 0x45500 }, /* DC_STATE_SEL */ + { .start = 0x457a0, .end = 0x457b0 }, /* DC*_RESIDENCY_COUNTER */ + { .start = 0x45504 }, /* DC_STATE_EN */ + { .start = 0x45400, .end = 0x4540c }, /* PWR_WELL_CTL_* */ + { .start = 0x454f0 }, /* RETENTION_CTRL */ + + /* DBUF_CTL_* */ + { .start = 0x44300 }, + { .start = 0x44304 }, + { .start = 0x44f00 }, + { .start = 0x44f04 }, + { .start = 0x44fe8 }, + { .start = 0x45008 }, + + { .start = 0x46070 }, /* CDCLK_PLL_ENABLE */ + { .start = 0x46000 }, /* CDCLK_CTL */ + { .start = 0x46008 }, /* CDCLK_SQUASH_CTL */ + + /* TRANS_CMTG_CTL_* */ + { .start = 0x6fa88 }, + { .start = 0x6fb88 }, + + { .start = 0x46430 }, /* CHICKEN_DCPR_1 */ + { .start = 0x46434 }, /* CHICKEN_DCPR_2 */ + { .start = 0x454a0 }, /* CHICKEN_DCPR_4 */ + { .start = 0x42084 }, /* CHICKEN_MISC_2 */ + { .start = 0x42088 }, /* CHICKEN_MISC_3 */ + { .start = 0x46160 }, /* CMTG_CLK_SEL */ + { .start = 0x8f000, .end = 0x8ffff }, /* Main DMC registers */ + + {}, +}; + +static const struct intel_dmc_wl_range xe3lpd_dc3co_dmc_ranges[] = { + { .start = 0x454a0 }, /* CHICKEN_DCPR_4 */ + + { .start = 0x45504 }, /* DC_STATE_EN */ + + /* DBUF_CTL_* */ + { .start = 0x44300 }, + { .start = 0x44304 }, + { .start = 0x44f00 }, + { .start = 0x44f04 }, + { .start = 0x44fe8 }, + { .start = 0x45008 }, + + { .start = 0x46070 }, /* CDCLK_PLL_ENABLE */ + { .start = 0x46000 }, /* CDCLK_CTL */ + { .start = 0x46008 }, /* CDCLK_SQUASH_CTL */ + { .start = 0x8f000, .end = 0x8ffff }, /* Main DMC registers */ + + /* Scanline registers */ + { .start = 0x70000 }, + { .start = 0x70004 }, + { 
.start = 0x70014 }, + { .start = 0x70018 }, + { .start = 0x71000 }, + { .start = 0x71004 }, + { .start = 0x71014 }, + { .start = 0x71018 }, + { .start = 0x72000 }, + { .start = 0x72004 }, + { .start = 0x72014 }, + { .start = 0x72018 }, + { .start = 0x73000 }, + { .start = 0x73004 }, + { .start = 0x73014 }, + { .start = 0x73018 }, + { .start = 0x7b000 }, + { .start = 0x7b004 }, + { .start = 0x7b014 }, + { .start = 0x7b018 }, + { .start = 0x7c000 }, + { .start = 0x7c004 }, + { .start = 0x7c014 }, + { .start = 0x7c018 }, + + {}, }; static void __intel_dmc_wl_release(struct intel_display *display) @@ -72,15 +162,18 @@ static void intel_dmc_wl_work(struct work_struct *work) spin_lock_irqsave(&wl->lock, flags); - /* Bail out if refcount reached zero while waiting for the spinlock */ - if (!refcount_read(&wl->refcount)) + /* + * Bail out if refcount became non-zero while waiting for the spinlock, + * meaning that the lock is now taken again. + */ + if (refcount_read(&wl->refcount)) goto out_unlock; __intel_de_rmw_nowl(display, DMC_WAKELOCK1_CTL, DMC_WAKELOCK_CTL_REQ, 0); - if (__intel_de_wait_for_register_nowl(display, DMC_WAKELOCK1_CTL, - DMC_WAKELOCK_CTL_ACK, 0, - DMC_WAKELOCK_CTL_TIMEOUT)) { + if (__intel_de_wait_for_register_atomic_nowl(display, DMC_WAKELOCK1_CTL, + DMC_WAKELOCK_CTL_ACK, 0, + DMC_WAKELOCK_CTL_TIMEOUT_US)) { WARN_RATELIMIT(1, "DMC wakelock release timed out"); goto out_unlock; } @@ -91,38 +184,110 @@ out_unlock: spin_unlock_irqrestore(&wl->lock, flags); } -static bool intel_dmc_wl_check_range(u32 address) +static void __intel_dmc_wl_take(struct intel_display *display) { - int i; - bool wl_needed = false; - - for (i = 0; i < ARRAY_SIZE(lnl_wl_range); i++) { - if (address >= lnl_wl_range[i].start && - address <= lnl_wl_range[i].end) { - wl_needed = true; - break; - } + struct intel_dmc_wl *wl = &display->wl; + + /* + * Only try to take the wakelock if it's not marked as taken + * yet. 
It may be already taken at this point if we have + * already released the last reference, but the work has not + * run yet. + */ + if (wl->taken) + return; + + __intel_de_rmw_nowl(display, DMC_WAKELOCK1_CTL, 0, + DMC_WAKELOCK_CTL_REQ); + + /* + * We need to use the atomic variant of the waiting routine + * because the DMC wakelock is also taken in atomic context. + */ + if (__intel_de_wait_for_register_atomic_nowl(display, DMC_WAKELOCK1_CTL, + DMC_WAKELOCK_CTL_ACK, + DMC_WAKELOCK_CTL_ACK, + DMC_WAKELOCK_CTL_TIMEOUT_US)) { + WARN_RATELIMIT(1, "DMC wakelock ack timed out"); + return; } - return wl_needed; + wl->taken = true; +} + +static bool intel_dmc_wl_reg_in_range(i915_reg_t reg, + const struct intel_dmc_wl_range ranges[]) +{ + u32 offset = i915_mmio_reg_offset(reg); + + for (int i = 0; ranges[i].start; i++) { + u32 end = ranges[i].end ?: ranges[i].start; + + if (ranges[i].start <= offset && offset <= end) + return true; + } + + return false; +} + +static bool intel_dmc_wl_check_range(i915_reg_t reg, u32 dc_state) +{ + const struct intel_dmc_wl_range *ranges; + + /* + * Check that the offset is in one of the ranges for which + * registers are powered off during DC states. + */ + if (intel_dmc_wl_reg_in_range(reg, powered_off_ranges)) + return true; + + /* + * Check that the offset is for a register that is touched by + * the DMC and requires a DC exit for proper access. 
+ */ + switch (dc_state) { + case DC_STATE_EN_DC3CO: + ranges = xe3lpd_dc3co_dmc_ranges; + break; + case DC_STATE_EN_UPTO_DC5: + case DC_STATE_EN_UPTO_DC6: + ranges = xe3lpd_dc5_dc6_dmc_ranges; + break; + default: + ranges = NULL; + } + + if (ranges && intel_dmc_wl_reg_in_range(reg, ranges)) + return true; + + return false; } static bool __intel_dmc_wl_supported(struct intel_display *display) { - if (DISPLAY_VER(display) < 20 || - !intel_dmc_has_payload(display) || - !display->params.enable_dmc_wl) - return false; + return display->params.enable_dmc_wl && intel_dmc_has_payload(display); +} - return true; +static void intel_dmc_wl_sanitize_param(struct intel_display *display) +{ + if (!HAS_DMC_WAKELOCK(display)) + display->params.enable_dmc_wl = 0; + else if (display->params.enable_dmc_wl >= 0) + display->params.enable_dmc_wl = !!display->params.enable_dmc_wl; + else + display->params.enable_dmc_wl = DISPLAY_VER(display) >= 30; + + drm_dbg_kms(display->drm, "Sanitized enable_dmc_wl value: %d\n", + display->params.enable_dmc_wl); } void intel_dmc_wl_init(struct intel_display *display) { struct intel_dmc_wl *wl = &display->wl; - /* don't call __intel_dmc_wl_supported(), DMC is not loaded yet */ - if (DISPLAY_VER(display) < 20 || !display->params.enable_dmc_wl) + intel_dmc_wl_sanitize_param(display); + + if (!display->params.enable_dmc_wl) return; INIT_DELAYED_WORK(&wl->work, intel_dmc_wl_work); @@ -130,7 +295,8 @@ void intel_dmc_wl_init(struct intel_display *display) refcount_set(&wl->refcount, 0); } -void intel_dmc_wl_enable(struct intel_display *display) +/* Must only be called as part of enabling dynamic DC states. 
*/ +void intel_dmc_wl_enable(struct intel_display *display, u32 dc_state) { struct intel_dmc_wl *wl = &display->wl; unsigned long flags; @@ -140,7 +306,9 @@ void intel_dmc_wl_enable(struct intel_display *display) spin_lock_irqsave(&wl->lock, flags); - if (wl->enabled) + wl->dc_state = dc_state; + + if (drm_WARN_ON(display->drm, wl->enabled)) goto out_unlock; /* @@ -151,12 +319,29 @@ void intel_dmc_wl_enable(struct intel_display *display) __intel_de_rmw_nowl(display, DMC_WAKELOCK_CFG, 0, DMC_WAKELOCK_CFG_ENABLE); wl->enabled = true; - wl->taken = false; + + /* + * This would be racy in the following scenario: + * + * 1. Function A calls intel_dmc_wl_get(); + * 2. Some function calls intel_dmc_wl_disable(); + * 3. Some function calls intel_dmc_wl_enable(); + * 4. Concurrently with (3), function A performs the MMIO in between + * setting DMC_WAKELOCK_CFG_ENABLE and asserting the lock with + * __intel_dmc_wl_take(). + * + * TODO: Check with the hardware team whether it is safe to assert the + * hardware lock before enabling to avoid such a scenario. Otherwise, we + * would need to deal with it via software synchronization. + */ + if (refcount_read(&wl->refcount)) + __intel_dmc_wl_take(display); out_unlock: spin_unlock_irqrestore(&wl->lock, flags); } +/* Must only be called as part of disabling dynamic DC states. 
*/ void intel_dmc_wl_disable(struct intel_display *display) { struct intel_dmc_wl *wl = &display->wl; @@ -165,40 +350,63 @@ void intel_dmc_wl_disable(struct intel_display *display) if (!__intel_dmc_wl_supported(display)) return; - flush_delayed_work(&wl->work); + intel_dmc_wl_flush_release_work(display); spin_lock_irqsave(&wl->lock, flags); - if (!wl->enabled) + if (drm_WARN_ON(display->drm, !wl->enabled)) goto out_unlock; /* Disable wakelock in DMC */ __intel_de_rmw_nowl(display, DMC_WAKELOCK_CFG, DMC_WAKELOCK_CFG_ENABLE, 0); - refcount_set(&wl->refcount, 0); wl->enabled = false; + + /* + * The spec is not explicit about the expectation of existing + * lock users at the moment of disabling, but it does say that we must + * clear DMC_WAKELOCK_CTL_REQ, which gives us a clue that it is okay to + * disable with existing lock users. + * + * TODO: Get the correct expectation from the hardware team. + */ + __intel_de_rmw_nowl(display, DMC_WAKELOCK1_CTL, DMC_WAKELOCK_CTL_REQ, 0); + wl->taken = false; out_unlock: spin_unlock_irqrestore(&wl->lock, flags); } -void intel_dmc_wl_get(struct intel_display *display, i915_reg_t reg) +void intel_dmc_wl_flush_release_work(struct intel_display *display) { struct intel_dmc_wl *wl = &display->wl; - unsigned long flags; if (!__intel_dmc_wl_supported(display)) return; - if (!intel_dmc_wl_check_range(reg.reg)) + flush_delayed_work(&wl->work); +} + +void intel_dmc_wl_get(struct intel_display *display, i915_reg_t reg) +{ + struct intel_dmc_wl *wl = &display->wl; + unsigned long flags; + + if (!__intel_dmc_wl_supported(display)) return; spin_lock_irqsave(&wl->lock, flags); - if (!wl->enabled) + if (i915_mmio_reg_valid(reg) && !intel_dmc_wl_check_range(reg, wl->dc_state)) goto out_unlock; + if (!wl->enabled) { + if (!refcount_inc_not_zero(&wl->refcount)) + refcount_set(&wl->refcount, 1); + goto out_unlock; + } + cancel_delayed_work(&wl->work); if (refcount_inc_not_zero(&wl->refcount)) @@ -206,26 +414,7 @@ void intel_dmc_wl_get(struct 
intel_display *display, i915_reg_t reg) refcount_set(&wl->refcount, 1); - /* - * Only try to take the wakelock if it's not marked as taken - * yet. It may be already taken at this point if we have - * already released the last reference, but the work has not - * run yet. - */ - if (!wl->taken) { - __intel_de_rmw_nowl(display, DMC_WAKELOCK1_CTL, 0, - DMC_WAKELOCK_CTL_REQ); - - if (__intel_de_wait_for_register_nowl(display, DMC_WAKELOCK1_CTL, - DMC_WAKELOCK_CTL_ACK, - DMC_WAKELOCK_CTL_ACK, - DMC_WAKELOCK_CTL_TIMEOUT)) { - WARN_RATELIMIT(1, "DMC wakelock ack timed out"); - goto out_unlock; - } - - wl->taken = true; - } + __intel_dmc_wl_take(display); out_unlock: spin_unlock_irqrestore(&wl->lock, flags); @@ -239,12 +428,9 @@ void intel_dmc_wl_put(struct intel_display *display, i915_reg_t reg) if (!__intel_dmc_wl_supported(display)) return; - if (!intel_dmc_wl_check_range(reg.reg)) - return; - spin_lock_irqsave(&wl->lock, flags); - if (!wl->enabled) + if (i915_mmio_reg_valid(reg) && !intel_dmc_wl_check_range(reg, wl->dc_state)) goto out_unlock; if (WARN_RATELIMIT(!refcount_read(&wl->refcount), @@ -252,6 +438,9 @@ void intel_dmc_wl_put(struct intel_display *display, i915_reg_t reg) goto out_unlock; if (refcount_dec_and_test(&wl->refcount)) { + if (!wl->enabled) + goto out_unlock; + __intel_dmc_wl_release(display); goto out_unlock; @@ -260,3 +449,13 @@ void intel_dmc_wl_put(struct intel_display *display, i915_reg_t reg) out_unlock: spin_unlock_irqrestore(&wl->lock, flags); } + +void intel_dmc_wl_get_noreg(struct intel_display *display) +{ + intel_dmc_wl_get(display, INVALID_MMIO_REG); +} + +void intel_dmc_wl_put_noreg(struct intel_display *display) +{ + intel_dmc_wl_put(display, INVALID_MMIO_REG); +} diff --git a/drivers/gpu/drm/i915/display/intel_dmc_wl.h b/drivers/gpu/drm/i915/display/intel_dmc_wl.h index adab51208d0a..5488fbdf29b8 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc_wl.h +++ b/drivers/gpu/drm/i915/display/intel_dmc_wl.h @@ -15,17 +15,27 @@ struct 
intel_display; struct intel_dmc_wl { - spinlock_t lock; /* protects enabled, taken and refcount */ + spinlock_t lock; /* protects enabled, taken, dc_state and refcount */ bool enabled; bool taken; refcount_t refcount; + /* + * We are keeping a copy of the enabled DC state because + * intel_display.power.domains is protected by a mutex and we do + * not want call mutex_lock() in atomic context, where some of + * the tracked MMIO operations happen. + */ + u32 dc_state; struct delayed_work work; }; void intel_dmc_wl_init(struct intel_display *display); -void intel_dmc_wl_enable(struct intel_display *display); +void intel_dmc_wl_enable(struct intel_display *display, u32 dc_state); void intel_dmc_wl_disable(struct intel_display *display); +void intel_dmc_wl_flush_release_work(struct intel_display *display); void intel_dmc_wl_get(struct intel_display *display, i915_reg_t reg); void intel_dmc_wl_put(struct intel_display *display, i915_reg_t reg); +void intel_dmc_wl_get_noreg(struct intel_display *display); +void intel_dmc_wl_put_noreg(struct intel_display *display); #endif /* __INTEL_WAKELOCK_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index ea4f775b12a1..1ad25c37f3c2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -28,6 +28,7 @@ #include <linux/export.h> #include <linux/i2c.h> #include <linux/notifier.h> +#include <linux/seq_buf.h> #include <linux/slab.h> #include <linux/sort.h> #include <linux/string_helpers.h> @@ -93,8 +94,6 @@ #include "intel_vrr.h" #include "intel_crtc_state_dump.h" -#define dp_to_i915(__intel_dp) to_i915(dp_to_dig_port(__intel_dp)->base.base.dev) - /* DP DSC throughput values used for slice count calculations KPixels/s */ #define DP_DSC_PEAK_PIXEL_RATE 2720000 #define DP_DSC_MAX_ENC_THROUGHPUT_0 340000 @@ -109,10 +108,19 @@ /* Constants for DP DSC configurations */ static const u8 valid_dsc_bpp[] = {6, 8, 10, 12, 15}; -/* With Single pipe 
configuration, HW is capable of supporting maximum - * of 4 slices per line. +/* + * With Single pipe configuration, HW is capable of supporting maximum of: + * 2 slices per line for ICL, BMG + * 4 slices per line for other platforms. + * For now consider a max of 2 slices per line, which works for all platforms. + * With this we can have max of 4 DSC Slices per pipe. + * + * For higher resolutions where 12 slice support is required with + * ultrajoiner, only then each pipe can support 3 slices. + * + * #TODO Split this better to use 4 slices/dsc engine where supported. */ -static const u8 valid_dsc_slicecount[] = {1, 2, 4}; +static const u8 valid_dsc_slicecount[] = {1, 2, 3, 4}; /** * intel_dp_is_edp - is the given port attached to an eDP panel (either CPU or PCH) @@ -257,6 +265,7 @@ static void intel_dp_set_dpcd_sink_rates(struct intel_dp *intel_dp) static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &intel_dig_port->base; @@ -266,7 +275,7 @@ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) if (intel_dp->num_sink_rates) return; - drm_err(&dp_to_i915(intel_dp)->drm, + drm_err(display->drm, "[CONNECTOR:%d:%s][ENCODER:%d:%s] Invalid DPCD with no link rates, using defaults\n", connector->base.base.id, connector->base.name, encoder->base.base.id, encoder->base.name); @@ -281,6 +290,7 @@ static void intel_dp_set_default_max_sink_lane_count(struct intel_dp *intel_dp) static void intel_dp_set_max_sink_lane_count(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &intel_dig_port->base; @@ -294,7 +304,7 @@ static 
void intel_dp_set_max_sink_lane_count(struct intel_dp *intel_dp) return; } - drm_err(&dp_to_i915(intel_dp)->drm, + drm_err(display->drm, "[CONNECTOR:%d:%s][ENCODER:%d:%s] Invalid DPCD max lane count (%d), using default\n", connector->base.base.id, connector->base.name, encoder->base.base.id, encoder->base.name, @@ -327,7 +337,9 @@ static int intel_dp_common_len_rate_limit(const struct intel_dp *intel_dp, int intel_dp_common_rate(struct intel_dp *intel_dp, int index) { - if (drm_WARN_ON(&dp_to_i915(intel_dp)->drm, + struct intel_display *display = to_intel_display(intel_dp); + + if (drm_WARN_ON(display->drm, index < 0 || index >= intel_dp->num_common_rates)) return 162000; @@ -454,16 +466,16 @@ int intel_dp_max_link_data_rate(struct intel_dp *intel_dp, bool intel_dp_has_joiner(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &intel_dig_port->base; - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); /* eDP MSO is not compatible with joiner */ if (intel_dp->mso_link_count) return false; - return DISPLAY_VER(dev_priv) >= 12 || - (DISPLAY_VER(dev_priv) == 11 && + return DISPLAY_VER(display) >= 12 || + (DISPLAY_VER(display) == 11 && encoder->port != PORT_A); } @@ -492,12 +504,13 @@ static int ehl_max_source_rate(struct intel_dp *intel_dp) static int mtl_max_source_rate(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; if (intel_encoder_is_c10phy(encoder)) return 810000; - if (DISPLAY_VERx100(to_i915(encoder->base.dev)) == 1401) + if (DISPLAY_VERx100(display) == 1401) return 1350000; return 2000000; @@ -551,17 +564,16 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) static const int g4x_rates[] = { 162000, 270000 }; - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private 
*dev_priv = to_i915(dig_port->base.base.dev); + struct intel_display *display = to_intel_display(intel_dp); const int *source_rates; int size, max_rate = 0, vbt_max_rate; /* This should only be done once */ - drm_WARN_ON(&dev_priv->drm, + drm_WARN_ON(display->drm, intel_dp->source_rates || intel_dp->num_source_rates); - if (DISPLAY_VER(dev_priv) >= 14) { - if (IS_BATTLEMAGE(dev_priv)) { + if (DISPLAY_VER(display) >= 14) { + if (display->platform.battlemage) { source_rates = bmg_rates; size = ARRAY_SIZE(bmg_rates); } else { @@ -569,26 +581,26 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) size = ARRAY_SIZE(mtl_rates); } max_rate = mtl_max_source_rate(intel_dp); - } else if (DISPLAY_VER(dev_priv) >= 11) { + } else if (DISPLAY_VER(display) >= 11) { source_rates = icl_rates; size = ARRAY_SIZE(icl_rates); - if (IS_DG2(dev_priv)) + if (display->platform.dg2) max_rate = dg2_max_source_rate(intel_dp); - else if (IS_ALDERLAKE_P(dev_priv) || IS_ALDERLAKE_S(dev_priv) || - IS_DG1(dev_priv) || IS_ROCKETLAKE(dev_priv)) + else if (display->platform.alderlake_p || display->platform.alderlake_s || + display->platform.dg1 || display->platform.rocketlake) max_rate = 810000; - else if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) + else if (display->platform.jasperlake || display->platform.elkhartlake) max_rate = ehl_max_source_rate(intel_dp); else max_rate = icl_max_source_rate(intel_dp); - } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { + } else if (display->platform.geminilake || display->platform.broxton) { source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); - } else if (DISPLAY_VER(dev_priv) == 9) { + } else if (DISPLAY_VER(display) == 9) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); - } else if ((IS_HASWELL(dev_priv) && !IS_HASWELL_ULX(dev_priv)) || - IS_BROADWELL(dev_priv)) { + } else if ((display->platform.haswell && !display->platform.haswell_ulx) || + display->platform.broadwell) { source_rates = hsw_rates; size = 
ARRAY_SIZE(hsw_rates); } else { @@ -679,18 +691,18 @@ static int link_config_cmp_by_bw(const void *a, const void *b, const void *p) static void intel_dp_link_config_init(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_dp_link_config *lc; int num_common_lane_configs; int i; int j; - if (drm_WARN_ON(&i915->drm, !is_power_of_2(intel_dp_max_common_lane_count(intel_dp)))) + if (drm_WARN_ON(display->drm, !is_power_of_2(intel_dp_max_common_lane_count(intel_dp)))) return; num_common_lane_configs = ilog2(intel_dp_max_common_lane_count(intel_dp)) + 1; - if (drm_WARN_ON(&i915->drm, intel_dp->num_common_rates * num_common_lane_configs > + if (drm_WARN_ON(display->drm, intel_dp->num_common_rates * num_common_lane_configs > ARRAY_SIZE(intel_dp->link.configs))) return; @@ -714,10 +726,10 @@ static void intel_dp_link_config_init(struct intel_dp *intel_dp) void intel_dp_link_config_get(struct intel_dp *intel_dp, int idx, int *link_rate, int *lane_count) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); const struct intel_dp_link_config *lc; - if (drm_WARN_ON(&i915->drm, idx < 0 || idx >= intel_dp->link.num_configs)) + if (drm_WARN_ON(display->drm, idx < 0 || idx >= intel_dp->link.num_configs)) idx = 0; lc = &intel_dp->link.configs[idx]; @@ -746,9 +758,9 @@ int intel_dp_link_config_index(struct intel_dp *intel_dp, int link_rate, int lan static void intel_dp_set_common_rates(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, !intel_dp->num_source_rates || !intel_dp->num_sink_rates); intel_dp->num_common_rates = intersect_rates(intel_dp->source_rates, @@ -758,7 +770,7 @@ static void intel_dp_set_common_rates(struct intel_dp *intel_dp) intel_dp->common_rates); 
/* Paranoia, there should always be something in common. */ - if (drm_WARN_ON(&i915->drm, intel_dp->num_common_rates == 0)) { + if (drm_WARN_ON(display->drm, intel_dp->num_common_rates == 0)) { intel_dp->common_rates[0] = 162000; intel_dp->num_common_rates = 1; } @@ -806,30 +818,30 @@ int intel_dp_bw_fec_overhead(bool fec_enabled) } static int -small_joiner_ram_size_bits(struct drm_i915_private *i915) +small_joiner_ram_size_bits(struct intel_display *display) { - if (DISPLAY_VER(i915) >= 13) + if (DISPLAY_VER(display) >= 13) return 17280 * 8; - else if (DISPLAY_VER(i915) >= 11) + else if (DISPLAY_VER(display) >= 11) return 7680 * 8; else return 6144 * 8; } -u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp, u32 pipe_bpp) +u32 intel_dp_dsc_nearest_valid_bpp(struct intel_display *display, u32 bpp, u32 pipe_bpp) { u32 bits_per_pixel = bpp; int i; /* Error out if the max bpp is less than smallest allowed valid bpp */ if (bits_per_pixel < valid_dsc_bpp[0]) { - drm_dbg_kms(&i915->drm, "Unsupported BPP %u, min %u\n", + drm_dbg_kms(display->drm, "Unsupported BPP %u, min %u\n", bits_per_pixel, valid_dsc_bpp[0]); return 0; } /* From XE_LPD onwards we support from bpc upto uncompressed bpp-1 BPPs */ - if (DISPLAY_VER(i915) >= 13) { + if (DISPLAY_VER(display) >= 13) { bits_per_pixel = min(bits_per_pixel, pipe_bpp - 1); /* @@ -841,7 +853,8 @@ u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp, u32 p * DSC enabled. 
*/ if (bits_per_pixel < 8) { - drm_dbg_kms(&i915->drm, "Unsupported BPP %u, min 8\n", + drm_dbg_kms(display->drm, + "Unsupported BPP %u, min 8\n", bits_per_pixel); return 0; } @@ -852,7 +865,7 @@ u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp, u32 p if (bits_per_pixel < valid_dsc_bpp[i + 1]) break; } - drm_dbg_kms(&i915->drm, "Set dsc bpp from %d to VESA %d\n", + drm_dbg_kms(display->drm, "Set dsc bpp from %d to VESA %d\n", bits_per_pixel, valid_dsc_bpp[i]); bits_per_pixel = valid_dsc_bpp[i]; @@ -887,11 +900,10 @@ static u32 small_joiner_ram_max_bpp(struct intel_display *display, u32 mode_hdisplay, int num_joined_pipes) { - struct drm_i915_private *i915 = to_i915(display->drm); u32 max_bpp; /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ - max_bpp = small_joiner_ram_size_bits(i915) / mode_hdisplay; + max_bpp = small_joiner_ram_size_bits(display) / mode_hdisplay; max_bpp *= num_joined_pipes; @@ -909,11 +921,10 @@ static u32 ultrajoiner_ram_max_bpp(u32 mode_hdisplay) } static -u32 get_max_compressed_bpp_with_joiner(struct drm_i915_private *i915, +u32 get_max_compressed_bpp_with_joiner(struct intel_display *display, u32 mode_clock, u32 mode_hdisplay, int num_joined_pipes) { - struct intel_display *display = to_intel_display(&i915->drm); u32 max_bpp = small_joiner_ram_max_bpp(display, mode_hdisplay, num_joined_pipes); if (num_joined_pipes > 1) @@ -925,7 +936,7 @@ u32 get_max_compressed_bpp_with_joiner(struct drm_i915_private *i915, return max_bpp; } -u16 intel_dp_dsc_get_max_compressed_bpp(struct drm_i915_private *i915, +u16 intel_dp_dsc_get_max_compressed_bpp(struct intel_display *display, u32 link_clock, u32 lane_count, u32 mode_clock, u32 mode_hdisplay, int num_joined_pipes, @@ -967,17 +978,17 @@ u16 intel_dp_dsc_get_max_compressed_bpp(struct drm_i915_private *i915, if (output_format == INTEL_OUTPUT_FORMAT_YCBCR420) bits_per_pixel = min_t(u32, bits_per_pixel, 31); - drm_dbg_kms(&i915->drm, "Max link bpp is %u for 
%u timeslots " + drm_dbg_kms(display->drm, "Max link bpp is %u for %u timeslots " "total bw %u pixel clock %u\n", bits_per_pixel, timeslots, (link_clock * lane_count * 8), intel_dp_mode_to_fec_clock(mode_clock)); - joiner_max_bpp = get_max_compressed_bpp_with_joiner(i915, mode_clock, + joiner_max_bpp = get_max_compressed_bpp_with_joiner(display, mode_clock, mode_hdisplay, num_joined_pipes); bits_per_pixel = min(bits_per_pixel, joiner_max_bpp); - bits_per_pixel = intel_dp_dsc_nearest_valid_bpp(i915, bits_per_pixel, pipe_bpp); + bits_per_pixel = intel_dp_dsc_nearest_valid_bpp(display, bits_per_pixel, pipe_bpp); return bits_per_pixel; } @@ -986,7 +997,7 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, int mode_clock, int mode_hdisplay, int num_joined_pipes) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); u8 min_slice_count, i; int max_slice_width; @@ -1001,12 +1012,12 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, * Due to some DSC engine BW limitations, we need to enable second * slice and VDSC engine, whenever we approach close enough to max CDCLK */ - if (mode_clock >= ((i915->display.cdclk.max_cdclk_freq * 85) / 100)) + if (mode_clock >= ((display->cdclk.max_cdclk_freq * 85) / 100)) min_slice_count = max_t(u8, min_slice_count, 2); max_slice_width = drm_dp_dsc_sink_max_slice_width(connector->dp.dsc_dpcd); if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Unsupported slice width %d by DP DSC Sink device\n", max_slice_width); return 0; @@ -1020,6 +1031,13 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { u8 test_slice_count = valid_dsc_slicecount[i] * num_joined_pipes; + /* + * 3 DSC Slices per pipe need 3 DSC engines, + * which is supported only with Ultrajoiner. 
+ */ + if (valid_dsc_slicecount[i] == 3 && num_joined_pipes != 4) + continue; + if (test_slice_count > drm_dp_dsc_sink_max_slice_count(connector->dp.dsc_dpcd, false)) break; @@ -1032,11 +1050,14 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, if (num_joined_pipes > 1 && valid_dsc_slicecount[i] < 2) continue; + if (mode_hdisplay % test_slice_count) + continue; + if (min_slice_count <= test_slice_count) return test_slice_count; } - drm_dbg_kms(&i915->drm, "Unsupported Slice Count %d\n", + drm_dbg_kms(display->drm, "Unsupported Slice Count %d\n", min_slice_count); return 0; } @@ -1044,7 +1065,7 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, static bool source_can_output(struct intel_dp *intel_dp, enum intel_output_format format) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); switch (format) { case INTEL_OUTPUT_FORMAT_RGB: @@ -1056,11 +1077,11 @@ static bool source_can_output(struct intel_dp *intel_dp, * Also, ILK doesn't seem capable of DP YCbCr output. * The displayed image is severly corrupted. SNB+ is fine. 
*/ - return !HAS_GMCH(i915) && !IS_IRONLAKE(i915); + return !HAS_GMCH(display) && !display->platform.ironlake; case INTEL_OUTPUT_FORMAT_YCBCR420: /* Platform < Gen 11 cannot output YCbCr420 format */ - return DISPLAY_VER(i915) >= 11; + return DISPLAY_VER(display) >= 11; default: MISSING_CASE(format); @@ -1120,8 +1141,8 @@ static enum intel_output_format intel_dp_output_format(struct intel_connector *connector, enum intel_output_format sink_format) { + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); enum intel_output_format force_dsc_output_format = intel_dp->force_dsc_output_format; enum intel_output_format output_format; @@ -1132,7 +1153,7 @@ intel_dp_output_format(struct intel_connector *connector, dfp_can_convert(intel_dp, force_dsc_output_format, sink_format))) return force_dsc_output_format; - drm_dbg_kms(&i915->drm, "Cannot force DSC output format\n"); + drm_dbg_kms(display->drm, "Cannot force DSC output format\n"); } if (sink_format == INTEL_OUTPUT_FORMAT_RGB || @@ -1146,7 +1167,7 @@ intel_dp_output_format(struct intel_connector *connector, else output_format = INTEL_OUTPUT_FORMAT_YCBCR420; - drm_WARN_ON(&i915->drm, !source_can_output(intel_dp, output_format)); + drm_WARN_ON(display->drm, !source_can_output(intel_dp, output_format)); return output_format; } @@ -1197,7 +1218,7 @@ intel_dp_mode_min_output_bpp(struct intel_connector *connector, return intel_dp_output_bpp(output_format, intel_dp_min_bpp(output_format)); } -static bool intel_dp_hdisplay_bad(struct drm_i915_private *dev_priv, +static bool intel_dp_hdisplay_bad(struct intel_display *display, int hdisplay) { /* @@ -1213,7 +1234,7 @@ static bool intel_dp_hdisplay_bad(struct drm_i915_private *dev_priv, * * TODO: confirm the behaviour on HSW+ */ - return hdisplay == 4096 && !HAS_DDI(dev_priv); + return hdisplay == 4096 && !HAS_DDI(display); } static int 
intel_dp_max_tmds_clock(struct intel_dp *intel_dp) @@ -1314,7 +1335,7 @@ bool intel_dp_needs_joiner(struct intel_dp *intel_dp, int hdisplay, int clock, int num_joined_pipes) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int hdisplay_limit; if (!intel_dp_has_joiner(intel_dp)) @@ -1322,9 +1343,9 @@ bool intel_dp_needs_joiner(struct intel_dp *intel_dp, num_joined_pipes /= 2; - hdisplay_limit = DISPLAY_VER(i915) >= 30 ? 6144 : 5120; + hdisplay_limit = DISPLAY_VER(display) >= 30 ? 6144 : 5120; - return clock > num_joined_pipes * i915->display.cdclk.max_dotclk_freq || + return clock > num_joined_pipes * display->cdclk.max_dotclk_freq || hdisplay > num_joined_pipes * hdisplay_limit; } @@ -1333,16 +1354,15 @@ int intel_dp_num_joined_pipes(struct intel_dp *intel_dp, int hdisplay, int clock) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); if (connector->force_joined_pipes) return connector->force_joined_pipes; - if (HAS_ULTRAJOINER(i915) && + if (HAS_ULTRAJOINER(display) && intel_dp_needs_joiner(intel_dp, connector, hdisplay, clock, 4)) return 4; - if ((HAS_BIGJOINER(i915) || HAS_UNCOMPRESSED_JOINER(i915)) && + if ((HAS_BIGJOINER(display) || HAS_UNCOMPRESSED_JOINER(display)) && intel_dp_needs_joiner(intel_dp, connector, hdisplay, clock, 2)) return 2; @@ -1351,12 +1371,12 @@ int intel_dp_num_joined_pipes(struct intel_dp *intel_dp, bool intel_dp_has_dsc(const struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); - if (!HAS_DSC(i915)) + if (!HAS_DSC(display)) return false; - if (connector->mst_port && !HAS_DSC_MST(i915)) + if (connector->mst_port && !HAS_DSC_MST(display)) return false; if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP && @@ -1373,13 +1393,14 @@ static enum drm_mode_status intel_dp_mode_valid(struct 
drm_connector *_connector, const struct drm_display_mode *mode) { + struct intel_display *display = to_intel_display(_connector->dev); struct intel_connector *connector = to_intel_connector(_connector); struct intel_dp *intel_dp = intel_attached_dp(connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); const struct drm_display_mode *fixed_mode; int target_clock = mode->clock; int max_rate, mode_rate, max_lanes, max_link_clock; - int max_dotclk = dev_priv->display.cdclk.max_dotclk_freq; + int max_dotclk = display->cdclk.max_dotclk_freq; u16 dsc_max_compressed_bpp = 0; u8 dsc_slice_count = 0; enum drm_mode_status status; @@ -1412,7 +1433,7 @@ intel_dp_mode_valid(struct drm_connector *_connector, if (target_clock > max_dotclk) return MODE_CLOCK_HIGH; - if (intel_dp_hdisplay_bad(dev_priv, mode->hdisplay)) + if (intel_dp_hdisplay_bad(display, mode->hdisplay)) return MODE_H_ILLEGAL; max_link_clock = intel_dp_max_link_rate(intel_dp); @@ -1447,7 +1468,7 @@ intel_dp_mode_valid(struct drm_connector *_connector, true); } else if (drm_dp_sink_supports_fec(connector->dp.fec_capability)) { dsc_max_compressed_bpp = - intel_dp_dsc_get_max_compressed_bpp(dev_priv, + intel_dp_dsc_get_max_compressed_bpp(display, max_link_clock, max_lanes, target_clock, @@ -1465,7 +1486,7 @@ intel_dp_mode_valid(struct drm_connector *_connector, dsc = dsc_max_compressed_bpp && dsc_slice_count; } - if (intel_dp_joiner_needs_dsc(dev_priv, num_joined_pipes) && !dsc) + if (intel_dp_joiner_needs_dsc(display, num_joined_pipes) && !dsc) return MODE_CLOCK_HIGH; if (mode_rate > max_rate && !dsc) @@ -1478,51 +1499,43 @@ intel_dp_mode_valid(struct drm_connector *_connector, return intel_mode_valid_max_plane_size(dev_priv, mode, num_joined_pipes); } -bool intel_dp_source_supports_tps3(struct drm_i915_private *i915) +bool intel_dp_source_supports_tps3(struct intel_display *display) { - return DISPLAY_VER(i915) >= 9 || IS_BROADWELL(i915) || IS_HASWELL(i915); + return DISPLAY_VER(display) >= 9 
|| + display->platform.broadwell || display->platform.haswell; } -bool intel_dp_source_supports_tps4(struct drm_i915_private *i915) +bool intel_dp_source_supports_tps4(struct intel_display *display) { - return DISPLAY_VER(i915) >= 10; + return DISPLAY_VER(display) >= 10; } -static void snprintf_int_array(char *str, size_t len, - const int *array, int nelem) +static void seq_buf_print_array(struct seq_buf *s, const int *array, int nelem) { int i; - str[0] = '\0'; - - for (i = 0; i < nelem; i++) { - int r = snprintf(str, len, "%s%d", i ? ", " : "", array[i]); - if (r >= len) - return; - str += r; - len -= r; - } + for (i = 0; i < nelem; i++) + seq_buf_printf(s, "%s%d", i ? ", " : "", array[i]); } static void intel_dp_print_rates(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - char str[128]; /* FIXME: too big for stack? */ + struct intel_display *display = to_intel_display(intel_dp); + DECLARE_SEQ_BUF(s, 128); /* FIXME: too big for stack? */ if (!drm_debug_enabled(DRM_UT_KMS)) return; - snprintf_int_array(str, sizeof(str), - intel_dp->source_rates, intel_dp->num_source_rates); - drm_dbg_kms(&i915->drm, "source rates: %s\n", str); + seq_buf_print_array(&s, intel_dp->source_rates, intel_dp->num_source_rates); + drm_dbg_kms(display->drm, "source rates: %s\n", seq_buf_str(&s)); - snprintf_int_array(str, sizeof(str), - intel_dp->sink_rates, intel_dp->num_sink_rates); - drm_dbg_kms(&i915->drm, "sink rates: %s\n", str); + seq_buf_clear(&s); + seq_buf_print_array(&s, intel_dp->sink_rates, intel_dp->num_sink_rates); + drm_dbg_kms(display->drm, "sink rates: %s\n", seq_buf_str(&s)); - snprintf_int_array(str, sizeof(str), - intel_dp->common_rates, intel_dp->num_common_rates); - drm_dbg_kms(&i915->drm, "common rates: %s\n", str); + seq_buf_clear(&s); + seq_buf_print_array(&s, intel_dp->common_rates, intel_dp->num_common_rates); + drm_dbg_kms(display->drm, "common rates: %s\n", seq_buf_str(&s)); } static int forced_link_rate(struct intel_dp 
*intel_dp) @@ -1559,11 +1572,11 @@ intel_dp_min_link_rate(struct intel_dp *intel_dp) int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int i = intel_dp_rate_index(intel_dp->sink_rates, intel_dp->num_sink_rates, rate); - if (drm_WARN_ON(&i915->drm, i < 0)) + if (drm_WARN_ON(display->drm, i < 0)) i = 0; return i; @@ -1593,13 +1606,13 @@ bool intel_dp_has_hdmi_sink(struct intel_dp *intel_dp) static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp, const struct intel_crtc_state *pipe_config) { + struct intel_display *display = to_intel_display(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (DISPLAY_VER(dev_priv) >= 12) + if (DISPLAY_VER(display) >= 12) return true; - if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A && + if (DISPLAY_VER(display) == 11 && encoder->port != PORT_A && !intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) return true; @@ -1614,13 +1627,15 @@ bool intel_dp_supports_fec(struct intel_dp *intel_dp, drm_dp_sink_supports_fec(connector->dp.fec_capability); } -bool intel_dp_supports_dsc(const struct intel_connector *connector, +bool intel_dp_supports_dsc(struct intel_dp *intel_dp, + const struct intel_connector *connector, const struct intel_crtc_state *crtc_state) { if (!intel_dp_has_dsc(connector)) return false; - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP) && !crtc_state->fec_enable) + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP) && + !intel_dp_supports_fec(intel_dp, connector, crtc_state)) return false; return intel_dsc_source_support(crtc_state); @@ -1662,8 +1677,8 @@ static int intel_dp_max_bpp(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, bool respect_downstream_limits) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - struct intel_connector 
*intel_connector = intel_dp->attached_connector; + struct intel_display *display = to_intel_display(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; int bpp, bpc; bpc = crtc_state->pipe_bpp / 3; @@ -1685,13 +1700,13 @@ static int intel_dp_max_bpp(struct intel_dp *intel_dp, bpp = bpc * 3; if (intel_dp_is_edp(intel_dp)) { /* Get bpp from vbt only for panels that dont have bpp in edid */ - if (intel_connector->base.display_info.bpc == 0 && - intel_connector->panel.vbt.edp.bpp && - intel_connector->panel.vbt.edp.bpp < bpp) { - drm_dbg_kms(&dev_priv->drm, + if (connector->base.display_info.bpc == 0 && + connector->panel.vbt.edp.bpp && + connector->panel.vbt.edp.bpp < bpp) { + drm_dbg_kms(display->drm, "clamping bpp for eDP panel to BIOS-provided %i\n", - intel_connector->panel.vbt.edp.bpp); - bpp = intel_connector->panel.vbt.edp.bpp; + connector->panel.vbt.edp.bpp); + bpp = connector->panel.vbt.edp.bpp; } } @@ -1700,13 +1715,13 @@ static int intel_dp_max_bpp(struct intel_dp *intel_dp, static bool has_seamless_m_n(struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); /* * Seamless M/N reprogramming only implemented * for BDW+ double buffered M/N registers so far. 
*/ - return HAS_DOUBLE_BUFFERED_M_N(i915) && + return HAS_DOUBLE_BUFFERED_M_N(display) && intel_panel_drrs_type(connector) == DRRS_TYPE_SEAMLESS; } @@ -1768,13 +1783,12 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, return -EINVAL; } -static -u8 intel_dp_dsc_max_src_input_bpc(struct drm_i915_private *i915) +int intel_dp_dsc_max_src_input_bpc(struct intel_display *display) { /* Max DSC Input BPC for ICL is 10 and for TGL+ is 12 */ - if (DISPLAY_VER(i915) >= 12) + if (DISPLAY_VER(display) >= 12) return 12; - if (DISPLAY_VER(i915) == 11) + if (DISPLAY_VER(display) == 11) return 10; return 0; @@ -1783,17 +1797,17 @@ u8 intel_dp_dsc_max_src_input_bpc(struct drm_i915_private *i915) int intel_dp_dsc_compute_max_bpp(const struct intel_connector *connector, u8 max_req_bpc) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); int i, num_bpc; u8 dsc_bpc[3] = {}; - u8 dsc_max_bpc; + int dsc_max_bpc; - dsc_max_bpc = intel_dp_dsc_max_src_input_bpc(i915); + dsc_max_bpc = intel_dp_dsc_max_src_input_bpc(display); if (!dsc_max_bpc) return dsc_max_bpc; - dsc_max_bpc = min_t(u8, dsc_max_bpc, max_req_bpc); + dsc_max_bpc = min(dsc_max_bpc, max_req_bpc); num_bpc = drm_dp_dsc_sink_supported_input_bpcs(connector->dp.dsc_dpcd, dsc_bpc); @@ -1805,9 +1819,9 @@ int intel_dp_dsc_compute_max_bpp(const struct intel_connector *connector, return 0; } -static int intel_dp_source_dsc_version_minor(struct drm_i915_private *i915) +static int intel_dp_source_dsc_version_minor(struct intel_display *display) { - return DISPLAY_VER(i915) >= 14 ? 2 : 1; + return DISPLAY_VER(display) >= 14 ? 
2 : 1; } static int intel_dp_sink_dsc_version_minor(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]) @@ -1841,7 +1855,7 @@ static int intel_dp_get_slice_height(int vactive) static int intel_dp_dsc_compute_params(const struct intel_connector *connector, struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; int ret; @@ -1864,7 +1878,7 @@ static int intel_dp_dsc_compute_params(const struct intel_connector *connector, (connector->dp.dsc_dpcd[DP_DSC_REV - DP_DSC_SUPPORT] & DP_DSC_MAJOR_MASK) >> DP_DSC_MAJOR_SHIFT; vdsc_cfg->dsc_version_minor = - min(intel_dp_source_dsc_version_minor(i915), + min(intel_dp_source_dsc_version_minor(display), intel_dp_sink_dsc_version_minor(connector->dp.dsc_dpcd)); if (vdsc_cfg->convert_rgb) vdsc_cfg->convert_rgb = @@ -1874,7 +1888,7 @@ static int intel_dp_dsc_compute_params(const struct intel_connector *connector, vdsc_cfg->line_buf_depth = min(INTEL_DP_DSC_MAX_LINE_BUF_DEPTH, drm_dp_dsc_sink_line_buf_depth(connector->dp.dsc_dpcd)); if (!vdsc_cfg->line_buf_depth) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "DSC Sink Line Buffer Depth invalid\n"); return -EINVAL; } @@ -1889,7 +1903,7 @@ static int intel_dp_dsc_compute_params(const struct intel_connector *connector, static bool intel_dp_dsc_supports_format(const struct intel_connector *connector, enum intel_output_format output_format) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); u8 sink_dsc_format; switch (output_format) { @@ -1900,7 +1914,7 @@ static bool intel_dp_dsc_supports_format(const struct intel_connector *connector sink_dsc_format = DP_DSC_YCbCr444; break; case INTEL_OUTPUT_FORMAT_YCBCR420: - if (min(intel_dp_source_dsc_version_minor(i915), + if (min(intel_dp_source_dsc_version_minor(display), 
intel_dp_sink_dsc_version_minor(connector->dp.dsc_dpcd)) < 2) return false; sink_dsc_format = DP_DSC_YCbCr420_Native; @@ -1961,7 +1975,7 @@ static int dsc_compute_link_config(struct intel_dp *intel_dp, static u16 intel_dp_dsc_max_sink_compressed_bppx16(const struct intel_connector *connector, - struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *pipe_config, int bpc) { u16 max_bppx16 = drm_edp_dsc_sink_output_bpp(connector->dp.dsc_dpcd); @@ -1986,7 +2000,7 @@ u16 intel_dp_dsc_max_sink_compressed_bppx16(const struct intel_connector *connec return 0; } -int intel_dp_dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config) +int intel_dp_dsc_sink_min_compressed_bpp(const struct intel_crtc_state *pipe_config) { /* From Mandatory bit rate range Support Table 2-157 (DP v2.0) */ switch (pipe_config->output_format) { @@ -2004,7 +2018,7 @@ int intel_dp_dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config) } int intel_dp_dsc_sink_max_compressed_bpp(const struct intel_connector *connector, - struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *pipe_config, int bpc) { return intel_dp_dsc_max_sink_compressed_bppx16(connector, @@ -2019,13 +2033,22 @@ static int dsc_src_min_compressed_bpp(void) static int dsc_src_max_compressed_bpp(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); + + /* + * Forcing DSC and using the platform's max compressed bpp is seen to cause + * underruns. Since DSC isn't needed in these cases, limit the + * max compressed bpp to 18, which is a safe value across platforms with different + * pipe bpps. + */ + if (intel_dp->force_dsc_en) + return 18; /* * Max Compressed bpp for Gen 13+ is 27bpp. * For earlier platform is 23bpp. (Bspec:49259). 
*/ - if (DISPLAY_VER(i915) < 13) + if (DISPLAY_VER(display) < 13) return 23; else return 27; @@ -2086,13 +2109,13 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp, int pipe_bpp, int timeslots) { + struct intel_display *display = to_intel_display(intel_dp); u8 bppx16_incr = drm_dp_dsc_sink_bpp_incr(connector->dp.dsc_dpcd); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); u16 compressed_bppx16; u8 bppx16_step; int ret; - if (DISPLAY_VER(i915) < 14 || bppx16_incr <= 1) + if (DISPLAY_VER(display) < 14 || bppx16_incr <= 1) bppx16_step = 16; else bppx16_step = 16 / bppx16_incr; @@ -2116,7 +2139,8 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp, pipe_config->dsc.compressed_bpp_x16 = compressed_bppx16; if (intel_dp->force_dsc_fractional_bpp_en && fxp_q4_to_frac(compressed_bppx16)) - drm_dbg_kms(&i915->drm, "Forcing DSC fractional bpp\n"); + drm_dbg_kms(display->drm, + "Forcing DSC fractional bpp\n"); return 0; } @@ -2131,68 +2155,46 @@ static int dsc_compute_compressed_bpp(struct intel_dp *intel_dp, int pipe_bpp, int timeslots) { + struct intel_display *display = to_intel_display(intel_dp); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - int dsc_src_min_bpp, dsc_sink_min_bpp, dsc_min_bpp; - int dsc_src_max_bpp, dsc_sink_max_bpp, dsc_max_bpp; + int dsc_min_bpp; + int dsc_max_bpp; int dsc_joiner_max_bpp; int num_joined_pipes = intel_crtc_num_joined_pipes(pipe_config); - dsc_src_min_bpp = dsc_src_min_compressed_bpp(); - dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config); - dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp); - dsc_min_bpp = max(dsc_min_bpp, fxp_q4_to_int_roundup(limits->link.min_bpp_x16)); + dsc_min_bpp = fxp_q4_to_int_roundup(limits->link.min_bpp_x16); - dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp); - dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, - pipe_config, - pipe_bpp / 3); - dsc_max_bpp = 
dsc_sink_max_bpp ? min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp; - - dsc_joiner_max_bpp = get_max_compressed_bpp_with_joiner(i915, adjusted_mode->clock, + dsc_joiner_max_bpp = get_max_compressed_bpp_with_joiner(display, adjusted_mode->clock, adjusted_mode->hdisplay, num_joined_pipes); - dsc_max_bpp = min(dsc_max_bpp, dsc_joiner_max_bpp); - dsc_max_bpp = min(dsc_max_bpp, fxp_q4_to_int(limits->link.max_bpp_x16)); + dsc_max_bpp = min(dsc_joiner_max_bpp, fxp_q4_to_int(limits->link.max_bpp_x16)); - if (DISPLAY_VER(i915) >= 13) + if (DISPLAY_VER(display) >= 13) return xelpd_dsc_compute_link_config(intel_dp, connector, pipe_config, limits, dsc_max_bpp, dsc_min_bpp, pipe_bpp, timeslots); return icl_dsc_compute_link_config(intel_dp, pipe_config, limits, dsc_max_bpp, dsc_min_bpp, pipe_bpp, timeslots); } -static -u8 intel_dp_dsc_min_src_input_bpc(struct drm_i915_private *i915) +int intel_dp_dsc_min_src_input_bpc(void) { /* Min DSC Input BPC for ICL+ is 8 */ - return HAS_DSC(i915) ? 8 : 0; + return 8; } static -bool is_dsc_pipe_bpp_sufficient(struct drm_i915_private *i915, - struct drm_connector_state *conn_state, - struct link_config_limits *limits, +bool is_dsc_pipe_bpp_sufficient(struct link_config_limits *limits, int pipe_bpp) { - u8 dsc_max_bpc, dsc_min_bpc, dsc_max_pipe_bpp, dsc_min_pipe_bpp; - - dsc_max_bpc = min(intel_dp_dsc_max_src_input_bpc(i915), conn_state->max_requested_bpc); - dsc_min_bpc = intel_dp_dsc_min_src_input_bpc(i915); - - dsc_max_pipe_bpp = min(dsc_max_bpc * 3, limits->pipe.max_bpp); - dsc_min_pipe_bpp = max(dsc_min_bpc * 3, limits->pipe.min_bpp); - - return pipe_bpp >= dsc_min_pipe_bpp && - pipe_bpp <= dsc_max_pipe_bpp; + return pipe_bpp >= limits->pipe.min_bpp && + pipe_bpp <= limits->pipe.max_bpp; } static int intel_dp_force_dsc_pipe_bpp(struct intel_dp *intel_dp, - struct drm_connector_state *conn_state, struct link_config_limits *limits) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = 
to_intel_display(intel_dp); int forced_bpp; if (!intel_dp->force_dsc_bpc) @@ -2200,12 +2202,14 @@ int intel_dp_force_dsc_pipe_bpp(struct intel_dp *intel_dp, forced_bpp = intel_dp->force_dsc_bpc * 3; - if (is_dsc_pipe_bpp_sufficient(i915, conn_state, limits, forced_bpp)) { - drm_dbg_kms(&i915->drm, "Input DSC BPC forced to %d\n", intel_dp->force_dsc_bpc); + if (is_dsc_pipe_bpp_sufficient(limits, forced_bpp)) { + drm_dbg_kms(display->drm, "Input DSC BPC forced to %d\n", + intel_dp->force_dsc_bpc); return forced_bpp; } - drm_dbg_kms(&i915->drm, "Cannot force DSC BPC:%d, due to DSC BPC limits\n", + drm_dbg_kms(display->drm, + "Cannot force DSC BPC:%d, due to DSC BPC limits\n", intel_dp->force_dsc_bpc); return 0; @@ -2217,17 +2221,15 @@ static int intel_dp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, struct link_config_limits *limits, int timeslots) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); const struct intel_connector *connector = to_intel_connector(conn_state->connector); - u8 max_req_bpc = conn_state->max_requested_bpc; - u8 dsc_max_bpc, dsc_max_bpp; - u8 dsc_min_bpc, dsc_min_bpp; + int dsc_max_bpp; + int dsc_min_bpp; u8 dsc_bpc[3] = {}; int forced_bpp, pipe_bpp; int num_bpc, i, ret; - forced_bpp = intel_dp_force_dsc_pipe_bpp(intel_dp, conn_state, limits); + forced_bpp = intel_dp_force_dsc_pipe_bpp(intel_dp, limits); if (forced_bpp) { ret = dsc_compute_compressed_bpp(intel_dp, connector, pipe_config, @@ -2238,15 +2240,8 @@ static int intel_dp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, } } - dsc_max_bpc = intel_dp_dsc_max_src_input_bpc(i915); - if (!dsc_max_bpc) - return -EINVAL; - - dsc_max_bpc = min_t(u8, dsc_max_bpc, max_req_bpc); - dsc_max_bpp = min(dsc_max_bpc * 3, limits->pipe.max_bpp); - - dsc_min_bpc = intel_dp_dsc_min_src_input_bpc(i915); - dsc_min_bpp = max(dsc_min_bpc * 3, limits->pipe.min_bpp); + dsc_max_bpp = limits->pipe.max_bpp; + dsc_min_bpp = limits->pipe.min_bpp; /* * Get the maximum DSC bpc that will be supported by any valid 
@@ -2275,24 +2270,24 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, struct drm_connector_state *conn_state, struct link_config_limits *limits) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = to_intel_connector(conn_state->connector); int pipe_bpp, forced_bpp; - int dsc_src_min_bpp, dsc_sink_min_bpp, dsc_min_bpp; - int dsc_src_max_bpp, dsc_sink_max_bpp, dsc_max_bpp; + int dsc_min_bpp; + int dsc_max_bpp; - forced_bpp = intel_dp_force_dsc_pipe_bpp(intel_dp, conn_state, limits); + forced_bpp = intel_dp_force_dsc_pipe_bpp(intel_dp, limits); if (forced_bpp) { pipe_bpp = forced_bpp; } else { - int max_bpc = min(limits->pipe.max_bpp / 3, (int)conn_state->max_requested_bpc); + int max_bpc = limits->pipe.max_bpp / 3; /* For eDP use max bpp that can be supported with DSC. */ pipe_bpp = intel_dp_dsc_compute_max_bpp(connector, max_bpc); - if (!is_dsc_pipe_bpp_sufficient(i915, conn_state, limits, pipe_bpp)) { - drm_dbg_kms(&i915->drm, + if (!is_dsc_pipe_bpp_sufficient(limits, pipe_bpp)) { + drm_dbg_kms(display->drm, "Computed BPC is not in DSC BPC limits\n"); return -EINVAL; } @@ -2300,17 +2295,9 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, pipe_config->port_clock = limits->max_rate; pipe_config->lane_count = limits->max_lane_count; - dsc_src_min_bpp = dsc_src_min_compressed_bpp(); - dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config); - dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp); - dsc_min_bpp = max(dsc_min_bpp, fxp_q4_to_int_roundup(limits->link.min_bpp_x16)); + dsc_min_bpp = fxp_q4_to_int_roundup(limits->link.min_bpp_x16); - dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp); - dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, - pipe_config, - pipe_bpp / 3); - dsc_max_bpp = dsc_sink_max_bpp ? 
min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp; - dsc_max_bpp = min(dsc_max_bpp, fxp_q4_to_int(limits->link.max_bpp_x16)); + dsc_max_bpp = fxp_q4_to_int(limits->link.max_bpp_x16); /* Compressed BPP should be less than the Input DSC bpp */ dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); @@ -2323,6 +2310,26 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, return 0; } +static void intel_dp_fec_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + if (crtc_state->fec_enable) + return; + + /* + * Though eDP v1.5 supports FEC with DSC, unlike DP, it is optional. + * Since, FEC is a bandwidth overhead, continue to not enable it for + * eDP. Until, there is a good reason to do so. + */ + if (intel_dp_is_edp(intel_dp)) + return; + + if (intel_dp_is_uhbr(crtc_state)) + return; + + crtc_state->fec_enable = true; +} + int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state, @@ -2330,8 +2337,7 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, int timeslots, bool compute_pipe_bpp) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + struct intel_display *display = to_intel_display(intel_dp); const struct intel_connector *connector = to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = @@ -2339,18 +2345,7 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, int num_joined_pipes = intel_crtc_num_joined_pipes(pipe_config); int ret; - /* - * Though eDP v1.5 supports FEC with DSC, unlike DP, it is optional. - * Since, FEC is a bandwidth overhead, continue to not enable it for - * eDP. Until, there is a good reason to do so. 
- */ - pipe_config->fec_enable = pipe_config->fec_enable || - (!intel_dp_is_edp(intel_dp) && - intel_dp_supports_fec(intel_dp, connector, pipe_config) && - !intel_dp_is_uhbr(pipe_config)); - - if (!intel_dp_supports_dsc(connector, pipe_config)) - return -EINVAL; + intel_dp_fec_compute_config(intel_dp, pipe_config); if (!intel_dp_dsc_supports_format(connector, pipe_config->output_format)) return -EINVAL; @@ -2369,7 +2364,7 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, ret = intel_dp_dsc_compute_pipe_bpp(intel_dp, pipe_config, conn_state, limits, timeslots); if (ret) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "No Valid pipe bpp for given mode ret = %d\n", ret); return ret; } @@ -2381,7 +2376,8 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, drm_dp_dsc_sink_max_slice_count(connector->dp.dsc_dpcd, true); if (!pipe_config->dsc.slice_count) { - drm_dbg_kms(&dev_priv->drm, "Unsupported Slice Count %d\n", + drm_dbg_kms(display->drm, + "Unsupported Slice Count %d\n", pipe_config->dsc.slice_count); return -EINVAL; } @@ -2394,7 +2390,7 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, adjusted_mode->crtc_hdisplay, num_joined_pipes); if (!dsc_dp_slice_count) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Compressed Slice Count not supported\n"); return -EINVAL; } @@ -2405,13 +2401,20 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate * is greater than the maximum Cdclock and if slice count is even * then we need to use 2 VDSC instances. + * In case of Ultrajoiner along with 12 slices we need to use 3 + * VDSC instances. 
*/ - if (pipe_config->joiner_pipes || pipe_config->dsc.slice_count > 1) - pipe_config->dsc.dsc_split = true; + if (pipe_config->joiner_pipes && num_joined_pipes == 4 && + pipe_config->dsc.slice_count == 12) + pipe_config->dsc.num_streams = 3; + else if (pipe_config->joiner_pipes || pipe_config->dsc.slice_count > 1) + pipe_config->dsc.num_streams = 2; + else + pipe_config->dsc.num_streams = 1; ret = intel_dp_dsc_compute_params(connector, pipe_config); if (ret < 0) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Cannot compute valid DSC parameters for Input Bpp = %d" "Compressed BPP = " FXP_Q4_FMT "\n", pipe_config->pipe_bpp, @@ -2420,7 +2423,7 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, } pipe_config->dsc.compression_enable = true; - drm_dbg_kms(&dev_priv->drm, "DP DSC computed with Input Bpp = %d " + drm_dbg_kms(display->drm, "DP DSC computed with Input Bpp = %d " "Compressed Bpp = " FXP_Q4_FMT " Slice Count = %d\n", pipe_config->pipe_bpp, FXP_Q4_ARGS(pipe_config->dsc.compressed_bpp_x16), @@ -2429,25 +2432,18 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, return 0; } -/** - * intel_dp_compute_config_link_bpp_limits - compute output link bpp limits - * @intel_dp: intel DP - * @crtc_state: crtc state - * @dsc: DSC compression mode - * @limits: link configuration limits - * - * Calculates the output link min, max bpp values in @limits based on the - * pipe bpp range, @crtc_state and @dsc mode. - * - * Returns %true in case of success. +/* + * Calculate the output link min, max bpp values in limits based on the pipe bpp + * range, crtc_state and dsc mode. Return true on success. 
*/ -bool +static bool intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, + const struct intel_connector *connector, const struct intel_crtc_state *crtc_state, bool dsc, struct link_config_limits *limits) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(intel_dp); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; const struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -2465,17 +2461,27 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, limits->link.min_bpp_x16 = fxp_q4_from_int(limits->pipe.min_bpp); } else { - /* - * TODO: set the DSC link limits already here, atm these are - * initialized only later in intel_edp_dsc_compute_pipe_bpp() / - * intel_dp_dsc_compute_pipe_bpp() - */ - limits->link.min_bpp_x16 = 0; + int dsc_src_min_bpp, dsc_sink_min_bpp, dsc_min_bpp; + int dsc_src_max_bpp, dsc_sink_max_bpp, dsc_max_bpp; + + dsc_src_min_bpp = dsc_src_min_compressed_bpp(); + dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(crtc_state); + dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp); + limits->link.min_bpp_x16 = fxp_q4_from_int(dsc_min_bpp); + + dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp); + dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, + crtc_state, + limits->pipe.max_bpp / 3); + dsc_max_bpp = dsc_sink_max_bpp ? 
+ min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp; + + max_link_bpp_x16 = min(max_link_bpp_x16, fxp_q4_from_int(dsc_max_bpp)); } limits->link.max_bpp_x16 = max_link_bpp_x16; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s][CRTC:%d:%s] DP link limits: pixel clock %d kHz DSC %s max lanes %d max rate %d max pipe_bpp %d max link_bpp " FXP_Q4_FMT "\n", encoder->base.base.id, encoder->base.name, crtc->base.base.id, crtc->base.name, @@ -2489,29 +2495,62 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, return true; } -static bool +static void +intel_dp_dsc_compute_pipe_bpp_limits(struct intel_dp *intel_dp, + struct link_config_limits *limits) +{ + struct intel_display *display = to_intel_display(intel_dp); + int dsc_min_bpc = intel_dp_dsc_min_src_input_bpc(); + int dsc_max_bpc = intel_dp_dsc_max_src_input_bpc(display); + + limits->pipe.max_bpp = clamp(limits->pipe.max_bpp, dsc_min_bpc * 3, dsc_max_bpc * 3); + limits->pipe.min_bpp = clamp(limits->pipe.min_bpp, dsc_min_bpc * 3, dsc_max_bpc * 3); +} + +bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, bool respect_downstream_limits, bool dsc, struct link_config_limits *limits) { + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + limits->min_rate = intel_dp_min_link_rate(intel_dp); limits->max_rate = intel_dp_max_link_rate(intel_dp); - /* FIXME 128b/132b SST support missing */ - limits->max_rate = min(limits->max_rate, 810000); + /* FIXME 128b/132b SST+DSC support missing */ + if (!is_mst && dsc) + limits->max_rate = min(limits->max_rate, 810000); limits->min_rate = min(limits->min_rate, limits->max_rate); limits->min_lane_count = intel_dp_min_lane_count(intel_dp); limits->max_lane_count = intel_dp_max_lane_count(intel_dp); limits->pipe.min_bpp = intel_dp_min_bpp(crtc_state->output_format); - limits->pipe.max_bpp = intel_dp_max_bpp(intel_dp, crtc_state, - respect_downstream_limits); + if (is_mst) { + /* + 
* FIXME: If all the streams can't fit into the link with their + * current pipe_bpp we should reduce pipe_bpp across the board + * until things start to fit. Until then we limit to <= 8bpc + * since that's what was hardcoded for all MST streams + * previously. This hack should be removed once we have the + * proper retry logic in place. + */ + limits->pipe.max_bpp = min(crtc_state->pipe_bpp, 24); + } else { + limits->pipe.max_bpp = intel_dp_max_bpp(intel_dp, crtc_state, + respect_downstream_limits); + } - if (intel_dp->use_max_params) { + if (dsc) + intel_dp_dsc_compute_pipe_bpp_limits(intel_dp, limits); + + if (is_mst || intel_dp->use_max_params) { /* + * For MST we always configure max link bw - the spec doesn't + * seem to suggest we should do otherwise. + * * Use the maximum clock and number of lanes the eDP panel * advertizes being capable of in case the initial fast * optimal params failed us. The panels are generally @@ -2526,6 +2565,7 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp, intel_dp_test_compute_config(intel_dp, crtc_state, limits); return intel_dp_compute_config_link_bpp_limits(intel_dp, + intel_dp->attached_connector, crtc_state, dsc, limits); @@ -2542,7 +2582,7 @@ int intel_dp_config_required_rate(const struct intel_crtc_state *crtc_state) return intel_dp_link_required(adjusted_mode->crtc_clock, bpp); } -bool intel_dp_joiner_needs_dsc(struct drm_i915_private *i915, +bool intel_dp_joiner_needs_dsc(struct intel_display *display, int num_joined_pipes) { /* @@ -2551,7 +2591,7 @@ bool intel_dp_joiner_needs_dsc(struct drm_i915_private *i915, * compression. * Ultrajoiner always needs compression. 
*/ - return (!HAS_UNCOMPRESSED_JOINER(i915) && num_joined_pipes == 2) || + return (!HAS_UNCOMPRESSED_JOINER(display) && num_joined_pipes == 2) || num_joined_pipes == 4; } @@ -2561,7 +2601,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state, bool respect_downstream_limits) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_connector *connector = to_intel_connector(conn_state->connector); @@ -2583,7 +2623,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, if (num_joined_pipes > 1) pipe_config->joiner_pipes = GENMASK(crtc->pipe + num_joined_pipes - 1, crtc->pipe); - joiner_needs_dsc = intel_dp_joiner_needs_dsc(i915, num_joined_pipes); + joiner_needs_dsc = intel_dp_joiner_needs_dsc(display, num_joined_pipes); dsc_needed = joiner_needs_dsc || intel_dp->force_dsc_en || !intel_dp_compute_config_limits(intel_dp, pipe_config, @@ -2598,12 +2638,25 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, */ ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, conn_state, &limits); + if (!ret && intel_dp_is_uhbr(pipe_config)) + ret = intel_dp_mtp_tu_compute_config(intel_dp, + pipe_config, + pipe_config->pipe_bpp, + pipe_config->pipe_bpp, + conn_state, + 0, false); if (ret) dsc_needed = true; } + if (dsc_needed && !intel_dp_supports_dsc(intel_dp, connector, pipe_config)) { + drm_dbg_kms(display->drm, "DSC required but not available\n"); + return -EINVAL; + } + if (dsc_needed) { - drm_dbg_kms(&i915->drm, "Try DSC (fallback=%s, joiner=%s, force=%s)\n", + drm_dbg_kms(display->drm, + "Try DSC (fallback=%s, joiner=%s, force=%s)\n", str_yes_no(ret), str_yes_no(joiner_needs_dsc), str_yes_no(intel_dp->force_dsc_en)); @@ -2619,7 +2672,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, return ret; } - drm_dbg_kms(&i915->drm, + 
drm_dbg_kms(display->drm, "DP lane count %d clock %d bpp input %d compressed " FXP_Q4_FMT " link rate required %d available %d\n", pipe_config->lane_count, pipe_config->port_clock, pipe_config->pipe_bpp, @@ -2665,12 +2718,11 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state, } } -static bool intel_dp_port_has_audio(struct drm_i915_private *dev_priv, - enum port port) +static bool intel_dp_port_has_audio(struct intel_display *display, enum port port) { - if (IS_G4X(dev_priv)) + if (display->platform.g4x) return false; - if (DISPLAY_VER(dev_priv) < 12 && port == PORT_A) + if (DISPLAY_VER(display) < 12 && port == PORT_A) return false; return true; @@ -2680,8 +2732,7 @@ static void intel_dp_compute_vsc_colorimetry(const struct intel_crtc_state *crtc const struct drm_connector_state *conn_state, struct drm_dp_vsc_sdp *vsc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); if (crtc_state->has_panel_replay) { /* @@ -2758,7 +2809,7 @@ static void intel_dp_compute_vsc_colorimetry(const struct intel_crtc_state *crtc vsc->bpc = crtc_state->pipe_bpp / 3; /* only RGB pixelformat supports 6 bpc */ - drm_WARN_ON(&dev_priv->drm, + drm_WARN_ON(display->drm, vsc->bpc == 6 && vsc->pixelformat != DP_PIXELFORMAT_RGB); /* all YCbCr are always limited range */ @@ -2848,8 +2899,8 @@ intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { + struct intel_display *display = to_intel_display(intel_dp); int ret; - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct hdmi_drm_infoframe *drm_infoframe = &crtc_state->infoframes.drm.drm; if (!conn_state->hdr_output_metadata) @@ -2858,7 +2909,8 @@ intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, ret = 
drm_hdmi_infoframe_set_hdr_metadata(drm_infoframe, conn_state); if (ret) { - drm_dbg_kms(&dev_priv->drm, "couldn't set HDR metadata in infoframe\n"); + drm_dbg_kms(display->drm, + "couldn't set HDR metadata in infoframe\n"); return; } @@ -2900,6 +2952,7 @@ intel_dp_drrs_compute_config(struct intel_connector *connector, struct intel_crtc_state *pipe_config, int link_bpp_x16) { + struct intel_display *display = to_intel_display(connector); struct drm_i915_private *i915 = to_i915(connector->base.dev); const struct drm_display_mode *downclock_mode = intel_panel_downclock_mode(connector, &pipe_config->hw.adjusted_mode); @@ -2918,7 +2971,8 @@ intel_dp_drrs_compute_config(struct intel_connector *connector, return; } - if (IS_IRONLAKE(i915) || IS_SANDYBRIDGE(i915) || IS_IVYBRIDGE(i915)) + if (display->platform.ironlake || display->platform.sandybridge || + display->platform.ivybridge) pipe_config->msa_timing_delay = connector->panel.vbt.edp.drrs_msa_timing_delay; pipe_config->has_drrs = true; @@ -2940,13 +2994,13 @@ intel_dp_drrs_compute_config(struct intel_connector *connector, static bool intel_dp_has_audio(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); const struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); struct intel_connector *connector = to_intel_connector(conn_state->connector); - if (!intel_dp_port_has_audio(i915, encoder->port)) + if (!intel_dp_port_has_audio(display, encoder->port)) return false; if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) @@ -2961,7 +3015,7 @@ intel_dp_compute_output_format(struct intel_encoder *encoder, struct drm_connector_state *conn_state, bool respect_downstream_limits) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dp *intel_dp = 
enc_to_intel_dp(encoder); struct intel_connector *connector = intel_dp->attached_connector; const struct drm_display_info *info = &connector->base.display_info; @@ -2972,7 +3026,7 @@ intel_dp_compute_output_format(struct intel_encoder *encoder, ycbcr_420_only = drm_mode_is_420_only(info, adjusted_mode); if (ycbcr_420_only && !connector->base.ycbcr_420_allowed) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "YCbCr 4:2:0 mode but YCbCr 4:2:0 output not possible. Falling back to RGB.\n"); crtc_state->sink_format = INTEL_OUTPUT_FORMAT_RGB; } else { @@ -3056,7 +3110,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_atomic_state *state = to_intel_atomic_state(conn_state->state); struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct intel_dp *intel_dp = enc_to_intel_dp(encoder); @@ -3064,9 +3118,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct intel_connector *connector = intel_dp->attached_connector; int ret = 0, link_bpp_x16; - if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && encoder->port != PORT_A) - pipe_config->has_pch_encoder = true; - fixed_mode = intel_panel_fixed_mode(connector, adjusted_mode); if (intel_dp_is_edp(intel_dp) && fixed_mode) { ret = intel_panel_compute_config(connector, adjusted_mode); @@ -3084,7 +3135,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return -EINVAL; - if (intel_dp_hdisplay_bad(dev_priv, adjusted_mode->crtc_hdisplay)) + if (intel_dp_hdisplay_bad(display, adjusted_mode->crtc_hdisplay)) return -EINVAL; /* @@ -3107,8 +3158,13 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->limited_color_range = intel_dp_limited_color_range(pipe_config, conn_state); - 
pipe_config->enhanced_framing = - drm_dp_enhanced_frame_cap(intel_dp->dpcd); + if (intel_dp_is_uhbr(pipe_config)) { + /* 128b/132b SST also needs this */ + pipe_config->mst_master_transcoder = pipe_config->cpu_transcoder; + } else { + pipe_config->enhanced_framing = + drm_dp_enhanced_frame_cap(intel_dp->dpcd); + } if (pipe_config->dsc.compression_enable) link_bpp_x16 = pipe_config->dsc.compressed_bpp_x16; @@ -3124,7 +3180,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->splitter.link_count = n; pipe_config->splitter.pixel_overlap = overlap; - drm_dbg_kms(&dev_priv->drm, "MSO link count %d, pixel overlap %d\n", + drm_dbg_kms(display->drm, + "MSO link count %d, pixel overlap %d\n", n, overlap); adjusted_mode->crtc_hdisplay = adjusted_mode->crtc_hdisplay / n + overlap; @@ -3138,20 +3195,19 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_dp_audio_compute_config(encoder, pipe_config, conn_state); - intel_link_compute_m_n(link_bpp_x16, - pipe_config->lane_count, - adjusted_mode->crtc_clock, - pipe_config->port_clock, - intel_dp_bw_fec_overhead(pipe_config->fec_enable), - &pipe_config->dp_m_n); + if (!intel_dp_is_uhbr(pipe_config)) { + intel_link_compute_m_n(link_bpp_x16, + pipe_config->lane_count, + adjusted_mode->crtc_clock, + pipe_config->port_clock, + intel_dp_bw_fec_overhead(pipe_config->fec_enable), + &pipe_config->dp_m_n); + } /* FIXME: abstract this better */ if (pipe_config->splitter.enable) pipe_config->dp_m_n.data_m *= pipe_config->splitter.link_count; - if (!HAS_DDI(dev_priv)) - g4x_dp_set_clock(encoder, pipe_config); - intel_vrr_compute_config(pipe_config, conn_state); intel_dp_compute_as_sdp(intel_dp, pipe_config); intel_psr_compute_config(intel_dp, pipe_config, conn_state); @@ -3188,13 +3244,13 @@ void intel_dp_reset_link_params(struct intel_dp *intel_dp) void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { + struct intel_display *display = 
to_intel_display(crtc_state); struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(conn_state->best_encoder)); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); if (!intel_dp_is_edp(intel_dp)) return; - drm_dbg_kms(&i915->drm, "\n"); + drm_dbg_kms(display->drm, "\n"); intel_backlight_enable(crtc_state, conn_state); intel_pps_backlight_on(intel_dp); @@ -3204,12 +3260,12 @@ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder)); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); if (!intel_dp_is_edp(intel_dp)) return; - drm_dbg_kms(&i915->drm, "\n"); + drm_dbg_kms(display->drm, "\n"); intel_pps_backlight_off(intel_dp); intel_backlight_disable(old_conn_state); @@ -3252,11 +3308,11 @@ static void intel_dp_sink_set_dsc_decompression(struct intel_connector *connector, bool enable) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); if (write_dsc_decompression_flag(connector->dp.dsc_decompression_aux, DP_DECOMPRESSION_EN, enable) < 0) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Failed to %s sink decompression state\n", str_enable_disable(enable)); } @@ -3265,7 +3321,7 @@ static void intel_dp_sink_set_dsc_passthrough(const struct intel_connector *connector, bool enable) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct drm_dp_aux *aux = connector->port ? 
connector->port->passthrough_aux : NULL; @@ -3274,7 +3330,7 @@ intel_dp_sink_set_dsc_passthrough(const struct intel_connector *connector, if (write_dsc_decompression_flag(aux, DP_DSC_PASSTHROUGH_EN, enable) < 0) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Failed to %s sink compression passthrough state\n", str_enable_disable(enable)); } @@ -3283,7 +3339,7 @@ static int intel_dp_dsc_aux_ref_count(struct intel_atomic_state *state, const struct intel_connector *connector, bool for_get_ref) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct drm_connector *_connector_iter; struct drm_connector_state *old_conn_state; struct drm_connector_state *new_conn_state; @@ -3308,7 +3364,7 @@ static int intel_dp_dsc_aux_ref_count(struct intel_atomic_state *state, if (!connector_iter->dp.dsc_decompression_enabled) continue; - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, (for_get_ref && !new_conn_state->crtc) || (!for_get_ref && !old_conn_state->crtc)); @@ -3355,12 +3411,12 @@ void intel_dp_sink_enable_decompression(struct intel_atomic_state *state, struct intel_connector *connector, const struct intel_crtc_state *new_crtc_state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); if (!new_crtc_state->dsc.compression_enable) return; - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, !connector->dp.dsc_decompression_aux || connector->dp.dsc_decompression_enabled)) return; @@ -3386,12 +3442,12 @@ void intel_dp_sink_disable_decompression(struct intel_atomic_state *state, struct intel_connector *connector, const struct intel_crtc_state *old_crtc_state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); if (!old_crtc_state->dsc.compression_enable) return; - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, 
!connector->dp.dsc_decompression_aux || !connector->dp.dsc_decompression_enabled)) return; @@ -3406,7 +3462,7 @@ void intel_dp_sink_disable_decompression(struct intel_atomic_state *state, static void intel_dp_init_source_oui(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); u8 oui[] = { 0x00, 0xaa, 0x01 }; u8 buf[3] = {}; @@ -3420,7 +3476,7 @@ intel_dp_init_source_oui(struct intel_dp *intel_dp) * already set to what we want, so as to avoid clearing any state by accident */ if (drm_dp_dpcd_read(&intel_dp->aux, DP_SOURCE_OUI, buf, sizeof(buf)) < 0) - drm_err(&i915->drm, "Failed to read source OUI\n"); + drm_dbg_kms(display->drm, "Failed to read source OUI\n"); if (memcmp(oui, buf, sizeof(oui)) == 0) { /* Assume the OUI was written now. */ @@ -3429,7 +3485,7 @@ intel_dp_init_source_oui(struct intel_dp *intel_dp) } if (drm_dp_dpcd_write(&intel_dp->aux, DP_SOURCE_OUI, oui, sizeof(oui)) < 0) { - drm_info(&i915->drm, "Failed to write source OUI\n"); + drm_dbg_kms(display->drm, "Failed to write source OUI\n"); WRITE_ONCE(intel_dp->oui_valid, false); } @@ -3443,10 +3499,11 @@ void intel_dp_invalidate_source_oui(struct intel_dp *intel_dp) void intel_dp_wait_source_oui(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Performing OUI wait (%u ms)\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Performing OUI wait (%u ms)\n", connector->base.base.id, connector->base.name, connector->panel.vbt.backlight.hdr_dpcd_refresh_timeout); @@ -3457,8 +3514,8 @@ void intel_dp_wait_source_oui(struct intel_dp *intel_dp) /* If the device supports it, try to set the power state appropriately */ void intel_dp_set_power(struct intel_dp *intel_dp, u8 mode) { + struct intel_display *display = 
to_intel_display(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; - struct drm_i915_private *i915 = to_i915(encoder->base.dev); int ret, i; /* Should have a valid DPCD by this point */ @@ -3494,7 +3551,8 @@ void intel_dp_set_power(struct intel_dp *intel_dp, u8 mode) } if (ret != 1) - drm_dbg_kms(&i915->drm, "[ENCODER:%d:%s] Set power to %s failed\n", + drm_dbg_kms(display->drm, + "[ENCODER:%d:%s] Set power to %s failed\n", encoder->base.base.id, encoder->base.name, mode == DP_SET_POWER_D0 ? "D0" : "D3"); } @@ -3537,7 +3595,7 @@ void intel_dp_sync_state(struct intel_encoder *encoder, bool intel_dp_initial_fastset_check(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); bool fastset = true; @@ -3547,7 +3605,8 @@ bool intel_dp_initial_fastset_check(struct intel_encoder *encoder, */ if (intel_dp_rate_index(intel_dp->source_rates, intel_dp->num_source_rates, crtc_state->port_clock) < 0) { - drm_dbg_kms(&i915->drm, "[ENCODER:%d:%s] Forcing full modeset due to unsupported link rate\n", + drm_dbg_kms(display->drm, + "[ENCODER:%d:%s] Forcing full modeset due to unsupported link rate\n", encoder->base.base.id, encoder->base.name); crtc_state->uapi.connectors_changed = true; fastset = false; @@ -3561,14 +3620,15 @@ bool intel_dp_initial_fastset_check(struct intel_encoder *encoder, * Remove once we have readout for DSC. 
*/ if (crtc_state->dsc.compression_enable) { - drm_dbg_kms(&i915->drm, "[ENCODER:%d:%s] Forcing full modeset due to DSC being enabled\n", + drm_dbg_kms(display->drm, + "[ENCODER:%d:%s] Forcing full modeset due to DSC being enabled\n", encoder->base.base.id, encoder->base.name); crtc_state->uapi.mode_changed = true; fastset = false; } if (CAN_PANEL_REPLAY(intel_dp)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] Forcing full modeset to compute panel replay state\n", encoder->base.base.id, encoder->base.name); crtc_state->uapi.mode_changed = true; @@ -3580,7 +3640,7 @@ bool intel_dp_initial_fastset_check(struct intel_encoder *encoder, static void intel_dp_get_pcon_dsc_cap(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); /* Clear the cached register set to avoid using stale values */ @@ -3589,10 +3649,10 @@ static void intel_dp_get_pcon_dsc_cap(struct intel_dp *intel_dp) if (drm_dp_dpcd_read(&intel_dp->aux, DP_PCON_DSC_ENCODER, intel_dp->pcon_dsc_dpcd, sizeof(intel_dp->pcon_dsc_dpcd)) < 0) - drm_err(&i915->drm, "Failed to read DPCD register 0x%x\n", + drm_err(display->drm, "Failed to read DPCD register 0x%x\n", DP_PCON_DSC_ENCODER); - drm_dbg_kms(&i915->drm, "PCON ENCODER DSC DPCD: %*ph\n", + drm_dbg_kms(display->drm, "PCON ENCODER DSC DPCD: %*ph\n", (int)sizeof(intel_dp->pcon_dsc_dpcd), intel_dp->pcon_dsc_dpcd); } @@ -3630,19 +3690,19 @@ static int intel_dp_pcon_set_frl_mask(int max_frl) static int intel_dp_hdmi_sink_max_frl(struct intel_dp *intel_dp) { - struct intel_connector *intel_connector = intel_dp->attached_connector; - struct drm_connector *connector = &intel_connector->base; + struct intel_connector *connector = intel_dp->attached_connector; + const struct drm_display_info *info = &connector->base.display_info; int max_frl_rate; int max_lanes, rate_per_lane; int max_dsc_lanes, dsc_rate_per_lane; - max_lanes = 
connector->display_info.hdmi.max_lanes; - rate_per_lane = connector->display_info.hdmi.max_frl_rate_per_lane; + max_lanes = info->hdmi.max_lanes; + rate_per_lane = info->hdmi.max_frl_rate_per_lane; max_frl_rate = max_lanes * rate_per_lane; - if (connector->display_info.hdmi.dsc_cap.v_1p2) { - max_dsc_lanes = connector->display_info.hdmi.dsc_cap.max_lanes; - dsc_rate_per_lane = connector->display_info.hdmi.dsc_cap.max_frl_rate_per_lane; + if (info->hdmi.dsc_cap.v_1p2) { + max_dsc_lanes = info->hdmi.dsc_cap.max_lanes; + dsc_rate_per_lane = info->hdmi.dsc_cap.max_frl_rate_per_lane; if (max_dsc_lanes && dsc_rate_per_lane) max_frl_rate = min(max_frl_rate, max_dsc_lanes * dsc_rate_per_lane); } @@ -3664,19 +3724,19 @@ intel_dp_pcon_is_frl_trained(struct intel_dp *intel_dp, static int intel_dp_pcon_start_frl_training(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); #define TIMEOUT_FRL_READY_MS 500 #define TIMEOUT_HDMI_LINK_ACTIVE_MS 1000 - - struct drm_i915_private *i915 = dp_to_i915(intel_dp); int max_frl_bw, max_pcon_frl_bw, max_edid_frl_bw, ret; u8 max_frl_bw_mask = 0, frl_trained_mask; bool is_active; max_pcon_frl_bw = intel_dp->dfp.pcon_max_frl_bw; - drm_dbg(&i915->drm, "PCON max rate = %d Gbps\n", max_pcon_frl_bw); + drm_dbg(display->drm, "PCON max rate = %d Gbps\n", max_pcon_frl_bw); max_edid_frl_bw = intel_dp_hdmi_sink_max_frl(intel_dp); - drm_dbg(&i915->drm, "Sink max rate from EDID = %d Gbps\n", max_edid_frl_bw); + drm_dbg(display->drm, "Sink max rate from EDID = %d Gbps\n", + max_edid_frl_bw); max_frl_bw = min(max_edid_frl_bw, max_pcon_frl_bw); @@ -3684,7 +3744,7 @@ static int intel_dp_pcon_start_frl_training(struct intel_dp *intel_dp) return -EINVAL; max_frl_bw_mask = intel_dp_pcon_set_frl_mask(max_frl_bw); - drm_dbg(&i915->drm, "MAX_FRL_BW_MASK = %u\n", max_frl_bw_mask); + drm_dbg(display->drm, "MAX_FRL_BW_MASK = %u\n", max_frl_bw_mask); if (intel_dp_pcon_is_frl_trained(intel_dp, max_frl_bw_mask, &frl_trained_mask)) 
goto frl_trained; @@ -3721,10 +3781,11 @@ static int intel_dp_pcon_start_frl_training(struct intel_dp *intel_dp) return -ETIMEDOUT; frl_trained: - drm_dbg(&i915->drm, "FRL_TRAINED_MASK = %u\n", frl_trained_mask); + drm_dbg(display->drm, "FRL_TRAINED_MASK = %u\n", frl_trained_mask); intel_dp->frl.trained_rate_gbps = intel_dp_pcon_get_frl_mask(frl_trained_mask); intel_dp->frl.is_trained = true; - drm_dbg(&i915->drm, "FRL trained with : %d Gbps\n", intel_dp->frl.trained_rate_gbps); + drm_dbg(display->drm, "FRL trained with : %d Gbps\n", + intel_dp->frl.trained_rate_gbps); return 0; } @@ -3763,7 +3824,7 @@ int intel_dp_pcon_set_tmds_mode(struct intel_dp *intel_dp) void intel_dp_check_frl_training(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); /* * Always go for FRL training if: @@ -3778,14 +3839,16 @@ void intel_dp_check_frl_training(struct intel_dp *intel_dp) if (intel_dp_pcon_start_frl_training(intel_dp) < 0) { int ret, mode; - drm_dbg(&dev_priv->drm, "Couldn't set FRL mode, continuing with TMDS mode\n"); + drm_dbg(display->drm, + "Couldn't set FRL mode, continuing with TMDS mode\n"); ret = intel_dp_pcon_set_tmds_mode(intel_dp); mode = drm_dp_pcon_hdmi_link_mode(&intel_dp->aux, NULL); if (ret < 0 || mode != DP_PCON_HDMI_MODE_TMDS) - drm_dbg(&dev_priv->drm, "Issue with PCON, cannot set TMDS mode\n"); + drm_dbg(display->drm, + "Issue with PCON, cannot set TMDS mode\n"); } else { - drm_dbg(&dev_priv->drm, "FRL training Completed\n"); + drm_dbg(display->drm, "FRL training Completed\n"); } } @@ -3801,10 +3864,10 @@ static int intel_dp_pcon_dsc_enc_slices(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct intel_connector *intel_connector = intel_dp->attached_connector; - struct drm_connector *connector = &intel_connector->base; - int hdmi_throughput = connector->display_info.hdmi.dsc_cap.clk_per_slice; - int hdmi_max_slices = 
connector->display_info.hdmi.dsc_cap.max_slices; + struct intel_connector *connector = intel_dp->attached_connector; + const struct drm_display_info *info = &connector->base.display_info; + int hdmi_throughput = info->hdmi.dsc_cap.clk_per_slice; + int hdmi_max_slices = info->hdmi.dsc_cap.max_slices; int pcon_max_slices = drm_dp_pcon_dsc_max_slices(intel_dp->pcon_dsc_dpcd); int pcon_max_slice_width = drm_dp_pcon_dsc_max_slice_width(intel_dp->pcon_dsc_dpcd); @@ -3818,13 +3881,13 @@ intel_dp_pcon_dsc_enc_bpp(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, int num_slices, int slice_width) { - struct intel_connector *intel_connector = intel_dp->attached_connector; - struct drm_connector *connector = &intel_connector->base; + struct intel_connector *connector = intel_dp->attached_connector; + const struct drm_display_info *info = &connector->base.display_info; int output_format = crtc_state->output_format; - bool hdmi_all_bpp = connector->display_info.hdmi.dsc_cap.all_bpp; + bool hdmi_all_bpp = info->hdmi.dsc_cap.all_bpp; int pcon_fractional_bpp = drm_dp_pcon_dsc_bpp_incr(intel_dp->pcon_dsc_dpcd); int hdmi_max_chunk_bytes = - connector->display_info.hdmi.dsc_cap.total_chunk_kbytes * 1024; + info->hdmi.dsc_cap.total_chunk_kbytes * 1024; return intel_hdmi_dsc_get_bpp(pcon_fractional_bpp, slice_width, num_slices, output_format, hdmi_all_bpp, @@ -3835,24 +3898,26 @@ void intel_dp_pcon_dsc_configure(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; + const struct drm_display_info *info; u8 pps_param[6]; int slice_height; int slice_width; int num_slices; int bits_per_pixel; int ret; - struct intel_connector *intel_connector = intel_dp->attached_connector; - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - struct drm_connector *connector; bool hdmi_is_dsc_1_2; if 
(!intel_dp_is_hdmi_2_1_sink(intel_dp)) return; - if (!intel_connector) + if (!connector) return; - connector = &intel_connector->base; - hdmi_is_dsc_1_2 = connector->display_info.hdmi.dsc_cap.v_1p2; + + info = &connector->base.display_info; + + hdmi_is_dsc_1_2 = info->hdmi.dsc_cap.v_1p2; if (!drm_dp_pcon_enc_is_dsc_1_2(intel_dp->pcon_dsc_dpcd) || !hdmi_is_dsc_1_2) @@ -3883,13 +3948,13 @@ intel_dp_pcon_dsc_configure(struct intel_dp *intel_dp, ret = drm_dp_pcon_pps_override_param(&intel_dp->aux, pps_param); if (ret < 0) - drm_dbg_kms(&i915->drm, "Failed to set pcon DSC\n"); + drm_dbg_kms(display->drm, "Failed to set pcon DSC\n"); } void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); bool ycbcr444_to_420 = false; bool rgb_to_ycbcr = false; u8 tmp; @@ -3904,7 +3969,8 @@ void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_PROTOCOL_CONVERTER_CONTROL_0, tmp) != 1) - drm_dbg_kms(&i915->drm, "Failed to %s protocol converter HDMI mode\n", + drm_dbg_kms(display->drm, + "Failed to %s protocol converter HDMI mode\n", str_enable_disable(intel_dp_has_hdmi_sink(intel_dp))); if (crtc_state->sink_format == INTEL_OUTPUT_FORMAT_YCBCR420) { @@ -3939,14 +4005,14 @@ void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_PROTOCOL_CONVERTER_CONTROL_1, tmp) != 1) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Failed to %s protocol converter YCbCr 4:2:0 conversion mode\n", str_enable_disable(intel_dp->dfp.ycbcr_444_to_420)); tmp = rgb_to_ycbcr ? 
DP_CONVERSION_BT709_RGB_YCBCR_ENABLE : 0; if (drm_dp_pcon_convert_rgb_to_ycbcr(&intel_dp->aux, tmp) < 0) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Failed to %s protocol converter RGB->YCbCr conversion mode\n", str_enable_disable(tmp)); } @@ -3979,7 +4045,7 @@ static void intel_dp_read_dsc_dpcd(struct drm_dp_aux *aux, void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); /* * Clear the cached register set to avoid using stale values @@ -3998,11 +4064,11 @@ void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, struct intel_connector *connector) if (drm_dp_dpcd_readb(connector->dp.dsc_decompression_aux, DP_FEC_CAPABILITY, &connector->dp.fec_capability) < 0) { - drm_err(&i915->drm, "Failed to read FEC DPCD register\n"); + drm_err(display->drm, "Failed to read FEC DPCD register\n"); return; } - drm_dbg_kms(&i915->drm, "FEC CAPABILITY: %x\n", + drm_dbg_kms(display->drm, "FEC CAPABILITY: %x\n", connector->dp.fec_capability); } @@ -4017,10 +4083,10 @@ static void intel_edp_get_dsc_sink_cap(u8 edp_dpcd_rev, struct intel_connector * static void intel_dp_detect_dsc_caps(struct intel_dp *intel_dp, struct intel_connector *connector) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); /* Read DP Sink DSC Cap DPCD regs for DP v1.4 */ - if (!HAS_DSC(i915)) + if (!HAS_DSC(display)) return; if (intel_dp_is_edp(intel_dp)) @@ -4034,8 +4100,8 @@ intel_dp_detect_dsc_caps(struct intel_dp *intel_dp, struct intel_connector *conn static void intel_edp_mso_mode_fixup(struct intel_connector *connector, struct drm_display_mode *mode) { + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); int n = intel_dp->mso_link_count; int overlap = 
intel_dp->mso_pixel_overlap; @@ -4050,7 +4116,7 @@ static void intel_edp_mso_mode_fixup(struct intel_connector *connector, drm_mode_set_name(mode); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] using generated MSO mode: " DRM_MODE_FMT "\n", connector->base.base.id, connector->base.name, DRM_MODE_ARG(mode)); @@ -4058,7 +4124,7 @@ static void intel_edp_mso_mode_fixup(struct intel_connector *connector, void intel_edp_fixup_vbt_bpp(struct intel_encoder *encoder, int pipe_bpp) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_connector *connector = intel_dp->attached_connector; @@ -4076,7 +4142,7 @@ void intel_edp_fixup_vbt_bpp(struct intel_encoder *encoder, int pipe_bpp) * up by the BIOS, and thus we can't get the mode at module * load. */ - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "pipe has %d bpp for eDP panel, overriding BIOS-provided max %d bpp\n", pipe_bpp, connector->panel.vbt.edp.bpp); connector->panel.vbt.edp.bpp = pipe_bpp; @@ -4085,7 +4151,7 @@ void intel_edp_fixup_vbt_bpp(struct intel_encoder *encoder, int pipe_bpp) static void intel_edp_mso_init(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; struct drm_display_info *info = &connector->base.display_info; u8 mso; @@ -4094,23 +4160,25 @@ static void intel_edp_mso_init(struct intel_dp *intel_dp) return; if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_MSO_LINK_CAPABILITIES, &mso) != 1) { - drm_err(&i915->drm, "Failed to read MSO cap\n"); + drm_err(display->drm, "Failed to read MSO cap\n"); return; } /* Valid configurations are SST or MSO 2x1, 2x2, 4x1 */ mso &= DP_EDP_MSO_NUMBER_OF_LINKS_MASK; if (mso % 2 || mso > drm_dp_max_lane_count(intel_dp->dpcd)) { - 
drm_err(&i915->drm, "Invalid MSO link count cap %u\n", mso); + drm_err(display->drm, "Invalid MSO link count cap %u\n", mso); mso = 0; } if (mso) { - drm_dbg_kms(&i915->drm, "Sink MSO %ux%u configuration, pixel overlap %u\n", + drm_dbg_kms(display->drm, + "Sink MSO %ux%u configuration, pixel overlap %u\n", mso, drm_dp_max_lane_count(intel_dp->dpcd) / mso, info->mso_pixel_overlap); - if (!HAS_MSO(i915)) { - drm_err(&i915->drm, "No source MSO support, disabling\n"); + if (!HAS_MSO(display)) { + drm_err(display->drm, + "No source MSO support, disabling\n"); mso = 0; } } @@ -4161,11 +4229,10 @@ intel_edp_set_sink_rates(struct intel_dp *intel_dp) static bool intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector) { - struct drm_i915_private *dev_priv = - to_i915(dp_to_dig_port(intel_dp)->base.base.dev); + struct intel_display *display = to_intel_display(intel_dp); /* this function is meant to be called only once */ - drm_WARN_ON(&dev_priv->drm, intel_dp->dpcd[DP_DPCD_REV] != 0); + drm_WARN_ON(display->drm, intel_dp->dpcd[DP_DPCD_REV] != 0); if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd) != 0) return false; @@ -4189,7 +4256,7 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == sizeof(intel_dp->edp_dpcd)) { - drm_dbg_kms(&dev_priv->drm, "eDP DPCD: %*ph\n", + drm_dbg_kms(display->drm, "eDP DPCD: %*ph\n", (int)sizeof(intel_dp->edp_dpcd), intel_dp->edp_dpcd); @@ -4300,9 +4367,9 @@ static enum drm_dp_mst_mode intel_dp_mst_mode_choose(struct intel_dp *intel_dp, enum drm_dp_mst_mode sink_mst_mode) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); - if (!i915->display.params.enable_dp_mst) + if (!display->params.enable_dp_mst) return DRM_DP_SST; if (!intel_dp_mst_source_support(intel_dp)) @@ -4318,7 +4385,7 @@ 
intel_dp_mst_mode_choose(struct intel_dp *intel_dp, static enum drm_dp_mst_mode intel_dp_mst_detect(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; enum drm_dp_mst_mode sink_mst_mode; enum drm_dp_mst_mode mst_detect; @@ -4327,12 +4394,12 @@ intel_dp_mst_detect(struct intel_dp *intel_dp) mst_detect = intel_dp_mst_mode_choose(intel_dp, sink_mst_mode); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s -> enable: %s\n", encoder->base.base.id, encoder->base.name, str_yes_no(intel_dp_mst_source_support(intel_dp)), intel_dp_mst_mode_str(sink_mst_mode), - str_yes_no(i915->display.params.enable_dp_mst), + str_yes_no(display->params.enable_dp_mst), intel_dp_mst_mode_str(mst_detect)); return mst_detect; @@ -4358,12 +4425,13 @@ intel_dp_mst_configure(struct intel_dp *intel_dp) static void intel_dp_mst_disconnect(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); if (!intel_dp->is_mst) return; - drm_dbg_kms(&i915->drm, "MST device may have disappeared %d vs %d\n", + drm_dbg_kms(display->drm, + "MST device may have disappeared %d vs %d\n", intel_dp->is_mst, intel_dp->mst_mgr.mst_state); intel_dp->is_mst = false; drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); @@ -4444,7 +4512,7 @@ static ssize_t intel_dp_as_sdp_pack(const struct drm_dp_as_sdp *as_sdp, } static ssize_t -intel_dp_hdr_metadata_infoframe_sdp_pack(struct drm_i915_private *i915, +intel_dp_hdr_metadata_infoframe_sdp_pack(struct intel_display *display, const struct hdmi_drm_infoframe *drm_infoframe, struct dp_sdp *sdp, size_t size) @@ -4461,12 +4529,13 @@ intel_dp_hdr_metadata_infoframe_sdp_pack(struct drm_i915_private *i915, len = hdmi_drm_infoframe_pack_only(drm_infoframe, 
buf, sizeof(buf)); if (len < 0) { - drm_dbg_kms(&i915->drm, "buffer size is smaller than hdr metadata infoframe\n"); + drm_dbg_kms(display->drm, + "buffer size is smaller than hdr metadata infoframe\n"); return -ENOSPC; } if (len != infoframe_size) { - drm_dbg_kms(&i915->drm, "wrong static hdr metadata size\n"); + drm_dbg_kms(display->drm, "wrong static hdr metadata size\n"); return -ENOSPC; } @@ -4524,8 +4593,8 @@ static void intel_write_dp_sdp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, unsigned int type) { + struct intel_display *display = to_intel_display(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct dp_sdp sdp = {}; ssize_t len; @@ -4538,7 +4607,7 @@ static void intel_write_dp_sdp(struct intel_encoder *encoder, len = drm_dp_vsc_sdp_pack(&crtc_state->infoframes.vsc, &sdp); break; case HDMI_PACKET_TYPE_GAMUT_METADATA: - len = intel_dp_hdr_metadata_infoframe_sdp_pack(dev_priv, + len = intel_dp_hdr_metadata_infoframe_sdp_pack(display, &crtc_state->infoframes.drm.drm, &sdp, sizeof(sdp)); break; @@ -4551,7 +4620,7 @@ static void intel_write_dp_sdp(struct intel_encoder *encoder, return; } - if (drm_WARN_ON(&dev_priv->drm, len < 0)) + if (drm_WARN_ON(display->drm, len < 0)) return; dig_port->write_infoframe(encoder, crtc_state, type, &sdp, len); @@ -4562,20 +4631,19 @@ void intel_dp_set_infoframes(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - i915_reg_t reg = HSW_TVIDEO_DIP_CTL(dev_priv, - crtc_state->cpu_transcoder); + struct intel_display *display = to_intel_display(encoder); + i915_reg_t reg = HSW_TVIDEO_DIP_CTL(display, crtc_state->cpu_transcoder); u32 dip_enable = VIDEO_DIP_ENABLE_AVI_HSW | VIDEO_DIP_ENABLE_GCP_HSW | VIDEO_DIP_ENABLE_VS_HSW | VIDEO_DIP_ENABLE_GMP_HSW | VIDEO_DIP_ENABLE_SPD_HSW 
| VIDEO_DIP_ENABLE_DRM_GLK; - if (HAS_AS_SDP(dev_priv)) + if (HAS_AS_SDP(display)) dip_enable |= VIDEO_DIP_ENABLE_AS_ADL; - u32 val = intel_de_read(dev_priv, reg) & ~dip_enable; + u32 val = intel_de_read(display, reg) & ~dip_enable; /* TODO: Sanitize DSC enabling wrt. intel_dsc_dp_pps_write(). */ - if (!enable && HAS_DSC(dev_priv)) + if (!enable && HAS_DSC(display)) val &= ~VDIP_ENABLE_PPS; /* @@ -4585,8 +4653,8 @@ void intel_dp_set_infoframes(struct intel_encoder *encoder, if (!enable || !crtc_state->has_psr) val &= ~VIDEO_DIP_ENABLE_VSC_HSW; - intel_de_write(dev_priv, reg, val); - intel_de_posting_read(dev_priv, reg); + intel_de_write(display, reg, val); + intel_de_posting_read(display, reg); if (!enable) return; @@ -4707,8 +4775,8 @@ intel_read_dp_as_sdp(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_dp_as_sdp *as_sdp) { + struct intel_display *display = to_intel_display(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); unsigned int type = DP_SDP_ADAPTIVE_SYNC; struct dp_sdp sdp = {}; int ret; @@ -4722,7 +4790,7 @@ intel_read_dp_as_sdp(struct intel_encoder *encoder, ret = intel_dp_as_sdp_unpack(as_sdp, &sdp, sizeof(sdp)); if (ret) - drm_dbg_kms(&dev_priv->drm, "Failed to unpack DP AS SDP\n"); + drm_dbg_kms(display->drm, "Failed to unpack DP AS SDP\n"); } static int @@ -4775,8 +4843,8 @@ static void intel_read_dp_vsc_sdp(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_dp_vsc_sdp *vsc) { + struct intel_display *display = to_intel_display(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); unsigned int type = DP_SDP_VSC; struct dp_sdp sdp = {}; int ret; @@ -4790,15 +4858,15 @@ static void intel_read_dp_vsc_sdp(struct intel_encoder *encoder, ret = intel_dp_vsc_sdp_unpack(vsc, &sdp, sizeof(sdp)); if (ret) - 
drm_dbg_kms(&dev_priv->drm, "Failed to unpack DP VSC SDP\n"); + drm_dbg_kms(display->drm, "Failed to unpack DP VSC SDP\n"); } static void intel_read_dp_hdr_metadata_infoframe_sdp(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct hdmi_drm_infoframe *drm_infoframe) { + struct intel_display *display = to_intel_display(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); unsigned int type = HDMI_PACKET_TYPE_GAMUT_METADATA; struct dp_sdp sdp = {}; int ret; @@ -4814,7 +4882,7 @@ static void intel_read_dp_hdr_metadata_infoframe_sdp(struct intel_encoder *encod sizeof(sdp)); if (ret) - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Failed to unpack DP HDR Metadata Infoframe SDP\n"); } @@ -4844,8 +4912,8 @@ void intel_read_dp_sdp(struct intel_encoder *encoder, static bool intel_dp_link_ok(struct intel_dp *intel_dp, u8 link_status[DP_LINK_STATUS_SIZE]) { + struct intel_display *display = to_intel_display(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; - struct drm_i915_private *i915 = to_i915(encoder->base.dev); bool uhbr = intel_dp->link_rate >= 1000000; bool ok; @@ -4859,7 +4927,7 @@ static bool intel_dp_link_ok(struct intel_dp *intel_dp, return true; intel_dp_dump_link_status(intel_dp, DP_PHY_DPRX, link_status); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] %s link not ok, retraining\n", encoder->base.base.id, encoder->base.name, uhbr ? 
"128b/132b" : "8b/10b"); @@ -4882,14 +4950,14 @@ intel_dp_mst_hpd_irq(struct intel_dp *intel_dp, u8 *esi, u8 *ack) static bool intel_dp_mst_link_status(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; - struct drm_i915_private *i915 = to_i915(encoder->base.dev); u8 link_status[DP_LINK_STATUS_SIZE] = {}; const size_t esi_link_status_size = DP_LINK_STATUS_SIZE - 2; if (drm_dp_dpcd_read(&intel_dp->aux, DP_LANE0_1_STATUS_ESI, link_status, esi_link_status_size) != esi_link_status_size) { - drm_err(&i915->drm, + drm_err(display->drm, "[ENCODER:%d:%s] Failed to read link status\n", encoder->base.base.id, encoder->base.name); return false; @@ -4915,27 +4983,27 @@ static bool intel_dp_mst_link_status(struct intel_dp *intel_dp) static bool intel_dp_check_mst_status(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; bool link_ok = true; bool reprobe_needed = false; - drm_WARN_ON_ONCE(&i915->drm, intel_dp->active_mst_links < 0); + drm_WARN_ON_ONCE(display->drm, intel_dp->active_mst_links < 0); for (;;) { u8 esi[4] = {}; u8 ack[4] = {}; if (!intel_dp_get_sink_irq_esi(intel_dp, esi)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "failed to get ESI - device may have failed\n"); link_ok = false; break; } - drm_dbg_kms(&i915->drm, "DPRX ESI: %4ph\n", esi); + drm_dbg_kms(display->drm, "DPRX ESI: %4ph\n", esi); if (intel_dp->active_mst_links > 0 && link_ok && esi[3] & LINK_STATUS_CHANGED) { @@ -4947,7 +5015,7 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp) intel_dp_mst_hpd_irq(intel_dp, esi, ack); if (esi[3] & DP_TUNNELING_IRQ) { - if (drm_dp_tunnel_handle_irq(i915->display.dp_tunnel_mgr, + if (drm_dp_tunnel_handle_irq(display->dp_tunnel_mgr, 
&intel_dp->aux)) reprobe_needed = true; ack[3] |= DP_TUNNELING_IRQ; @@ -4957,7 +5025,7 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp) break; if (!intel_dp_ack_sink_irq_esi(intel_dp, ack)) - drm_dbg_kms(&i915->drm, "Failed to ack ESI\n"); + drm_dbg_kms(display->drm, "Failed to ack ESI\n"); if (ack[1] & (DP_DOWN_REP_MSG_RDY | DP_UP_REQ_MSG_RDY)) drm_dp_mst_hpd_irq_send_new_request(&intel_dp->mst_mgr); @@ -5045,7 +5113,7 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp) bool intel_dp_has_connector(struct intel_dp *intel_dp, const struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_encoder *encoder; enum pipe pipe; @@ -5058,7 +5126,7 @@ bool intel_dp_has_connector(struct intel_dp *intel_dp, return true; /* MST */ - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { encoder = &intel_dp->mst_encoders[pipe]->base; if (conn_state->best_encoder == &encoder->base) return true; @@ -5086,14 +5154,14 @@ int intel_dp_get_active_pipes(struct intel_dp *intel_dp, struct drm_modeset_acquire_ctx *ctx, u8 *pipe_mask) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct drm_connector_list_iter conn_iter; struct intel_connector *connector; int ret = 0; *pipe_mask = 0; - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { struct drm_connector_state *conn_state = connector->base.state; @@ -5113,7 +5181,8 @@ int intel_dp_get_active_pipes(struct intel_dp *intel_dp, crtc_state = to_intel_crtc_state(crtc->base.state); - drm_WARN_ON(&i915->drm, !intel_crtc_has_dp_encoder(crtc_state)); + drm_WARN_ON(display->drm, + !intel_crtc_has_dp_encoder(crtc_state)); if (!crtc_state->hw.active) continue; @@ -5143,6 +5212,7 @@ static bool intel_dp_is_connected(struct 
intel_dp *intel_dp) static int intel_dp_retrain_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); u8 pipe_mask; @@ -5151,7 +5221,7 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, if (!intel_dp_is_connected(intel_dp)) return 0; - ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, + ret = drm_modeset_lock(&display->drm->mode_config.connection_mutex, ctx); if (ret) return ret; @@ -5169,7 +5239,8 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, if (!intel_dp_needs_link_retrain(intel_dp)) return 0; - drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] retraining link (forced %s)\n", + drm_dbg_kms(display->drm, + "[ENCODER:%d:%s] retraining link (forced %s)\n", encoder->base.base.id, encoder->base.name, str_yes_no(intel_dp->link.force_retrain)); @@ -5180,7 +5251,7 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, intel_dp->link.force_retrain = false; if (ret) - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] link retraining failed: %pe\n", encoder->base.base.id, encoder->base.name, ERR_PTR(ret)); @@ -5213,7 +5284,7 @@ void intel_dp_check_link_state(struct intel_dp *intel_dp) static void intel_dp_check_device_service_irq(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); u8 val; if (intel_dp->dpcd[DP_DPCD_REV] < 0x11) @@ -5232,12 +5303,12 @@ static void intel_dp_check_device_service_irq(struct intel_dp *intel_dp) intel_hdcp_handle_cp_irq(intel_dp->attached_connector); if (val & DP_SINK_SPECIFIC_IRQ) - drm_dbg_kms(&i915->drm, "Sink specific irq unhandled\n"); + drm_dbg_kms(display->drm, "Sink specific irq unhandled\n"); } static bool intel_dp_check_link_service_irq(struct intel_dp *intel_dp) 
{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); bool reprobe_needed = false; u8 val; @@ -5249,7 +5320,7 @@ static bool intel_dp_check_link_service_irq(struct intel_dp *intel_dp) return false; if ((val & DP_TUNNELING_IRQ) && - drm_dp_tunnel_handle_irq(i915->display.dp_tunnel_mgr, + drm_dp_tunnel_handle_irq(display->dp_tunnel_mgr, &intel_dp->aux)) reprobe_needed = true; @@ -5318,12 +5389,12 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) static enum drm_connector_status intel_dp_detect_dpcd(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); u8 *dpcd = intel_dp->dpcd; u8 type; - if (drm_WARN_ON(&i915->drm, intel_dp_is_edp(intel_dp))) + if (drm_WARN_ON(display->drm, intel_dp_is_edp(intel_dp))) return connector_status_connected; lspcon_resume(dig_port); @@ -5366,7 +5437,7 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) } /* Anything else is out of spec, warn and ignore */ - drm_dbg_kms(&i915->drm, "Broken DP branch device, ignoring\n"); + drm_dbg_kms(display->drm, "Broken DP branch device, ignoring\n"); return connector_status_disconnected; } @@ -5461,7 +5532,7 @@ static void intel_dp_update_dfp(struct intel_dp *intel_dp, const struct drm_edid *drm_edid) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; intel_dp->dfp.max_bpc = @@ -5485,7 +5556,7 @@ intel_dp_update_dfp(struct intel_dp *intel_dp, drm_dp_get_pcon_max_frl_bw(intel_dp->dpcd, intel_dp->downstream_ports); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] DFP max bpc %d, max dotclock %d, TMDS clock %d-%d, PCON Max FRL BW %dGbps\n", connector->base.base.id, connector->base.name, intel_dp->dfp.max_bpc, @@ -5518,7 +5589,7 @@ 
intel_dp_can_ycbcr420(struct intel_dp *intel_dp) static void intel_dp_update_420(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; intel_dp->dfp.ycbcr420_passthrough = @@ -5536,7 +5607,7 @@ intel_dp_update_420(struct intel_dp *intel_dp) connector->base.ycbcr_420_allowed = intel_dp_can_ycbcr420(intel_dp); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] RGB->YcbCr conversion? %s, YCbCr 4:2:0 allowed? %s, YCbCr 4:4:4->4:2:0 conversion? %s\n", connector->base.base.id, connector->base.name, str_yes_no(intel_dp->dfp.rgb_to_ycbcr), @@ -5547,7 +5618,7 @@ intel_dp_update_420(struct intel_dp *intel_dp) static void intel_dp_set_edid(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; const struct drm_edid *drm_edid; bool vrr_capable; @@ -5560,7 +5631,7 @@ intel_dp_set_edid(struct intel_dp *intel_dp) drm_edid_connector_update(&connector->base, drm_edid); vrr_capable = intel_vrr_is_capable(connector); - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] VRR capable: %s\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] VRR capable: %s\n", connector->base.base.id, connector->base.name, str_yes_no(vrr_capable)); drm_connector_set_vrr_capable_property(&connector->base, vrr_capable); @@ -5597,38 +5668,37 @@ intel_dp_unset_edid(struct intel_dp *intel_dp) static void intel_dp_detect_sdp_caps(struct intel_dp *intel_dp) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); - intel_dp->as_sdp_supported = HAS_AS_SDP(i915) && + intel_dp->as_sdp_supported = HAS_AS_SDP(display) && drm_dp_as_sdp_supported(&intel_dp->aux, intel_dp->dpcd); } static int -intel_dp_detect(struct drm_connector 
*connector, +intel_dp_detect(struct drm_connector *_connector, struct drm_modeset_acquire_ctx *ctx, bool force) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_connector *intel_connector = - to_intel_connector(connector); - struct intel_dp *intel_dp = intel_attached_dp(intel_connector); + struct intel_display *display = to_intel_display(_connector->dev); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; enum drm_connector_status status; int ret; - drm_dbg_kms(&dev_priv->drm, "[CONNECTOR:%d:%s]\n", - connector->base.id, connector->name); - drm_WARN_ON(&dev_priv->drm, - !drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", + connector->base.base.id, connector->base.name); + drm_WARN_ON(display->drm, + !drm_modeset_is_locked(&display->drm->mode_config.connection_mutex)); - if (!intel_display_device_enabled(dev_priv)) + if (!intel_display_device_enabled(display)) return connector_status_disconnected; - if (!intel_display_driver_check_access(dev_priv)) - return connector->status; + if (!intel_display_driver_check_access(display)) + return connector->base.status; - intel_dp_flush_connector_commits(intel_connector); + intel_dp_flush_connector_commits(connector); intel_pps_vdd_on(intel_dp); @@ -5654,7 +5724,7 @@ intel_dp_detect(struct drm_connector *connector, if (status == connector_status_disconnected) { intel_dp_test_reset(intel_dp); - memset(intel_connector->dp.dsc_dpcd, 0, sizeof(intel_connector->dp.dsc_dpcd)); + memset(connector->dp.dsc_dpcd, 0, sizeof(connector->dp.dsc_dpcd)); intel_dp->psr.sink_panel_replay_support = false; intel_dp->psr.sink_panel_replay_su_support = false; @@ -5675,12 +5745,12 @@ intel_dp_detect(struct drm_connector *connector, } if (ret == 1) - 
intel_connector->base.epoch_counter++; + connector->base.epoch_counter++; if (!intel_dp_is_edp(intel_dp)) intel_psr_init_dpcd(intel_dp); - intel_dp_detect_dsc_caps(intel_dp, intel_connector); + intel_dp_detect_dsc_caps(intel_dp, connector); intel_dp_detect_sdp_caps(intel_dp); @@ -5723,8 +5793,7 @@ intel_dp_detect(struct drm_connector *connector, intel_dp->aux.i2c_defer_count = 0; intel_dp_set_edid(intel_dp); - if (intel_dp_is_edp(intel_dp) || - to_intel_connector(connector)->detect_edid) + if (intel_dp_is_edp(intel_dp) || connector->detect_edid) status = connector_status_connected; intel_dp_check_device_service_irq(intel_dp); @@ -5734,7 +5803,7 @@ out_unset_edid: intel_dp_unset_edid(intel_dp); if (!intel_dp_is_edp(intel_dp)) - drm_dp_set_subconnector_property(connector, + drm_dp_set_subconnector_property(&connector->base, status, intel_dp->dpcd, intel_dp->downstream_ports); @@ -5747,15 +5816,13 @@ out_vdd_off: static void intel_dp_force(struct drm_connector *connector) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &dig_port->base; - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - drm_dbg_kms(&dev_priv->drm, "[CONNECTOR:%d:%s]\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (!intel_display_driver_check_access(dev_priv)) + if (!intel_display_driver_check_access(display)) return; intel_dp_unset_edid(intel_dp); @@ -5766,30 +5833,31 @@ intel_dp_force(struct drm_connector *connector) intel_dp_set_edid(intel_dp); } -static int intel_dp_get_modes(struct drm_connector *connector) +static int intel_dp_get_modes(struct drm_connector *_connector) { - struct intel_connector *intel_connector = to_intel_connector(connector); + struct intel_display *display = to_intel_display(_connector->dev); + struct 
intel_connector *connector = to_intel_connector(_connector); + struct intel_dp *intel_dp = intel_attached_dp(connector); int num_modes; /* drm_edid_connector_update() done in ->detect() or ->force() */ - num_modes = drm_edid_connector_add_modes(connector); + num_modes = drm_edid_connector_add_modes(&connector->base); /* Also add fixed mode, which may or may not be present in EDID */ - if (intel_dp_is_edp(intel_attached_dp(intel_connector))) - num_modes += intel_panel_get_modes(intel_connector); + if (intel_dp_is_edp(intel_dp)) + num_modes += intel_panel_get_modes(connector); if (num_modes) return num_modes; - if (!intel_connector->detect_edid) { - struct intel_dp *intel_dp = intel_attached_dp(intel_connector); + if (!connector->detect_edid) { struct drm_display_mode *mode; - mode = drm_dp_downstream_mode(connector->dev, + mode = drm_dp_downstream_mode(display->drm, intel_dp->dpcd, intel_dp->downstream_ports); if (mode) { - drm_mode_probed_add(connector, mode); + drm_mode_probed_add(&connector->base, mode); num_modes++; } } @@ -5800,7 +5868,7 @@ static int intel_dp_get_modes(struct drm_connector *connector) static int intel_dp_connector_register(struct drm_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_display *display = to_intel_display(connector->dev); struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_lspcon *lspcon = &dig_port->lspcon; @@ -5810,7 +5878,7 @@ intel_dp_connector_register(struct drm_connector *connector) if (ret) return ret; - drm_dbg_kms(&i915->drm, "registering %s bus for %s\n", + drm_dbg_kms(display->drm, "registering %s bus for %s\n", intel_dp->aux.name, connector->kdev->kobj.name); intel_dp->aux.dev = connector->kdev; @@ -5847,10 +5915,11 @@ intel_dp_connector_unregister(struct drm_connector *connector) void intel_dp_connector_sync_state(struct intel_connector *connector, const struct 
intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); if (crtc_state && crtc_state->dsc.compression_enable) { - drm_WARN_ON(&i915->drm, !connector->dp.dsc_decompression_aux); + drm_WARN_ON(display->drm, + !connector->dp.dsc_decompression_aux); connector->dp.dsc_decompression_enabled = true; } else { connector->dp.dsc_decompression_enabled = false; @@ -5880,18 +5949,18 @@ void intel_dp_encoder_flush_work(struct drm_encoder *_encoder) intel_dp_aux_fini(intel_dp); } -void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) +void intel_dp_encoder_suspend(struct intel_encoder *encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_pps_vdd_off_sync(intel_dp); intel_dp_tunnel_suspend(intel_dp); } -void intel_dp_encoder_shutdown(struct intel_encoder *intel_encoder) +void intel_dp_encoder_shutdown(struct intel_encoder *encoder) { - struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_pps_wait_power_cycle(intel_dp); } @@ -5899,12 +5968,12 @@ void intel_dp_encoder_shutdown(struct intel_encoder *intel_encoder) static int intel_modeset_tile_group(struct intel_atomic_state *state, int tile_group_id) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct drm_connector_list_iter conn_iter; struct drm_connector *connector; int ret = 0; - drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { struct drm_connector_state *conn_state; struct intel_crtc_state *crtc_state; @@ -5940,13 +6009,13 @@ static int intel_modeset_tile_group(struct intel_atomic_state *state, static int intel_modeset_affected_transcoders(struct intel_atomic_state 
*state, u8 transcoders) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_crtc *crtc; if (transcoders == 0) return 0; - for_each_intel_crtc(&dev_priv->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state; int ret; @@ -5973,7 +6042,7 @@ static int intel_modeset_affected_transcoders(struct intel_atomic_state *state, transcoders &= ~BIT(crtc_state->cpu_transcoder); } - drm_WARN_ON(&dev_priv->drm, transcoders != 0); + drm_WARN_ON(display->drm, transcoders != 0); return 0; } @@ -6007,7 +6076,7 @@ static int intel_modeset_synced_crtcs(struct intel_atomic_state *state, static int intel_dp_connector_atomic_check(struct drm_connector *conn, struct drm_atomic_state *_state) { - struct drm_i915_private *dev_priv = to_i915(conn->dev); + struct intel_display *display = to_intel_display(conn->dev); struct intel_atomic_state *state = to_intel_atomic_state(_state); struct drm_connector_state *conn_state = drm_atomic_get_new_connector_state(_state, conn); struct intel_connector *intel_conn = to_intel_connector(conn); @@ -6037,7 +6106,7 @@ static int intel_dp_connector_atomic_check(struct drm_connector *conn, * We don't enable port sync on BDW due to missing w/as and * due to not having adjusted the modeset sequence appropriately. 
*/ - if (DISPLAY_VER(dev_priv) < 9) + if (DISPLAY_VER(display) < 9) return 0; if (conn->has_tile) { @@ -6052,6 +6121,7 @@ static int intel_dp_connector_atomic_check(struct drm_connector *conn, static void intel_dp_oob_hotplug_event(struct drm_connector *connector, enum drm_connector_status hpd_state) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); struct drm_i915_private *i915 = to_i915(connector->dev); bool hpd_high = hpd_state == connector_status_connected; @@ -6059,10 +6129,12 @@ static void intel_dp_oob_hotplug_event(struct drm_connector *connector, bool need_work = false; spin_lock_irq(&i915->irq_lock); - if (hpd_high != test_bit(hpd_pin, &i915->display.hotplug.oob_hotplug_last_state)) { - i915->display.hotplug.event_bits |= BIT(hpd_pin); + if (hpd_high != test_bit(hpd_pin, &display->hotplug.oob_hotplug_last_state)) { + display->hotplug.event_bits |= BIT(hpd_pin); - __assign_bit(hpd_pin, &i915->display.hotplug.oob_hotplug_last_state, hpd_high); + __assign_bit(hpd_pin, + &display->hotplug.oob_hotplug_last_state, + hpd_high); need_work = true; } spin_unlock_irq(&i915->irq_lock); @@ -6094,6 +6166,7 @@ static const struct drm_connector_helper_funcs intel_dp_connector_helper_funcs = enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd) { + struct intel_display *display = to_intel_display(dig_port); struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); struct intel_dp *intel_dp = &dig_port->dp; u8 dpcd[DP_RECEIVER_CAP_SIZE]; @@ -6108,7 +6181,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd) * would end up in an endless cycle of * "vdd off -> long/short hpd -> vdd on -> detect -> vdd off -> ..." */ - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "ignoring %s hpd on eDP [ENCODER:%d:%s]\n", long_hpd ? 
"long" : "short", dig_port->base.base.base.id, @@ -6116,7 +6189,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd) return IRQ_HANDLED; } - drm_dbg_kms(&i915->drm, "got hpd irq on [ENCODER:%d:%s] - %s\n", + drm_dbg_kms(display->drm, "got hpd irq on [ENCODER:%d:%s] - %s\n", dig_port->base.base.base.id, dig_port->base.base.name, long_hpd ? "long" : "short"); @@ -6149,7 +6222,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd) return IRQ_HANDLED; } -static bool _intel_dp_is_port_edp(struct drm_i915_private *dev_priv, +static bool _intel_dp_is_port_edp(struct intel_display *display, const struct intel_bios_encoder_data *devdata, enum port port) { @@ -6157,41 +6230,40 @@ static bool _intel_dp_is_port_edp(struct drm_i915_private *dev_priv, * eDP not supported on g4x. so bail out early just * for a bit extra safety in case the VBT is bonkers. */ - if (DISPLAY_VER(dev_priv) < 5) + if (DISPLAY_VER(display) < 5) return false; - if (DISPLAY_VER(dev_priv) < 9 && port == PORT_A) + if (DISPLAY_VER(display) < 9 && port == PORT_A) return true; return devdata && intel_bios_encoder_supports_edp(devdata); } -bool intel_dp_is_port_edp(struct drm_i915_private *i915, enum port port) +bool intel_dp_is_port_edp(struct intel_display *display, enum port port) { - struct intel_display *display = &i915->display; const struct intel_bios_encoder_data *devdata = intel_bios_encoder_data_lookup(display, port); - return _intel_dp_is_port_edp(i915, devdata, port); + return _intel_dp_is_port_edp(display, devdata, port); } bool intel_dp_has_gamut_metadata_dip(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); enum port port = encoder->port; if (intel_bios_encoder_is_lspcon(encoder->devdata)) return false; - if (DISPLAY_VER(i915) >= 11) + if (DISPLAY_VER(display) >= 11) return true; if (port == PORT_A) return false; - if (IS_HASWELL(i915) || 
IS_BROADWELL(i915) || - DISPLAY_VER(i915) >= 9) + if (display->platform.haswell || display->platform.broadwell || + DISPLAY_VER(display) >= 9) return true; return false; @@ -6200,19 +6272,19 @@ intel_dp_has_gamut_metadata_dip(struct intel_encoder *encoder) static void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); + struct intel_display *display = to_intel_display(intel_dp); enum port port = dp_to_dig_port(intel_dp)->base.port; if (!intel_dp_is_edp(intel_dp)) drm_connector_attach_dp_subconnector_property(connector); - if (!IS_G4X(dev_priv) && port != PORT_A) + if (!display->platform.g4x && port != PORT_A) intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - if (HAS_GMCH(dev_priv)) + if (HAS_GMCH(display)) drm_connector_attach_max_bpc_property(connector, 6, 10); - else if (DISPLAY_VER(dev_priv) >= 5) + else if (DISPLAY_VER(display) >= 5) drm_connector_attach_max_bpc_property(connector, 6, 12); /* Register HDMI colorspace for case of lspcon */ @@ -6226,22 +6298,22 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect if (intel_dp_has_gamut_metadata_dip(&dp_to_dig_port(intel_dp)->base)) drm_connector_attach_hdr_output_metadata_property(connector); - if (HAS_VRR(dev_priv)) + if (HAS_VRR(display)) drm_connector_attach_vrr_capable_property(connector); } static void intel_edp_add_properties(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; - struct drm_i915_private *i915 = to_i915(connector->base.dev); const struct drm_display_mode *fixed_mode = intel_panel_preferred_fixed_mode(connector); intel_attach_scaling_mode_property(&connector->base); drm_connector_set_panel_orientation_with_quirk(&connector->base, - i915->display.vbt.orientation, + display->vbt.orientation, fixed_mode->hdisplay, 
fixed_mode->vdisplay); } @@ -6249,21 +6321,20 @@ intel_edp_add_properties(struct intel_dp *intel_dp) static void intel_edp_backlight_setup(struct intel_dp *intel_dp, struct intel_connector *connector) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); enum pipe pipe = INVALID_PIPE; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (display->platform.valleyview || display->platform.cherryview) pipe = vlv_pps_backlight_initial_pipe(intel_dp); intel_backlight_setup(connector, pipe); } static bool intel_edp_init_connector(struct intel_dp *intel_dp, - struct intel_connector *intel_connector) + struct intel_connector *connector) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - struct drm_connector *connector = &intel_connector->base; + struct drm_i915_private *dev_priv = to_i915(display->drm); struct drm_display_mode *fixed_mode; struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; bool has_dpcd; @@ -6279,19 +6350,19 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * with an already powered-on LVDS power sequencer. */ if (intel_get_lvds_encoder(dev_priv)) { - drm_WARN_ON(&dev_priv->drm, + drm_WARN_ON(display->drm, !(HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv))); - drm_info(&dev_priv->drm, + drm_info(display->drm, "LVDS was detected, not registering eDP\n"); return false; } - intel_bios_init_panel_early(display, &intel_connector->panel, + intel_bios_init_panel_early(display, &connector->panel, encoder->devdata); if (!intel_pps_init(intel_dp)) { - drm_info(&dev_priv->drm, + drm_info(display->drm, "[ENCODER:%d:%s] unusable PPS, disabling eDP\n", encoder->base.base.id, encoder->base.name); /* @@ -6314,11 +6385,11 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, intel_alpm_init_dpcd(intel_dp); /* Cache DPCD and EDID for edp. 
*/ - has_dpcd = intel_edp_init_dpcd(intel_dp, intel_connector); + has_dpcd = intel_edp_init_dpcd(intel_dp, connector); if (!has_dpcd) { /* if this fails, presume the device is a ghost */ - drm_info(&dev_priv->drm, + drm_info(display->drm, "[ENCODER:%d:%s] failed to retrieve link info, disabling eDP\n", encoder->base.base.id, encoder->base.name); goto out_vdd_off; @@ -6341,7 +6412,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * DPCD read? Would need sort out the VDD handling... */ if (!intel_digital_port_connected(encoder)) { - drm_info(&dev_priv->drm, + drm_info(display->drm, "[ENCODER:%d:%s] HPD is down, disabling eDP\n", encoder->base.base.id, encoder->base.name); goto out_vdd_off; @@ -6353,30 +6424,30 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * back to checking for a VGA branch device. Only do this * on known affected platforms to minimize false positives. */ - if (DISPLAY_VER(dev_priv) == 9 && drm_dp_is_branch(intel_dp->dpcd) && + if (DISPLAY_VER(display) == 9 && drm_dp_is_branch(intel_dp->dpcd) && (intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_TYPE_MASK) == DP_DWN_STRM_PORT_TYPE_ANALOG) { - drm_info(&dev_priv->drm, + drm_info(display->drm, "[ENCODER:%d:%s] VGA converter detected, disabling eDP\n", encoder->base.base.id, encoder->base.name); goto out_vdd_off; } } - mutex_lock(&dev_priv->drm.mode_config.mutex); - drm_edid = drm_edid_read_ddc(connector, connector->ddc); + mutex_lock(&display->drm->mode_config.mutex); + drm_edid = drm_edid_read_ddc(&connector->base, connector->base.ddc); if (!drm_edid) { /* Fallback to EDID from ACPI OpRegion, if any */ - drm_edid = intel_opregion_get_edid(intel_connector); + drm_edid = intel_opregion_get_edid(connector); if (drm_edid) - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] Using OpRegion EDID\n", - connector->base.id, connector->name); + connector->base.base.id, connector->base.name); } if (drm_edid) { - if 
(drm_edid_connector_update(connector, drm_edid) || - !drm_edid_connector_add_modes(connector)) { - drm_edid_connector_update(connector, NULL); + if (drm_edid_connector_update(&connector->base, drm_edid) || + !drm_edid_connector_add_modes(&connector->base)) { + drm_edid_connector_update(&connector->base, NULL); drm_edid_free(drm_edid); drm_edid = ERR_PTR(-EINVAL); } @@ -6384,34 +6455,34 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, drm_edid = ERR_PTR(-ENOENT); } - intel_bios_init_panel_late(display, &intel_connector->panel, encoder->devdata, + intel_bios_init_panel_late(display, &connector->panel, encoder->devdata, IS_ERR(drm_edid) ? NULL : drm_edid); - intel_panel_add_edid_fixed_modes(intel_connector, true); + intel_panel_add_edid_fixed_modes(connector, true); /* MSO requires information from the EDID */ intel_edp_mso_init(intel_dp); /* multiply the mode clock and horizontal timings for MSO */ - list_for_each_entry(fixed_mode, &intel_connector->panel.fixed_modes, head) - intel_edp_mso_mode_fixup(intel_connector, fixed_mode); + list_for_each_entry(fixed_mode, &connector->panel.fixed_modes, head) + intel_edp_mso_mode_fixup(connector, fixed_mode); /* fallback to VBT if available for eDP */ - if (!intel_panel_preferred_fixed_mode(intel_connector)) - intel_panel_add_vbt_lfp_fixed_mode(intel_connector); + if (!intel_panel_preferred_fixed_mode(connector)) + intel_panel_add_vbt_lfp_fixed_mode(connector); - mutex_unlock(&dev_priv->drm.mode_config.mutex); + mutex_unlock(&display->drm->mode_config.mutex); - if (!intel_panel_preferred_fixed_mode(intel_connector)) { - drm_info(&dev_priv->drm, + if (!intel_panel_preferred_fixed_mode(connector)) { + drm_info(display->drm, "[ENCODER:%d:%s] failed to find fixed mode for the panel, disabling eDP\n", encoder->base.base.id, encoder->base.name); goto out_vdd_off; } - intel_panel_init(intel_connector, drm_edid); + intel_panel_init(connector, drm_edid); - intel_edp_backlight_setup(intel_dp, intel_connector); + 
intel_edp_backlight_setup(intel_dp, connector); intel_edp_add_properties(intel_dp); @@ -6421,34 +6492,32 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, out_vdd_off: intel_pps_vdd_off_sync(intel_dp); - intel_bios_fini_panel(&intel_connector->panel); + intel_bios_fini_panel(&connector->panel); return false; } static void intel_dp_modeset_retry_work_fn(struct work_struct *work) { - struct intel_connector *intel_connector; - struct drm_connector *connector; + struct intel_connector *connector = container_of(work, typeof(*connector), + modeset_retry_work); + struct intel_display *display = to_intel_display(connector); - intel_connector = container_of(work, typeof(*intel_connector), - modeset_retry_work); - connector = &intel_connector->base; - drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s]\n", connector->base.id, - connector->name); + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", connector->base.base.id, + connector->base.name); /* Grab the locks before changing connector property*/ - mutex_lock(&connector->dev->mode_config.mutex); + mutex_lock(&display->drm->mode_config.mutex); /* Set connector link status to BAD and send a Uevent to notify * userspace to do a modeset. 
*/ - drm_connector_set_link_status_property(connector, + drm_connector_set_link_status_property(&connector->base, DRM_MODE_LINK_STATUS_BAD); - mutex_unlock(&connector->dev->mode_config.mutex); + mutex_unlock(&display->drm->mode_config.mutex); /* Send Hotplug uevent so userspace can reprobe */ - drm_kms_helper_connector_hotplug_event(connector); + drm_kms_helper_connector_hotplug_event(&connector->base); - drm_connector_put(connector); + drm_connector_put(&connector->base); } void intel_dp_init_modeset_retry_work(struct intel_connector *connector) @@ -6459,45 +6528,44 @@ void intel_dp_init_modeset_retry_work(struct intel_connector *connector) bool intel_dp_init_connector(struct intel_digital_port *dig_port, - struct intel_connector *intel_connector) + struct intel_connector *connector) { struct intel_display *display = to_intel_display(dig_port); - struct drm_connector *connector = &intel_connector->base; struct intel_dp *intel_dp = &dig_port->dp; - struct intel_encoder *intel_encoder = &dig_port->base; - struct drm_device *dev = intel_encoder->base.dev; + struct intel_encoder *encoder = &dig_port->base; + struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_encoder->port; + enum port port = encoder->port; int type; /* Initialize the work for modeset in case of link train failure */ - intel_dp_init_modeset_retry_work(intel_connector); + intel_dp_init_modeset_retry_work(connector); if (drm_WARN(dev, dig_port->max_lanes < 1, "Not enough lanes (%d) for DP on [ENCODER:%d:%s]\n", - dig_port->max_lanes, intel_encoder->base.base.id, - intel_encoder->base.name)) + dig_port->max_lanes, encoder->base.base.id, + encoder->base.name)) return false; intel_dp->reset_link_params = true; /* Preserve the current hw state. 
*/ - intel_dp->DP = intel_de_read(dev_priv, intel_dp->output_reg); - intel_dp->attached_connector = intel_connector; + intel_dp->DP = intel_de_read(display, intel_dp->output_reg); + intel_dp->attached_connector = connector; - if (_intel_dp_is_port_edp(dev_priv, intel_encoder->devdata, port)) { + if (_intel_dp_is_port_edp(display, encoder->devdata, port)) { /* * Currently we don't support eDP on TypeC ports for DISPLAY_VER < 30, * although in theory it could work on TypeC legacy ports. */ - drm_WARN_ON(dev, intel_encoder_is_tc(intel_encoder) && - DISPLAY_VER(dev_priv) < 30); + drm_WARN_ON(dev, intel_encoder_is_tc(encoder) && + DISPLAY_VER(display) < 30); type = DRM_MODE_CONNECTOR_eDP; - intel_encoder->type = INTEL_OUTPUT_EDP; + encoder->type = INTEL_OUTPUT_EDP; /* eDP only on port B and/or C on vlv/chv */ - if (drm_WARN_ON(dev, (IS_VALLEYVIEW(dev_priv) || - IS_CHERRYVIEW(dev_priv)) && + if (drm_WARN_ON(dev, (display->platform.valleyview || + display->platform.cherryview) && port != PORT_B && port != PORT_C)) return false; } else { @@ -6507,37 +6575,37 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, intel_dp_set_default_sink_rates(intel_dp); intel_dp_set_default_max_sink_lane_count(intel_dp); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + if (display->platform.valleyview || display->platform.cherryview) vlv_pps_pipe_init(intel_dp); intel_dp_aux_init(intel_dp); - intel_connector->dp.dsc_decompression_aux = &intel_dp->aux; + connector->dp.dsc_decompression_aux = &intel_dp->aux; - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Adding %s connector on [ENCODER:%d:%s]\n", type == DRM_MODE_CONNECTOR_eDP ? 
"eDP" : "DP", - intel_encoder->base.base.id, intel_encoder->base.name); + encoder->base.base.id, encoder->base.name); - drm_connector_init_with_ddc(dev, connector, &intel_dp_connector_funcs, + drm_connector_init_with_ddc(dev, &connector->base, &intel_dp_connector_funcs, type, &intel_dp->aux.ddc); - drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); + drm_connector_helper_add(&connector->base, &intel_dp_connector_helper_funcs); - if (!HAS_GMCH(dev_priv) && DISPLAY_VER(dev_priv) < 12) - connector->interlace_allowed = true; + if (!HAS_GMCH(display) && DISPLAY_VER(display) < 12) + connector->base.interlace_allowed = true; if (type != DRM_MODE_CONNECTOR_eDP) - intel_connector->polled = DRM_CONNECTOR_POLL_HPD; - intel_connector->base.polled = intel_connector->polled; + connector->polled = DRM_CONNECTOR_POLL_HPD; + connector->base.polled = connector->polled; - intel_connector_attach_encoder(intel_connector, intel_encoder); + intel_connector_attach_encoder(connector, encoder); - if (HAS_DDI(dev_priv)) - intel_connector->get_hw_state = intel_ddi_connector_get_hw_state; + if (HAS_DDI(display)) + connector->get_hw_state = intel_ddi_connector_get_hw_state; else - intel_connector->get_hw_state = intel_connector_get_hw_state; - intel_connector->sync_state = intel_dp_connector_sync_state; + connector->get_hw_state = intel_connector_get_hw_state; + connector->sync_state = intel_dp_connector_sync_state; - if (!intel_edp_init_connector(intel_dp, intel_connector)) { + if (!intel_edp_init_connector(intel_dp, connector)) { intel_dp_aux_fini(intel_dp); goto fail; } @@ -6547,15 +6615,14 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, intel_dp_reset_link_params(intel_dp); /* init MST on ports that can support it */ - intel_dp_mst_encoder_init(dig_port, - intel_connector->base.base.id); + intel_dp_mst_encoder_init(dig_port, connector->base.base.id); - intel_dp_add_properties(intel_dp, connector); + intel_dp_add_properties(intel_dp, &connector->base); if 
(is_hdcp_supported(display, port) && !intel_dp_is_edp(intel_dp)) { - int ret = intel_dp_hdcp_init(dig_port, intel_connector); + int ret = intel_dp_hdcp_init(dig_port, connector); if (ret) - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "HDCP init failed, skipping.\n"); } @@ -6568,19 +6635,19 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, fail: intel_display_power_flush_work(dev_priv); - drm_connector_cleanup(connector); + drm_connector_cleanup(&connector->base); return false; } -void intel_dp_mst_suspend(struct drm_i915_private *dev_priv) +void intel_dp_mst_suspend(struct intel_display *display) { struct intel_encoder *encoder; - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(display)) return; - for_each_intel_encoder(&dev_priv->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { struct intel_dp *intel_dp; if (encoder->type != INTEL_OUTPUT_DDI) @@ -6596,14 +6663,14 @@ void intel_dp_mst_suspend(struct drm_i915_private *dev_priv) } } -void intel_dp_mst_resume(struct drm_i915_private *dev_priv) +void intel_dp_mst_resume(struct intel_display *display) { struct intel_encoder *encoder; - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(display)) return; - for_each_intel_encoder(&dev_priv->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { struct intel_dp *intel_dp; int ret; diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 48f10876be65..ca49f0a05da5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -12,14 +12,14 @@ enum intel_output_format; enum pipe; enum port; struct drm_connector_state; +struct drm_dp_vsc_sdp; struct drm_encoder; -struct drm_i915_private; struct drm_modeset_acquire_ctx; -struct drm_dp_vsc_sdp; struct intel_atomic_state; struct intel_connector; struct intel_crtc_state; struct intel_digital_port; +struct intel_display; struct intel_dp; struct intel_encoder; @@ -87,15 +87,15 @@ bool 
intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state); bool intel_dp_has_dsc(const struct intel_connector *connector); int intel_dp_link_symbol_size(int rate); int intel_dp_link_symbol_clock(int rate); -bool intel_dp_is_port_edp(struct drm_i915_private *dev_priv, enum port port); +bool intel_dp_is_port_edp(struct intel_display *display, enum port port); enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd); void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); void intel_edp_backlight_off(const struct drm_connector_state *conn_state); void intel_edp_fixup_vbt_bpp(struct intel_encoder *encoder, int pipe_bpp); -void intel_dp_mst_suspend(struct drm_i915_private *dev_priv); -void intel_dp_mst_resume(struct drm_i915_private *dev_priv); +void intel_dp_mst_suspend(struct intel_display *display); +void intel_dp_mst_resume(struct intel_display *display); int intel_dp_max_source_lane_count(struct intel_digital_port *dig_port); int intel_dp_max_link_rate(struct intel_dp *intel_dp); int intel_dp_max_lane_count(struct intel_dp *intel_dp); @@ -112,15 +112,15 @@ void intel_dp_reset_link_params(struct intel_dp *intel_dp); void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, u8 *link_bw, u8 *rate_select); -bool intel_dp_source_supports_tps3(struct drm_i915_private *i915); -bool intel_dp_source_supports_tps4(struct drm_i915_private *i915); +bool intel_dp_source_supports_tps3(struct intel_display *display); +bool intel_dp_source_supports_tps4(struct intel_display *display); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_effective_data_rate(int pixel_clock, int bpp_x16, int bw_overhead); int intel_dp_max_link_data_rate(struct intel_dp *intel_dp, int max_dprx_rate, int max_dprx_lanes); -bool intel_dp_joiner_needs_dsc(struct drm_i915_private *i915, +bool intel_dp_joiner_needs_dsc(struct intel_display *display, int num_joined_pipes); bool 
intel_dp_has_joiner(struct intel_dp *intel_dp); bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, @@ -137,16 +137,16 @@ bool intel_digital_port_connected(struct intel_encoder *encoder); bool intel_digital_port_connected_locked(struct intel_encoder *encoder); int intel_dp_dsc_compute_max_bpp(const struct intel_connector *connector, u8 dsc_max_bpc); -u16 intel_dp_dsc_get_max_compressed_bpp(struct drm_i915_private *i915, +u16 intel_dp_dsc_get_max_compressed_bpp(struct intel_display *display, u32 link_clock, u32 lane_count, u32 mode_clock, u32 mode_hdisplay, int num_joined_pipes, enum intel_output_format output_format, u32 pipe_bpp, u32 timeslots); -int intel_dp_dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config); +int intel_dp_dsc_sink_min_compressed_bpp(const struct intel_crtc_state *pipe_config); int intel_dp_dsc_sink_max_compressed_bpp(const struct intel_connector *connector, - struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *pipe_config, int bpc); u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, int mode_clock, int mode_hdisplay, @@ -170,10 +170,11 @@ bool intel_dp_supports_fec(struct intel_dp *intel_dp, const struct intel_connector *connector, const struct intel_crtc_state *pipe_config); -bool intel_dp_supports_dsc(const struct intel_connector *connector, +bool intel_dp_supports_dsc(struct intel_dp *intel_dp, + const struct intel_connector *connector, const struct intel_crtc_state *crtc_state); -u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp, u32 pipe_bpp); +u32 intel_dp_dsc_nearest_valid_bpp(struct intel_display *display, u32 bpp, u32 pipe_bpp); void intel_ddi_update_pipe(struct intel_atomic_state *state, struct intel_encoder *encoder, @@ -193,11 +194,11 @@ void intel_dp_invalidate_source_oui(struct intel_dp *intel_dp); void intel_dp_wait_source_oui(struct intel_dp *intel_dp); int intel_dp_output_bpp(enum intel_output_format output_format, int bpp); -bool 
-intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - bool dsc, - struct link_config_limits *limits); +bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, + bool respect_downstream_limits, + bool dsc, + struct link_config_limits *limits); void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, struct intel_connector *connector); bool intel_dp_has_gamut_metadata_dip(struct intel_encoder *encoder); @@ -206,5 +207,7 @@ bool intel_dp_link_params_valid(struct intel_dp *intel_dp, int link_rate, u8 lane_count); bool intel_dp_has_connector(struct intel_dp *intel_dp, const struct drm_connector_state *conn_state); +int intel_dp_dsc_max_src_input_bpc(struct intel_display *display); +int intel_dp_dsc_min_src_input_bpc(void); #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 04a7acd7f73c..40c697476b72 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -5,8 +5,6 @@ #include "i915_drv.h" #include "i915_reg.h" -#include "i915_trace.h" -#include "intel_bios.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" @@ -15,6 +13,7 @@ #include "intel_pps.h" #include "intel_quirks.h" #include "intel_tc.h" +#include "intel_uncore_trace.h" #define AUX_CH_NAME_BUFSIZE 6 diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 33f72db99b58..c846ef4acf5b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -34,8 +34,9 @@ * for some reason. 
*/ -#include "i915_drv.h" +#include "i915_utils.h" #include "intel_backlight.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_aux_backlight.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index c911d4a19e62..c0f8473e7223 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -25,7 +25,8 @@ #include <drm/display/drm_dp_helper.h> -#include "i915_drv.h" +#include "i915_utils.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_link_training.h" @@ -221,7 +222,6 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI int intel_dp_read_dprx_caps(struct intel_dp *intel_dp, u8 dpcd[DP_RECEIVER_CAP_SIZE]) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); if (intel_dp_is_edp(intel_dp)) return 0; @@ -230,7 +230,7 @@ int intel_dp_read_dprx_caps(struct intel_dp *intel_dp, u8 dpcd[DP_RECEIVER_CAP_S * Detecting LTTPRs must be avoided on platforms with an AUX timeout * period < 3.2ms. (see DP Standard v2.0, 2.11.2, 3.6.6.1). */ - if (DISPLAY_VER(display) >= 10 && !IS_GEMINILAKE(i915)) + if (DISPLAY_VER(display) >= 10 && !display->platform.geminilake) if (drm_dp_dpcd_probe(&intel_dp->aux, DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV)) return -EIO; @@ -262,7 +262,6 @@ int intel_dp_read_dprx_caps(struct intel_dp *intel_dp, u8 dpcd[DP_RECEIVER_CAP_S int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); int lttpr_count = 0; /* @@ -270,7 +269,7 @@ int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp) * period < 3.2ms. (see DP Standard v2.0, 2.11.2, 3.6.6.1). 
*/ if (!intel_dp_is_edp(intel_dp) && - (DISPLAY_VER(display) >= 10 && !IS_GEMINILAKE(i915))) { + (DISPLAY_VER(display) >= 10 && !display->platform.geminilake)) { u8 dpcd[DP_RECEIVER_CAP_SIZE]; int err = intel_dp_read_dprx_caps(intel_dp, dpcd); @@ -391,10 +390,9 @@ static bool has_per_lane_signal_levels(struct intel_dp *intel_dp, enum drm_dp_phy dp_phy) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); return !intel_dp_phy_is_downstream_of_source(intel_dp, dp_phy) || - DISPLAY_VER(display) >= 10 || IS_BROXTON(i915); + DISPLAY_VER(display) >= 10 || display->platform.broxton; } /* 128b/132b */ @@ -898,7 +896,7 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp, voltage_tries = 1; for (cr_tries = 0; cr_tries < max_cr_tries; ++cr_tries) { - usleep_range(delay_us, 2 * delay_us); + fsleep(delay_us); if (drm_dp_dpcd_read_phy_link_status(&intel_dp->aux, dp_phy, link_status) < 0) { @@ -959,7 +957,6 @@ static u32 intel_dp_training_pattern(struct intel_dp *intel_dp, enum drm_dp_phy dp_phy) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); bool source_tps3, sink_tps3, source_tps4, sink_tps4; /* UHBR+ use separate 128b/132b TPS2 */ @@ -972,7 +969,7 @@ static u32 intel_dp_training_pattern(struct intel_dp *intel_dp, * TPS4 as of Feb 2018 as per VESA eDP_v1.4b_E1 specification. * LTTPRs must support TPS4. */ - source_tps4 = intel_dp_source_supports_tps4(i915); + source_tps4 = intel_dp_source_supports_tps4(display); sink_tps4 = dp_phy != DP_PHY_DPRX || drm_dp_tps4_supported(intel_dp->dpcd); if (source_tps4 && sink_tps4) { @@ -990,7 +987,7 @@ static u32 intel_dp_training_pattern(struct intel_dp *intel_dp, * TPS3 support is mandatory for downstream devices that * support HBR2. However, not all sinks follow the spec. 
*/ - source_tps3 = intel_dp_source_supports_tps3(i915); + source_tps3 = intel_dp_source_supports_tps3(display); sink_tps3 = dp_phy != DP_PHY_DPRX || drm_dp_tps3_supported(intel_dp->dpcd); if (source_tps3 && sink_tps3) { @@ -1040,7 +1037,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp, } for (tries = 0; tries < 5; tries++) { - usleep_range(delay_us, 2 * delay_us); + fsleep(delay_us); if (drm_dp_dpcd_read_phy_link_status(&intel_dp->aux, dp_phy, link_status) < 0) { @@ -1414,16 +1411,10 @@ intel_dp_128b132b_lane_eq(struct intel_dp *intel_dp, } /* Time budget for the LANEx_EQ_DONE Sequence */ - deadline = jiffies + msecs_to_jiffies_timeout(400); + deadline = jiffies + msecs_to_jiffies_timeout(450); for (try = 0; try < max_tries; try++) { - usleep_range(delay_us, 2 * delay_us); - - /* - * The delay may get updated. The transmitter shall read the - * delay before link status during link training. - */ - delay_us = drm_dp_128b132b_read_aux_rd_interval(&intel_dp->aux); + fsleep(delay_us); if (drm_dp_dpcd_read_link_status(&intel_dp->aux, link_status) < 0) { lt_err(intel_dp, DP_PHY_DPRX, "Failed to read link status\n"); @@ -1451,8 +1442,15 @@ intel_dp_128b132b_lane_eq(struct intel_dp *intel_dp, if (time_after(jiffies, deadline)) timeout = true; /* try one last time after deadline */ - /* Update signal levels and training set as requested. */ + /* + * During LT, Tx shall read AUX_RD_INTERVAL just before writing the new FFE + * presets. + */ + delay_us = drm_dp_128b132b_read_aux_rd_interval(&intel_dp->aux); + intel_dp_get_adjust_train(intel_dp, crtc_state, DP_PHY_DPRX, link_status); + + /* Update signal levels and training set as requested. 
*/ if (!intel_dp_update_link_train(intel_dp, crtc_state, DP_PHY_DPRX)) { lt_err(intel_dp, DP_PHY_DPRX, "Failed to update TX FFE settings\n"); return false; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 0812a3fd8b37..227bd2783e64 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -53,14 +53,64 @@ #include "intel_vdsc.h" #include "skl_scaler.h" +/* + * DP MST (DisplayPort Multi-Stream Transport) + * + * MST support on the source depends on the platform and port. DP initialization + * sets up MST for each MST capable encoder. This will become the primary + * encoder for the port. + * + * MST initialization of each primary encoder creates MST stream encoders, one + * per pipe, and initializes the MST topology manager. The MST stream encoders + * are sometimes called "fake encoders", because they're virtual, not + * physical. Thus there are (number of MST capable ports) x (number of pipes) + * MST stream encoders in total. + * + * Decision to use MST for a sink happens at detect on the connector attached to + * the primary encoder, and this will not change while the sink is connected. We + * always use MST when possible, including for SST sinks with sideband messaging + * support. + * + * The connectors for the MST streams are added and removed dynamically by the + * topology manager. Their connection status is also determined by the topology + * manager. + * + * On hardware, each transcoder may be associated with a single DDI + * port. Multiple transcoders may be associated with the same DDI port only if + * the port is in MST mode. + * + * On TGL+, all the transcoders streaming on the same DDI port will indicate a + * primary transcoder; the TGL_DP_TP_CTL and TGL_DP_TP_STATUS registers are + * relevant only on the primary transcoder. Prior to that, they are port + * registers. 
+ */ + +/* From fake MST stream encoder to primary encoder */ +static struct intel_encoder *to_primary_encoder(struct intel_encoder *encoder) +{ + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); + struct intel_digital_port *dig_port = intel_mst->primary; + + return &dig_port->base; +} + +/* From fake MST stream encoder to primary DP */ +static struct intel_dp *to_primary_dp(struct intel_encoder *encoder) +{ + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); + struct intel_digital_port *dig_port = intel_mst->primary; + + return &dig_port->dp; +} + static int intel_dp_mst_max_dpt_bpp(const struct intel_crtc_state *crtc_state, bool dsc) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - if (!intel_dp_is_uhbr(crtc_state) || DISPLAY_VER(i915) >= 20 || !dsc) + if (!intel_dp_is_uhbr(crtc_state) || DISPLAY_VER(display) >= 20 || !dsc) return INT_MAX; /* @@ -89,7 +139,6 @@ static int intel_dp_mst_max_dpt_bpp(const struct intel_crtc_state *crtc_state, } static int intel_dp_mst_bw_overhead(const struct intel_crtc_state *crtc_state, - const struct intel_connector *connector, bool ssc, int dsc_slice_count, int bpp_x16) { const struct drm_display_mode *adjusted_mode = @@ -118,7 +167,6 @@ static int intel_dp_mst_bw_overhead(const struct intel_crtc_state *crtc_state, } static void intel_dp_mst_compute_m_n(const struct intel_crtc_state *crtc_state, - const struct intel_connector *connector, int overhead, int bpp_x16, struct intel_link_m_n *m_n) @@ -161,35 +209,22 @@ static int intel_dp_mst_dsc_get_slice_count(const struct intel_connector *connec num_joined_pipes); } -static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state, - int max_bpp, - int min_bpp, - struct link_config_limits *limits, - struct drm_connector_state *conn_state, - 
int step, - bool dsc) +int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, + int max_bpp, int min_bpp, + struct drm_connector_state *conn_state, + int step, bool dsc) { + struct intel_display *display = to_intel_display(intel_dp); struct drm_atomic_state *state = crtc_state->uapi.state; - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_dp *intel_dp = &intel_mst->primary->dp; - struct drm_dp_mst_topology_state *mst_state; struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + fixed20_12 pbn_div; int bpp, slots = -EINVAL; int dsc_slice_count = 0; int max_dpt_bpp; - int ret = 0; - - mst_state = drm_atomic_get_mst_topology_state(state, &intel_dp->mst_mgr); - if (IS_ERR(mst_state)) - return PTR_ERR(mst_state); - - crtc_state->lane_count = limits->max_lane_count; - crtc_state->port_clock = limits->max_rate; if (dsc) { if (!intel_dp_supports_fec(intel_dp, connector, crtc_state)) @@ -198,24 +233,23 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, crtc_state->fec_enable = !intel_dp_is_uhbr(crtc_state); } - mst_state->pbn_div = drm_dp_get_vc_payload_bw(&intel_dp->mst_mgr, - crtc_state->port_clock, - crtc_state->lane_count); + pbn_div = drm_dp_get_vc_payload_bw(crtc_state->port_clock, + crtc_state->lane_count); max_dpt_bpp = intel_dp_mst_max_dpt_bpp(crtc_state, dsc); if (max_bpp > max_dpt_bpp) { - drm_dbg_kms(&i915->drm, "Limiting bpp to max DPT bpp (%d -> %d)\n", + drm_dbg_kms(display->drm, "Limiting bpp to max DPT bpp (%d -> %d)\n", max_bpp, max_dpt_bpp); max_bpp = max_dpt_bpp; } - drm_dbg_kms(&i915->drm, "Looking for slots in range min bpp %d max bpp %d\n", + drm_dbg_kms(display->drm, "Looking for slots in range min bpp %d max bpp %d\n", min_bpp, max_bpp); if (dsc) { dsc_slice_count = 
intel_dp_mst_dsc_get_slice_count(connector, crtc_state); if (!dsc_slice_count) { - drm_dbg_kms(&i915->drm, "Can't get valid DSC slice count\n"); + drm_dbg_kms(display->drm, "Can't get valid DSC slice count\n"); return -ENOSPC; } @@ -223,149 +257,169 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, for (bpp = max_bpp; bpp >= min_bpp; bpp -= step) { int local_bw_overhead; - int remote_bw_overhead; int link_bpp_x16; - int remote_tu; - fixed20_12 pbn; - drm_dbg_kms(&i915->drm, "Trying bpp %d\n", bpp); + drm_dbg_kms(display->drm, "Trying bpp %d\n", bpp); link_bpp_x16 = fxp_q4_from_int(dsc ? bpp : intel_dp_output_bpp(crtc_state->output_format, bpp)); - local_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, connector, + local_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, false, dsc_slice_count, link_bpp_x16); - remote_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, connector, - true, dsc_slice_count, link_bpp_x16); - - intel_dp_mst_compute_m_n(crtc_state, connector, + intel_dp_mst_compute_m_n(crtc_state, local_bw_overhead, link_bpp_x16, &crtc_state->dp_m_n); - /* - * The TU size programmed to the HW determines which slots in - * an MTP frame are used for this stream, which needs to match - * the payload size programmed to the first downstream branch - * device's payload table. - * - * Note that atm the payload's PBN value DRM core sends via - * the ALLOCATE_PAYLOAD side-band message matches the payload - * size (which it calculates from the PBN value) it programs - * to the first branch device's payload table. The allocation - * in the payload table could be reduced though (to - * crtc_state->dp_m_n.tu), provided that the driver doesn't - * enable SSC on the corresponding link. 
- */ - pbn.full = dfixed_const(intel_dp_mst_calc_pbn(adjusted_mode->crtc_clock, - link_bpp_x16, - remote_bw_overhead)); - remote_tu = DIV_ROUND_UP(pbn.full, mst_state->pbn_div.full); - - /* - * Aligning the TUs ensures that symbols consisting of multiple - * (4) symbol cycles don't get split between two consecutive - * MTPs, as required by Bspec. - * TODO: remove the alignment restriction for 128b/132b links - * on some platforms, where Bspec allows this. - */ - remote_tu = ALIGN(remote_tu, 4 / crtc_state->lane_count); - - /* - * Also align PBNs accordingly, since MST core will derive its - * own copy of TU from the PBN in drm_dp_atomic_find_time_slots(). - * The above comment about the difference between the PBN - * allocated for the whole path and the TUs allocated for the - * first branch device's link also applies here. - */ - pbn.full = remote_tu * mst_state->pbn_div.full; - crtc_state->pbn = dfixed_trunc(pbn); - - drm_WARN_ON(&i915->drm, remote_tu < crtc_state->dp_m_n.tu); - crtc_state->dp_m_n.tu = remote_tu; + if (intel_dp->is_mst) { + int remote_bw_overhead; + int remote_tu; + fixed20_12 pbn; + + remote_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, + true, dsc_slice_count, link_bpp_x16); + + /* + * The TU size programmed to the HW determines which slots in + * an MTP frame are used for this stream, which needs to match + * the payload size programmed to the first downstream branch + * device's payload table. + * + * Note that atm the payload's PBN value DRM core sends via + * the ALLOCATE_PAYLOAD side-band message matches the payload + * size (which it calculates from the PBN value) it programs + * to the first branch device's payload table. The allocation + * in the payload table could be reduced though (to + * crtc_state->dp_m_n.tu), provided that the driver doesn't + * enable SSC on the corresponding link. 
+ */ + pbn.full = dfixed_const(intel_dp_mst_calc_pbn(adjusted_mode->crtc_clock, + link_bpp_x16, + remote_bw_overhead)); + remote_tu = DIV_ROUND_UP(pbn.full, pbn_div.full); + + /* + * Aligning the TUs ensures that symbols consisting of multiple + * (4) symbol cycles don't get split between two consecutive + * MTPs, as required by Bspec. + * TODO: remove the alignment restriction for 128b/132b links + * on some platforms, where Bspec allows this. + */ + remote_tu = ALIGN(remote_tu, 4 / crtc_state->lane_count); + + /* + * Also align PBNs accordingly, since MST core will derive its + * own copy of TU from the PBN in drm_dp_atomic_find_time_slots(). + * The above comment about the difference between the PBN + * allocated for the whole path and the TUs allocated for the + * first branch device's link also applies here. + */ + pbn.full = remote_tu * pbn_div.full; + + drm_WARN_ON(display->drm, remote_tu < crtc_state->dp_m_n.tu); + crtc_state->dp_m_n.tu = remote_tu; + + slots = drm_dp_atomic_find_time_slots(state, &intel_dp->mst_mgr, + connector->port, + dfixed_trunc(pbn)); + } else { + /* Same as above for remote_tu */ + crtc_state->dp_m_n.tu = ALIGN(crtc_state->dp_m_n.tu, + 4 / crtc_state->lane_count); + + if (crtc_state->dp_m_n.tu <= 64) + slots = crtc_state->dp_m_n.tu; + else + slots = -EINVAL; + } - slots = drm_dp_atomic_find_time_slots(state, &intel_dp->mst_mgr, - connector->port, - crtc_state->pbn); if (slots == -EDEADLK) return slots; if (slots >= 0) { - drm_WARN_ON(&i915->drm, slots != crtc_state->dp_m_n.tu); + drm_WARN_ON(display->drm, slots != crtc_state->dp_m_n.tu); break; } } - /* We failed to find a proper bpp/timeslots, return error */ - if (ret) - slots = ret; - if (slots < 0) { - drm_dbg_kms(&i915->drm, "failed finding vcpi slots:%d\n", + drm_dbg_kms(display->drm, "failed finding vcpi slots:%d\n", slots); - } else { - if (!dsc) - crtc_state->pipe_bpp = bpp; - else - crtc_state->dsc.compressed_bpp_x16 = fxp_q4_from_int(bpp); - drm_dbg_kms(&i915->drm, "Got %d 
slots for pipe bpp %d dsc %d\n", slots, bpp, dsc); + return slots; } - return slots; + if (!dsc) + crtc_state->pipe_bpp = bpp; + else + crtc_state->dsc.compressed_bpp_x16 = fxp_q4_from_int(bpp); + + drm_dbg_kms(display->drm, "Got %d slots for pipe bpp %d dsc %d\n", + slots, bpp, dsc); + + return 0; } -static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state, - struct drm_connector_state *conn_state, - struct link_config_limits *limits) +static int mst_stream_find_vcpi_slots_for_bpp(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, + int max_bpp, int min_bpp, + struct link_config_limits *limits, + struct drm_connector_state *conn_state, + int step, bool dsc) { - int slots = -EINVAL; + struct drm_atomic_state *state = crtc_state->uapi.state; + struct drm_dp_mst_topology_state *mst_state; + + mst_state = drm_atomic_get_mst_topology_state(state, &intel_dp->mst_mgr); + if (IS_ERR(mst_state)) + return PTR_ERR(mst_state); + crtc_state->lane_count = limits->max_lane_count; + crtc_state->port_clock = limits->max_rate; + + mst_state->pbn_div = drm_dp_get_vc_payload_bw(crtc_state->port_clock, + crtc_state->lane_count); + + return intel_dp_mtp_tu_compute_config(intel_dp, crtc_state, + max_bpp, min_bpp, + conn_state, step, dsc); +} + +static int mst_stream_compute_link_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + struct link_config_limits *limits) +{ /* * FIXME: allocate the BW according to link_bpp, which in the case of * YUV420 is only half of the pipe bpp value. 
*/ - slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state, - fxp_q4_to_int(limits->link.max_bpp_x16), - fxp_q4_to_int(limits->link.min_bpp_x16), - limits, - conn_state, 2 * 3, false); - - if (slots < 0) - return slots; - - return 0; + return mst_stream_find_vcpi_slots_for_bpp(intel_dp, crtc_state, + fxp_q4_to_int(limits->link.max_bpp_x16), + fxp_q4_to_int(limits->link.min_bpp_x16), + limits, + conn_state, 2 * 3, false); } -static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state, - struct drm_connector_state *conn_state, - struct link_config_limits *limits) +static int mst_stream_dsc_compute_link_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + struct link_config_limits *limits) { - struct intel_connector *connector = - to_intel_connector(conn_state->connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); - int slots = -EINVAL; + struct intel_display *display = to_intel_display(intel_dp); + struct intel_connector *connector = to_intel_connector(conn_state->connector); int i, num_bpc; u8 dsc_bpc[3] = {}; int min_bpp, max_bpp, sink_min_bpp, sink_max_bpp; - u8 dsc_max_bpc; int min_compressed_bpp, max_compressed_bpp; - /* Max DSC Input BPC for ICL is 10 and for TGL+ is 12 */ - if (DISPLAY_VER(i915) >= 12) - dsc_max_bpc = min_t(u8, 12, conn_state->max_requested_bpc); - else - dsc_max_bpc = min_t(u8, 10, conn_state->max_requested_bpc); - - max_bpp = min_t(u8, dsc_max_bpc * 3, limits->pipe.max_bpp); + max_bpp = limits->pipe.max_bpp; min_bpp = limits->pipe.min_bpp; num_bpc = drm_dp_dsc_sink_supported_input_bpcs(connector->dp.dsc_dpcd, dsc_bpc); - drm_dbg_kms(&i915->drm, "DSC Source supported min bpp %d max bpp %d\n", + drm_dbg_kms(display->drm, "DSC Source supported min bpp %d max bpp %d\n", min_bpp, max_bpp); sink_max_bpp = dsc_bpc[0] * 3; @@ -378,7 +432,7 @@ static int intel_dp_dsc_mst_compute_link_config(struct 
intel_encoder *encoder, sink_max_bpp = dsc_bpc[i] * 3; } - drm_dbg_kms(&i915->drm, "DSC Sink supported min bpp %d max bpp %d\n", + drm_dbg_kms(display->drm, "DSC Sink supported min bpp %d max bpp %d\n", sink_min_bpp, sink_max_bpp); if (min_bpp < sink_min_bpp) @@ -389,41 +443,28 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder, crtc_state->pipe_bpp = max_bpp; - max_compressed_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, - crtc_state, - max_bpp / 3); - max_compressed_bpp = min(max_compressed_bpp, - fxp_q4_to_int(limits->link.max_bpp_x16)); - - min_compressed_bpp = intel_dp_dsc_sink_min_compressed_bpp(crtc_state); - min_compressed_bpp = max(min_compressed_bpp, - fxp_q4_to_int_roundup(limits->link.min_bpp_x16)); + max_compressed_bpp = fxp_q4_to_int(limits->link.max_bpp_x16); + min_compressed_bpp = fxp_q4_to_int_roundup(limits->link.min_bpp_x16); - drm_dbg_kms(&i915->drm, "DSC Sink supported compressed min bpp %d compressed max bpp %d\n", + drm_dbg_kms(display->drm, "DSC Sink supported compressed min bpp %d compressed max bpp %d\n", min_compressed_bpp, max_compressed_bpp); /* Align compressed bpps according to our own constraints */ - max_compressed_bpp = intel_dp_dsc_nearest_valid_bpp(i915, max_compressed_bpp, + max_compressed_bpp = intel_dp_dsc_nearest_valid_bpp(display, max_compressed_bpp, crtc_state->pipe_bpp); - min_compressed_bpp = intel_dp_dsc_nearest_valid_bpp(i915, min_compressed_bpp, + min_compressed_bpp = intel_dp_dsc_nearest_valid_bpp(display, min_compressed_bpp, crtc_state->pipe_bpp); - slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state, max_compressed_bpp, - min_compressed_bpp, limits, - conn_state, 1, true); - - if (slots < 0) - return slots; - - return 0; + return mst_stream_find_vcpi_slots_for_bpp(intel_dp, crtc_state, max_compressed_bpp, + min_compressed_bpp, limits, + conn_state, 1, true); } -static int intel_dp_mst_update_slots(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state, 
- struct drm_connector_state *conn_state) + +static int mst_stream_update_slots(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_dp *intel_dp = &intel_mst->primary->dp; + struct intel_display *display = to_intel_display(intel_dp); struct drm_dp_mst_topology_mgr *mgr = &intel_dp->mst_mgr; struct drm_dp_mst_topology_state *topology_state; u8 link_coding_cap = intel_dp_is_uhbr(crtc_state) ? @@ -431,7 +472,7 @@ static int intel_dp_mst_update_slots(struct intel_encoder *encoder, topology_state = drm_atomic_get_mst_topology_state(conn_state->state, mgr); if (IS_ERR(topology_state)) { - drm_dbg_kms(&i915->drm, "slot update failed\n"); + drm_dbg_kms(display->drm, "slot update failed\n"); return PTR_ERR(topology_state); } @@ -474,12 +515,13 @@ hblank_expansion_quirk_needs_dsc(const struct intel_connector *connector, } static bool -adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *connector, +adjust_limits_for_dsc_hblank_expansion_quirk(struct intel_dp *intel_dp, + const struct intel_connector *connector, const struct intel_crtc_state *crtc_state, struct link_config_limits *limits, bool dsc) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); const struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); int min_bpp_x16 = limits->link.min_bpp_x16; @@ -487,15 +529,15 @@ adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *conne return true; if (!dsc) { - if (intel_dp_supports_dsc(connector, crtc_state)) { - drm_dbg_kms(&i915->drm, + if (intel_dp_supports_dsc(intel_dp, connector, crtc_state)) { + drm_dbg_kms(display->drm, "[CRTC:%d:%s][CONNECTOR:%d:%s] DSC needed by hblank expansion quirk\n", crtc->base.base.id, crtc->base.name, 
connector->base.base.id, connector->base.name); return false; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to 24 due to hblank expansion quirk\n", crtc->base.base.id, crtc->base.name, connector->base.base.id, connector->base.name); @@ -508,7 +550,7 @@ adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *conne return true; } - drm_WARN_ON(&i915->drm, limits->min_rate != limits->max_rate); + drm_WARN_ON(display->drm, limits->min_rate != limits->max_rate); if (limits->max_rate < 540000) min_bpp_x16 = fxp_q4_from_int(13); @@ -518,7 +560,7 @@ adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *conne if (limits->link.min_bpp_x16 >= min_bpp_x16) return true; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to " FXP_Q4_FMT " in DSC mode due to hblank expansion quirk\n", crtc->base.base.id, crtc->base.name, connector->base.base.id, connector->base.name, @@ -533,56 +575,31 @@ adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *conne } static bool -intel_dp_mst_compute_config_limits(struct intel_dp *intel_dp, - const struct intel_connector *connector, - struct intel_crtc_state *crtc_state, - bool dsc, - struct link_config_limits *limits) -{ - /* - * for MST we always configure max link bw - the spec doesn't - * seem to suggest we should do otherwise. - */ - limits->min_rate = limits->max_rate = - intel_dp_max_link_rate(intel_dp); - - limits->min_lane_count = limits->max_lane_count = - intel_dp_max_lane_count(intel_dp); - - limits->pipe.min_bpp = intel_dp_min_bpp(crtc_state->output_format); - /* - * FIXME: If all the streams can't fit into the link with - * their current pipe_bpp we should reduce pipe_bpp across - * the board until things start to fit. Until then we - * limit to <= 8bpc since that's what was hardcoded for all - * MST streams previously. 
This hack should be removed once - * we have the proper retry logic in place. - */ - limits->pipe.max_bpp = min(crtc_state->pipe_bpp, 24); - - intel_dp_test_compute_config(intel_dp, crtc_state, limits); - - if (!intel_dp_compute_config_link_bpp_limits(intel_dp, - crtc_state, - dsc, - limits)) +mst_stream_compute_config_limits(struct intel_dp *intel_dp, + const struct intel_connector *connector, + struct intel_crtc_state *crtc_state, + bool dsc, + struct link_config_limits *limits) +{ + if (!intel_dp_compute_config_limits(intel_dp, crtc_state, false, dsc, + limits)) return false; - return adjust_limits_for_dsc_hblank_expansion_quirk(connector, + return adjust_limits_for_dsc_hblank_expansion_quirk(intel_dp, + connector, crtc_state, limits, dsc); } -static int intel_dp_mst_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int mst_stream_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct intel_atomic_state *state = to_intel_atomic_state(conn_state->state); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_dp *intel_dp = &intel_mst->primary->dp; + struct intel_dp *intel_dp = to_primary_dp(encoder); struct intel_connector *connector = to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = @@ -609,18 +626,15 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->has_pch_encoder = false; - joiner_needs_dsc = intel_dp_joiner_needs_dsc(dev_priv, num_joined_pipes); + joiner_needs_dsc = intel_dp_joiner_needs_dsc(display, num_joined_pipes); dsc_needed = joiner_needs_dsc || 
intel_dp->force_dsc_en || - !intel_dp_mst_compute_config_limits(intel_dp, - connector, - pipe_config, - false, - &limits); + !mst_stream_compute_config_limits(intel_dp, connector, + pipe_config, false, &limits); if (!dsc_needed) { - ret = intel_dp_mst_compute_link_config(encoder, pipe_config, - conn_state, &limits); + ret = mst_stream_compute_link_config(intel_dp, pipe_config, + conn_state, &limits); if (ret == -EDEADLK) return ret; @@ -629,35 +643,37 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, dsc_needed = true; } + if (dsc_needed && !intel_dp_supports_dsc(intel_dp, connector, pipe_config)) { + drm_dbg_kms(display->drm, "DSC required but not available\n"); + return -EINVAL; + } + /* enable compression if the mode doesn't fit available BW */ if (dsc_needed) { - drm_dbg_kms(&dev_priv->drm, "Try DSC (fallback=%s, joiner=%s, force=%s)\n", + drm_dbg_kms(display->drm, "Try DSC (fallback=%s, joiner=%s, force=%s)\n", str_yes_no(ret), str_yes_no(joiner_needs_dsc), str_yes_no(intel_dp->force_dsc_en)); - if (!intel_dp_supports_dsc(connector, pipe_config)) - return -EINVAL; - if (!intel_dp_mst_compute_config_limits(intel_dp, - connector, - pipe_config, - true, - &limits)) + if (!mst_stream_compute_config_limits(intel_dp, connector, + pipe_config, true, + &limits)) return -EINVAL; /* * FIXME: As bpc is hardcoded to 8, as mentioned above, * WARN and ignore the debug flag force_dsc_bpc for now. */ - drm_WARN(&dev_priv->drm, intel_dp->force_dsc_bpc, "Cannot Force BPC for MST\n"); + drm_WARN(display->drm, intel_dp->force_dsc_bpc, + "Cannot Force BPC for MST\n"); /* * Try to get at least some timeslots and then see, if * we can fit there with DSC. 
*/ - drm_dbg_kms(&dev_priv->drm, "Trying to find VCPI slots in DSC mode\n"); + drm_dbg_kms(display->drm, "Trying to find VCPI slots in DSC mode\n"); - ret = intel_dp_dsc_mst_compute_link_config(encoder, pipe_config, - conn_state, &limits); + ret = mst_stream_dsc_compute_link_config(intel_dp, pipe_config, + conn_state, &limits); if (ret < 0) return ret; @@ -669,14 +685,14 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, if (ret) return ret; - ret = intel_dp_mst_update_slots(encoder, pipe_config, conn_state); + ret = mst_stream_update_slots(intel_dp, pipe_config, conn_state); if (ret) return ret; pipe_config->limited_color_range = intel_dp_limited_color_range(pipe_config, conn_state); - if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) + if (display->platform.geminilake || display->platform.broxton) pipe_config->lane_lat_optim_mask = bxt_dpio_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); @@ -698,13 +714,13 @@ static unsigned int intel_dp_mst_transcoder_mask(struct intel_atomic_state *state, struct intel_dp *mst_port) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); const struct intel_digital_connector_state *conn_state; struct intel_connector *connector; u8 transcoders = 0; int i; - if (DISPLAY_VER(dev_priv) < 12) + if (DISPLAY_VER(display) < 12) return 0; for_each_new_intel_connector_in_state(state, connector, conn_state, i) { @@ -758,7 +774,7 @@ static int intel_dp_mst_check_fec_change(struct intel_atomic_state *state, struct drm_dp_mst_topology_mgr *mst_mgr, struct intel_link_bw_limits *limits) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_crtc *crtc; u8 mst_pipe_mask; u8 fec_pipe_mask = 0; @@ -766,12 +782,12 @@ static int intel_dp_mst_check_fec_change(struct intel_atomic_state *state, mst_pipe_mask = get_pipes_downstream_of_mst_port(state, mst_mgr, NULL); - 
for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, mst_pipe_mask) { + for_each_intel_crtc_in_pipe_mask(display->drm, crtc, mst_pipe_mask) { struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); /* Atomic connector check should've added all the MST CRTCs. */ - if (drm_WARN_ON(&i915->drm, !crtc_state)) + if (drm_WARN_ON(display->drm, !crtc_state)) return -EINVAL; if (crtc_state->fec_enable) @@ -850,13 +866,12 @@ int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state, return 0; } -static int intel_dp_mst_compute_config_late(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state, - struct drm_connector_state *conn_state) +static int mst_stream_compute_config_late(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state) { struct intel_atomic_state *state = to_intel_atomic_state(conn_state->state); - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_dp *intel_dp = &intel_mst->primary->dp; + struct intel_dp *intel_dp = to_primary_dp(encoder); /* lowest numbered transcoder will be designated master */ crtc_state->mst_master_transcoder = @@ -879,10 +894,10 @@ static int intel_dp_mst_compute_config_late(struct intel_encoder *encoder, * recomputation of the corresponding CRTC states. 
*/ static int -intel_dp_mst_atomic_topology_check(struct intel_connector *connector, - struct intel_atomic_state *state) +mst_connector_atomic_topology_check(struct intel_connector *connector, + struct intel_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(connector); struct drm_connector_list_iter connector_list_iter; struct intel_connector *connector_iter; int ret = 0; @@ -890,7 +905,7 @@ intel_dp_mst_atomic_topology_check(struct intel_connector *connector, if (!intel_connector_needs_modeset(state, &connector->base)) return 0; - drm_connector_list_iter_begin(&dev_priv->drm, &connector_list_iter); + drm_connector_list_iter_begin(display->drm, &connector_list_iter); for_each_intel_connector_iter(connector_iter, &connector_list_iter) { struct intel_digital_connector_state *conn_iter_state; struct intel_crtc_state *crtc_state; @@ -928,8 +943,8 @@ intel_dp_mst_atomic_topology_check(struct intel_connector *connector, } static int -intel_dp_mst_atomic_check(struct drm_connector *connector, - struct drm_atomic_state *_state) +mst_connector_atomic_check(struct drm_connector *connector, + struct drm_atomic_state *_state) { struct intel_atomic_state *state = to_intel_atomic_state(_state); struct intel_connector *intel_connector = @@ -940,7 +955,7 @@ intel_dp_mst_atomic_check(struct drm_connector *connector, if (ret) return ret; - ret = intel_dp_mst_atomic_topology_check(intel_connector, state); + ret = mst_connector_atomic_topology_check(intel_connector, state); if (ret) return ret; @@ -957,42 +972,18 @@ intel_dp_mst_atomic_check(struct drm_connector *connector, intel_connector->port); } -static void clear_act_sent(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - - intel_de_write(i915, dp_tp_status_reg(encoder, crtc_state), - DP_TP_STATUS_ACT_SENT); -} - -static void wait_for_act_sent(struct 
intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_dp *intel_dp = &intel_mst->primary->dp; - - if (intel_de_wait_for_set(i915, dp_tp_status_reg(encoder, crtc_state), - DP_TP_STATUS_ACT_SENT, 1)) - drm_err(&i915->drm, "Timed out waiting for ACT sent\n"); - - drm_dp_check_act_status(&intel_dp->mst_mgr); -} - -static void intel_mst_disable_dp(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *old_crtc_state, - const struct drm_connector_state *old_conn_state) +static void mst_stream_disable(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { + struct intel_display *display = to_intel_display(state); struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_digital_port *dig_port = intel_mst->primary; - struct intel_dp *intel_dp = &dig_port->dp; + struct intel_dp *intel_dp = to_primary_dp(encoder); struct intel_connector *connector = to_intel_connector(old_conn_state->connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); - drm_dbg_kms(&i915->drm, "active links %d\n", + drm_dbg_kms(display->drm, "active links %d\n", intel_dp->active_mst_links); if (intel_dp->active_mst_links == 1) @@ -1003,15 +994,15 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state, intel_dp_sink_disable_decompression(state, connector, old_crtc_state); } -static void intel_mst_post_disable_dp(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *old_crtc_state, - const struct drm_connector_state *old_conn_state) +static void mst_stream_post_disable(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const 
struct drm_connector_state *old_conn_state) { struct intel_display *display = to_intel_display(encoder); struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_digital_port *dig_port = intel_mst->primary; - struct intel_dp *intel_dp = &dig_port->dp; + struct intel_encoder *primary_encoder = to_primary_encoder(encoder); + struct intel_dp *intel_dp = to_primary_dp(encoder); struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct drm_dp_mst_topology_state *old_mst_state = @@ -1022,15 +1013,13 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, drm_atomic_get_mst_payload_state(old_mst_state, connector->port); struct drm_dp_mst_atomic_payload *new_payload = drm_atomic_get_mst_payload_state(new_mst_state, connector->port); - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_crtc *pipe_crtc; bool last_mst_stream; int i; intel_dp->active_mst_links--; last_mst_stream = intel_dp->active_mst_links == 0; - drm_WARN_ON(&dev_priv->drm, - DISPLAY_VER(dev_priv) >= 12 && last_mst_stream && + drm_WARN_ON(display->drm, DISPLAY_VER(display) >= 12 && last_mst_stream && !intel_dp_mst_is_master_trans(old_crtc_state)); for_each_pipe_crtc_modeset_disable(display, pipe_crtc, old_crtc_state, i) { @@ -1044,13 +1033,14 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, drm_dp_remove_payload_part1(&intel_dp->mst_mgr, new_mst_state, new_payload); - clear_act_sent(encoder, old_crtc_state); + intel_ddi_clear_act_sent(encoder, old_crtc_state); - intel_de_rmw(dev_priv, - TRANS_DDI_FUNC_CTL(dev_priv, old_crtc_state->cpu_transcoder), + intel_de_rmw(display, + TRANS_DDI_FUNC_CTL(display, old_crtc_state->cpu_transcoder), TRANS_DDI_DP_VC_PAYLOAD_ALLOC, 0); - wait_for_act_sent(encoder, old_crtc_state); + intel_ddi_wait_for_act_sent(encoder, old_crtc_state); + drm_dp_check_act_status(&intel_dp->mst_mgr); drm_dp_remove_payload_part2(&intel_dp->mst_mgr, new_mst_state, 
old_payload, new_payload); @@ -1063,7 +1053,7 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, intel_dsc_disable(old_pipe_crtc_state); - if (DISPLAY_VER(dev_priv) >= 9) + if (DISPLAY_VER(display) >= 9) skl_scaler_disable(old_pipe_crtc_state); else ilk_pfit_disable(old_pipe_crtc_state); @@ -1080,8 +1070,7 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, * BSpec 4287: disable DIP after the transcoder is disabled and before * the transcoder clock select is set to none. */ - intel_dp_set_infoframes(&dig_port->base, false, - old_crtc_state, NULL); + intel_dp_set_infoframes(primary_encoder, false, old_crtc_state, NULL); /* * From TGL spec: "If multi-stream slave transcoder: Configure * Transcoder Clock Select to direct no clock to the transcoder" @@ -1089,51 +1078,49 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, * From older GENs spec: "Configure Transcoder Clock Select to direct * no clock to the transcoder" */ - if (DISPLAY_VER(dev_priv) < 12 || !last_mst_stream) + if (DISPLAY_VER(display) < 12 || !last_mst_stream) intel_ddi_disable_transcoder_clock(old_crtc_state); intel_mst->connector = NULL; if (last_mst_stream) - dig_port->base.post_disable(state, &dig_port->base, - old_crtc_state, NULL); + primary_encoder->post_disable(state, primary_encoder, + old_crtc_state, NULL); - drm_dbg_kms(&dev_priv->drm, "active links %d\n", + drm_dbg_kms(display->drm, "active links %d\n", intel_dp->active_mst_links); } -static void intel_mst_post_pll_disable_dp(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *old_crtc_state, - const struct drm_connector_state *old_conn_state) +static void mst_stream_post_pll_disable(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct 
intel_digital_port *dig_port = intel_mst->primary; - struct intel_dp *intel_dp = &dig_port->dp; + struct intel_encoder *primary_encoder = to_primary_encoder(encoder); + struct intel_dp *intel_dp = to_primary_dp(encoder); if (intel_dp->active_mst_links == 0 && - dig_port->base.post_pll_disable) - dig_port->base.post_pll_disable(state, encoder, old_crtc_state, old_conn_state); + primary_encoder->post_pll_disable) + primary_encoder->post_pll_disable(state, primary_encoder, old_crtc_state, old_conn_state); } -static void intel_mst_pre_pll_enable_dp(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state) +static void mst_stream_pre_pll_enable(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_digital_port *dig_port = intel_mst->primary; - struct intel_dp *intel_dp = &dig_port->dp; + struct intel_encoder *primary_encoder = to_primary_encoder(encoder); + struct intel_dp *intel_dp = to_primary_dp(encoder); if (intel_dp->active_mst_links == 0) - dig_port->base.pre_pll_enable(state, &dig_port->base, - pipe_config, NULL); + primary_encoder->pre_pll_enable(state, primary_encoder, + pipe_config, NULL); else /* * The port PLL state needs to get updated for secondary * streams as for the primary stream. 
*/ - intel_ddi_update_active_dpll(state, &dig_port->base, + intel_ddi_update_active_dpll(state, primary_encoder, to_intel_crtc(pipe_config->uapi.crtc)); } @@ -1164,15 +1151,15 @@ static void intel_mst_reprobe_topology(struct intel_dp *intel_dp, crtc_state->port_clock, crtc_state->lane_count); } -static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state) +static void mst_stream_pre_enable(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { + struct intel_display *display = to_intel_display(state); struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_digital_port *dig_port = intel_mst->primary; - struct intel_dp *intel_dp = &dig_port->dp; - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_encoder *primary_encoder = to_primary_encoder(encoder); + struct intel_dp *intel_dp = to_primary_dp(encoder); struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_dp_mst_topology_state *mst_state = @@ -1186,11 +1173,10 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, connector->encoder = encoder; intel_mst->connector = connector; first_mst_stream = intel_dp->active_mst_links == 0; - drm_WARN_ON(&dev_priv->drm, - DISPLAY_VER(dev_priv) >= 12 && first_mst_stream && + drm_WARN_ON(display->drm, DISPLAY_VER(display) >= 12 && first_mst_stream && !intel_dp_mst_is_master_trans(pipe_config)); - drm_dbg_kms(&dev_priv->drm, "active links %d\n", + drm_dbg_kms(display->drm, "active links %d\n", intel_dp->active_mst_links); if (first_mst_stream) @@ -1201,8 +1187,8 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, intel_dp_sink_enable_decompression(state, connector, pipe_config); if (first_mst_stream) { - 
dig_port->base.pre_enable(state, &dig_port->base, - pipe_config, NULL); + primary_encoder->pre_enable(state, primary_encoder, + pipe_config, NULL); intel_mst_reprobe_topology(intel_dp, pipe_config); } @@ -1212,24 +1198,28 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, ret = drm_dp_add_payload_part1(&intel_dp->mst_mgr, mst_state, drm_atomic_get_mst_payload_state(mst_state, connector->port)); if (ret < 0) - intel_dp_queue_modeset_retry_for_link(state, &dig_port->base, pipe_config); + intel_dp_queue_modeset_retry_for_link(state, primary_encoder, pipe_config); /* * Before Gen 12 this is not done as part of - * dig_port->base.pre_enable() and should be done here. For + * primary_encoder->pre_enable() and should be done here. For * Gen 12+ the step in which this should be done is different for the * first MST stream, so it's done on the DDI for the first stream and * here for the following ones. */ - if (DISPLAY_VER(dev_priv) < 12 || !first_mst_stream) + if (DISPLAY_VER(display) < 12 || !first_mst_stream) intel_ddi_enable_transcoder_clock(encoder, pipe_config); - intel_dsc_dp_pps_write(&dig_port->base, pipe_config); + if (DISPLAY_VER(display) >= 13 && !first_mst_stream) + intel_ddi_config_transcoder_func(encoder, pipe_config); + + intel_dsc_dp_pps_write(primary_encoder, pipe_config); intel_ddi_set_dp_msa(pipe_config, conn_state); } static void enable_bs_jitter_was(const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(crtc_state); struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); u32 clear = 0; u32 set = 0; @@ -1237,7 +1227,7 @@ static void enable_bs_jitter_was(const struct intel_crtc_state *crtc_state) if (!IS_ALDERLAKE_P(i915)) return; - if (!IS_DISPLAY_STEP(i915, STEP_D0, STEP_FOREVER)) + if (!IS_DISPLAY_STEP(display, STEP_D0, STEP_FOREVER)) return; /* Wa_14013163432:adlp */ @@ -1245,7 +1235,7 @@ static void enable_bs_jitter_was(const struct intel_crtc_state *crtc_state) set |= 
DP_MST_FEC_BS_JITTER_WA(crtc_state->cpu_transcoder); /* Wa_14014143976:adlp */ - if (IS_DISPLAY_STEP(i915, STEP_E0, STEP_FOREVER)) { + if (IS_DISPLAY_STEP(display, STEP_E0, STEP_FOREVER)) { if (intel_dp_is_uhbr(crtc_state)) set |= DP_MST_SHORT_HBLANK_WA(crtc_state->cpu_transcoder); else if (crtc_state->fec_enable) @@ -1258,20 +1248,18 @@ static void enable_bs_jitter_was(const struct intel_crtc_state *crtc_state) if (!clear && !set) return; - intel_de_rmw(i915, CHICKEN_MISC_3, clear, set); + intel_de_rmw(display, CHICKEN_MISC_3, clear, set); } -static void intel_mst_enable_dp(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state) +static void mst_stream_enable(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_digital_port *dig_port = intel_mst->primary; - struct intel_dp *intel_dp = &dig_port->dp; + struct intel_encoder *primary_encoder = to_primary_encoder(encoder); + struct intel_dp *intel_dp = to_primary_dp(encoder); struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_dp_mst_topology_state *mst_state = drm_atomic_get_new_mst_topology_state(&state->base, &intel_dp->mst_mgr); enum transcoder trans = pipe_config->cpu_transcoder; @@ -1279,16 +1267,16 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, struct intel_crtc *pipe_crtc; int ret, i; - drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder); + drm_WARN_ON(display->drm, pipe_config->has_pch_encoder); if (intel_dp_is_uhbr(pipe_config)) { const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; u64 crtc_clock_hz = 
KHz(adjusted_mode->crtc_clock); - intel_de_write(dev_priv, TRANS_DP2_VFREQHIGH(pipe_config->cpu_transcoder), + intel_de_write(display, TRANS_DP2_VFREQHIGH(pipe_config->cpu_transcoder), TRANS_DP2_VFREQ_PIXEL_CLOCK(crtc_clock_hz >> 24)); - intel_de_write(dev_priv, TRANS_DP2_VFREQLOW(pipe_config->cpu_transcoder), + intel_de_write(display, TRANS_DP2_VFREQLOW(pipe_config->cpu_transcoder), TRANS_DP2_VFREQ_PIXEL_CLOCK(crtc_clock_hz & 0xffffff)); } @@ -1296,15 +1284,16 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, intel_ddi_enable_transcoder_func(encoder, pipe_config); - clear_act_sent(encoder, pipe_config); + intel_ddi_clear_act_sent(encoder, pipe_config); - intel_de_rmw(dev_priv, TRANS_DDI_FUNC_CTL(dev_priv, trans), 0, + intel_de_rmw(display, TRANS_DDI_FUNC_CTL(display, trans), 0, TRANS_DDI_DP_VC_PAYLOAD_ALLOC); - drm_dbg_kms(&dev_priv->drm, "active links %d\n", + drm_dbg_kms(display->drm, "active links %d\n", intel_dp->active_mst_links); - wait_for_act_sent(encoder, pipe_config); + intel_ddi_wait_for_act_sent(encoder, pipe_config); + drm_dp_check_act_status(&intel_dp->mst_mgr); if (first_mst_stream) intel_ddi_wait_for_fec_status(encoder, pipe_config, true); @@ -1313,10 +1302,10 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, drm_atomic_get_mst_payload_state(mst_state, connector->port)); if (ret < 0) - intel_dp_queue_modeset_retry_for_link(state, &dig_port->base, pipe_config); + intel_dp_queue_modeset_retry_for_link(state, primary_encoder, pipe_config); - if (DISPLAY_VER(dev_priv) >= 12) - intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, trans), + if (DISPLAY_VER(display) >= 12) + intel_de_rmw(display, CHICKEN_TRANS(display, trans), FECSTALL_DIS_DPTSTREAM_DPTTG, pipe_config->fec_enable ? 
FECSTALL_DIS_DPTSTREAM_DPTTG : 0); @@ -1334,8 +1323,8 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, intel_hdcp_enable(state, encoder, pipe_config, conn_state); } -static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, - enum pipe *pipe) +static bool mst_stream_get_hw_state(struct intel_encoder *encoder, + enum pipe *pipe) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); *pipe = intel_mst->pipe; @@ -1344,28 +1333,26 @@ static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, return false; } -static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) +static void mst_stream_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_digital_port *dig_port = intel_mst->primary; + struct intel_encoder *primary_encoder = to_primary_encoder(encoder); - dig_port->base.get_config(&dig_port->base, pipe_config); + primary_encoder->get_config(primary_encoder, pipe_config); } -static bool intel_dp_mst_initial_fastset_check(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state) +static bool mst_stream_initial_fastset_check(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); - struct intel_digital_port *dig_port = intel_mst->primary; + struct intel_encoder *primary_encoder = to_primary_encoder(encoder); - return intel_dp_initial_fastset_check(&dig_port->base, crtc_state); + return intel_dp_initial_fastset_check(primary_encoder, crtc_state); } -static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) +static int mst_connector_get_ddc_modes(struct drm_connector *connector) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); - struct drm_i915_private *i915 = 
to_i915(intel_connector->base.dev); struct intel_dp *intel_dp = intel_connector->mst_port; const struct drm_edid *drm_edid; int ret; @@ -1373,7 +1360,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) if (drm_connector_is_unregistered(connector)) return intel_connector_update_modes(connector, NULL); - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return drm_edid_connector_add_modes(connector); drm_edid = drm_dp_mst_edid_read(connector, &intel_dp->mst_mgr, intel_connector->port); @@ -1386,7 +1373,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) } static int -intel_dp_mst_connector_late_register(struct drm_connector *connector) +mst_connector_late_register(struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); int ret; @@ -1405,7 +1392,7 @@ intel_dp_mst_connector_late_register(struct drm_connector *connector) } static void -intel_dp_mst_connector_early_unregister(struct drm_connector *connector) +mst_connector_early_unregister(struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); @@ -1414,35 +1401,36 @@ intel_dp_mst_connector_early_unregister(struct drm_connector *connector) intel_connector->port); } -static const struct drm_connector_funcs intel_dp_mst_connector_funcs = { +static const struct drm_connector_funcs mst_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .atomic_get_property = intel_digital_connector_atomic_get_property, .atomic_set_property = intel_digital_connector_atomic_set_property, - .late_register = intel_dp_mst_connector_late_register, - .early_unregister = intel_dp_mst_connector_early_unregister, + .late_register = mst_connector_late_register, + .early_unregister = mst_connector_early_unregister, .destroy = intel_connector_destroy, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, 
.atomic_duplicate_state = intel_digital_connector_duplicate_state, }; -static int intel_dp_mst_get_modes(struct drm_connector *connector) +static int mst_connector_get_modes(struct drm_connector *connector) { - return intel_dp_mst_get_ddc_modes(connector); + return mst_connector_get_ddc_modes(connector); } static int -intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, +mst_connector_mode_valid_ctx(struct drm_connector *connector, const struct drm_display_mode *mode, struct drm_modeset_acquire_ctx *ctx, enum drm_mode_status *status) { + struct intel_display *display = to_intel_display(connector->dev); struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; struct drm_dp_mst_topology_mgr *mgr = &intel_dp->mst_mgr; struct drm_dp_mst_port *port = intel_connector->port; const int min_bpp = 18; - int max_dotclk = to_i915(connector->dev)->display.cdclk.max_dotclk_freq; + int max_dotclk = display->cdclk.max_dotclk_freq; int max_rate, mode_rate, max_lanes, max_link_clock; int ret; bool dsc = false; @@ -1512,7 +1500,7 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, if (drm_dp_sink_supports_fec(intel_connector->dp.fec_capability)) { dsc_max_compressed_bpp = - intel_dp_dsc_get_max_compressed_bpp(dev_priv, + intel_dp_dsc_get_max_compressed_bpp(display, max_link_clock, max_lanes, target_clock, @@ -1530,7 +1518,7 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, dsc = dsc_max_compressed_bpp && dsc_slice_count; } - if (intel_dp_joiner_needs_dsc(dev_priv, num_joined_pipes) && !dsc) { + if (intel_dp_joiner_needs_dsc(display, num_joined_pipes) && !dsc) { *status = MODE_CLOCK_HIGH; return 0; } @@ -1544,8 +1532,9 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, return 0; } -static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *connector, - struct drm_atomic_state *state) 
+static struct drm_encoder * +mst_connector_atomic_best_encoder(struct drm_connector *connector, + struct drm_atomic_state *state) { struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state, connector); @@ -1557,20 +1546,20 @@ static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *c } static int -intel_dp_mst_detect(struct drm_connector *connector, - struct drm_modeset_acquire_ctx *ctx, bool force) +mst_connector_detect_ctx(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, bool force) { - struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_display *display = to_intel_display(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; - if (!intel_display_device_enabled(i915)) + if (!intel_display_device_enabled(display)) return connector_status_disconnected; if (drm_connector_is_unregistered(connector)) return connector_status_disconnected; - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return connector->status; intel_dp_flush_connector_commits(intel_connector); @@ -1579,15 +1568,15 @@ intel_dp_mst_detect(struct drm_connector *connector, intel_connector->port); } -static const struct drm_connector_helper_funcs intel_dp_mst_connector_helper_funcs = { - .get_modes = intel_dp_mst_get_modes, - .mode_valid_ctx = intel_dp_mst_mode_valid_ctx, - .atomic_best_encoder = intel_mst_atomic_best_encoder, - .atomic_check = intel_dp_mst_atomic_check, - .detect_ctx = intel_dp_mst_detect, +static const struct drm_connector_helper_funcs mst_connector_helper_funcs = { + .get_modes = mst_connector_get_modes, + .mode_valid_ctx = mst_connector_mode_valid_ctx, + .atomic_best_encoder = mst_connector_atomic_best_encoder, + .atomic_check = mst_connector_atomic_check, + .detect_ctx = mst_connector_detect_ctx, }; -static void intel_dp_mst_encoder_destroy(struct 
drm_encoder *encoder) +static void mst_stream_encoder_destroy(struct drm_encoder *encoder) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(to_intel_encoder(encoder)); @@ -1595,31 +1584,32 @@ static void intel_dp_mst_encoder_destroy(struct drm_encoder *encoder) kfree(intel_mst); } -static const struct drm_encoder_funcs intel_dp_mst_enc_funcs = { - .destroy = intel_dp_mst_encoder_destroy, +static const struct drm_encoder_funcs mst_stream_encoder_funcs = { + .destroy = mst_stream_encoder_destroy, }; -static bool intel_dp_mst_get_hw_state(struct intel_connector *connector) +static bool mst_connector_get_hw_state(struct intel_connector *connector) { - if (intel_attached_encoder(connector) && connector->base.state->crtc) { - enum pipe pipe; - if (!intel_attached_encoder(connector)->get_hw_state(intel_attached_encoder(connector), &pipe)) - return false; - return true; - } - return false; + /* This is the MST stream encoder set in ->pre_enable, if any */ + struct intel_encoder *encoder = intel_attached_encoder(connector); + enum pipe pipe; + + if (!encoder || !connector->base.state->crtc) + return false; + + return encoder->get_hw_state(encoder, &pipe); } -static int intel_dp_mst_add_properties(struct intel_dp *intel_dp, - struct drm_connector *connector, - const char *pathprop) +static int mst_topology_add_connector_properties(struct intel_dp *intel_dp, + struct drm_connector *connector, + const char *pathprop) { - struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_display *display = to_intel_display(intel_dp); drm_object_attach_property(&connector->base, - i915->drm.mode_config.path_property, 0); + display->drm->mode_config.path_property, 0); drm_object_attach_property(&connector->base, - i915->drm.mode_config.tile_property, 0); + display->drm->mode_config.tile_property, 0); intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); @@ -1653,7 +1643,7 @@ intel_dp_mst_read_decompression_port_dsc_caps(struct 
intel_dp *intel_dp, static bool detect_dsc_hblank_expansion_quirk(const struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct drm_dp_aux *aux = connector->dp.dsc_decompression_aux; struct drm_dp_desc desc; u8 dpcd[DP_RECEIVER_CAP_SIZE]; @@ -1691,21 +1681,21 @@ static bool detect_dsc_hblank_expansion_quirk(const struct intel_connector *conn !(dpcd[DP_RECEIVE_PORT_0_CAP_0] & DP_HBLANK_EXPANSION_CAPABLE)) return false; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] DSC HBLANK expansion quirk detected\n", connector->base.base.id, connector->base.name); return true; } -static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_mst_port *port, - const char *pathprop) +static struct drm_connector * +mst_topology_add_connector(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, + const char *pathprop) { struct intel_dp *intel_dp = container_of(mgr, struct intel_dp, mst_mgr); + struct intel_display *display = to_intel_display(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_connector *intel_connector; struct drm_connector *connector; enum pipe pipe; @@ -1717,7 +1707,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo connector = &intel_connector->base; - intel_connector->get_hw_state = intel_dp_mst_get_hw_state; + intel_connector->get_hw_state = mst_connector_get_hw_state; intel_connector->sync_state = intel_dp_connector_sync_state; intel_connector->mst_port = intel_dp; intel_connector->port = port; @@ -1725,7 +1715,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_dp_init_modeset_retry_work(intel_connector); - ret = 
drm_connector_dynamic_init(&dev_priv->drm, connector, &intel_dp_mst_connector_funcs, + ret = drm_connector_dynamic_init(display->drm, connector, &mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort, NULL); if (ret) { drm_dp_mst_put_port_malloc(port); @@ -1738,9 +1728,9 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_connector->dp.dsc_hblank_expansion_quirk = detect_dsc_hblank_expansion_quirk(intel_connector); - drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs); + drm_connector_helper_add(connector, &mst_connector_helper_funcs); - for_each_pipe(dev_priv, pipe) { + for_each_pipe(display, pipe) { struct drm_encoder *enc = &intel_dp->mst_encoders[pipe]->base.base; @@ -1749,13 +1739,13 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo goto err; } - ret = intel_dp_mst_add_properties(intel_dp, connector, pathprop); + ret = mst_topology_add_connector_properties(intel_dp, connector, pathprop); if (ret) goto err; ret = intel_dp_hdcp_init(dig_port, intel_connector); if (ret) - drm_dbg_kms(&dev_priv->drm, "[%s:%d] HDCP MST init failed, skipping.\n", + drm_dbg_kms(display->drm, "[%s:%d] HDCP MST init failed, skipping.\n", connector->name, connector->base.id); return connector; @@ -1766,24 +1756,26 @@ err: } static void -intel_dp_mst_poll_hpd_irq(struct drm_dp_mst_topology_mgr *mgr) +mst_topology_poll_hpd_irq(struct drm_dp_mst_topology_mgr *mgr) { struct intel_dp *intel_dp = container_of(mgr, struct intel_dp, mst_mgr); intel_hpd_trigger_irq(dp_to_dig_port(intel_dp)); } -static const struct drm_dp_mst_topology_cbs mst_cbs = { - .add_connector = intel_dp_add_mst_connector, - .poll_hpd_irq = intel_dp_mst_poll_hpd_irq, +static const struct drm_dp_mst_topology_cbs mst_topology_cbs = { + .add_connector = mst_topology_add_connector, + .poll_hpd_irq = mst_topology_poll_hpd_irq, }; +/* Create a fake encoder for an individual MST stream */ static struct intel_dp_mst_encoder * 
-intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe pipe) +mst_stream_encoder_create(struct intel_digital_port *dig_port, enum pipe pipe) { + struct intel_display *display = to_intel_display(dig_port); + struct intel_encoder *primary_encoder = &dig_port->base; struct intel_dp_mst_encoder *intel_mst; - struct intel_encoder *intel_encoder; - struct drm_device *dev = dig_port->base.base.dev; + struct intel_encoder *encoder; intel_mst = kzalloc(sizeof(*intel_mst), GFP_KERNEL); @@ -1791,16 +1783,16 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe return NULL; intel_mst->pipe = pipe; - intel_encoder = &intel_mst->base; + encoder = &intel_mst->base; intel_mst->primary = dig_port; - drm_encoder_init(dev, &intel_encoder->base, &intel_dp_mst_enc_funcs, + drm_encoder_init(display->drm, &encoder->base, &mst_stream_encoder_funcs, DRM_MODE_ENCODER_DPMST, "DP-MST %c", pipe_name(pipe)); - intel_encoder->type = INTEL_OUTPUT_DP_MST; - intel_encoder->power_domain = dig_port->base.power_domain; - intel_encoder->port = dig_port->base.port; - intel_encoder->cloneable = 0; + encoder->type = INTEL_OUTPUT_DP_MST; + encoder->power_domain = primary_encoder->power_domain; + encoder->port = primary_encoder->port; + encoder->cloneable = 0; /* * This is wrong, but broken userspace uses the intersection * of possible_crtcs of all the encoders of a given connector @@ -1809,36 +1801,37 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe * To keep such userspace functioning we must misconfigure * this to make sure the intersection is not empty :( */ - intel_encoder->pipe_mask = ~0; - - intel_encoder->compute_config = intel_dp_mst_compute_config; - intel_encoder->compute_config_late = intel_dp_mst_compute_config_late; - intel_encoder->disable = intel_mst_disable_dp; - intel_encoder->post_disable = intel_mst_post_disable_dp; - intel_encoder->post_pll_disable = intel_mst_post_pll_disable_dp; - 
intel_encoder->update_pipe = intel_ddi_update_pipe; - intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp; - intel_encoder->pre_enable = intel_mst_pre_enable_dp; - intel_encoder->enable = intel_mst_enable_dp; - intel_encoder->audio_enable = intel_audio_codec_enable; - intel_encoder->audio_disable = intel_audio_codec_disable; - intel_encoder->get_hw_state = intel_dp_mst_enc_get_hw_state; - intel_encoder->get_config = intel_dp_mst_enc_get_config; - intel_encoder->initial_fastset_check = intel_dp_mst_initial_fastset_check; + encoder->pipe_mask = ~0; + + encoder->compute_config = mst_stream_compute_config; + encoder->compute_config_late = mst_stream_compute_config_late; + encoder->disable = mst_stream_disable; + encoder->post_disable = mst_stream_post_disable; + encoder->post_pll_disable = mst_stream_post_pll_disable; + encoder->update_pipe = intel_ddi_update_pipe; + encoder->pre_pll_enable = mst_stream_pre_pll_enable; + encoder->pre_enable = mst_stream_pre_enable; + encoder->enable = mst_stream_enable; + encoder->audio_enable = intel_audio_codec_enable; + encoder->audio_disable = intel_audio_codec_disable; + encoder->get_hw_state = mst_stream_get_hw_state; + encoder->get_config = mst_stream_get_config; + encoder->initial_fastset_check = mst_stream_initial_fastset_check; return intel_mst; } +/* Create the fake encoders for MST streams */ static bool -intel_dp_create_fake_mst_encoders(struct intel_digital_port *dig_port) +mst_stream_encoders_create(struct intel_digital_port *dig_port) { + struct intel_display *display = to_intel_display(dig_port); struct intel_dp *intel_dp = &dig_port->dp; - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum pipe pipe; - for_each_pipe(dev_priv, pipe) - intel_dp->mst_encoders[pipe] = intel_dp_create_fake_mst_encoder(dig_port, pipe); + for_each_pipe(display, pipe) + intel_dp->mst_encoders[pipe] = mst_stream_encoder_create(dig_port, pipe); return true; } @@ -1851,25 +1844,25 @@ 
intel_dp_mst_encoder_active_links(struct intel_digital_port *dig_port) int intel_dp_mst_encoder_init(struct intel_digital_port *dig_port, int conn_base_id) { - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_display *display = to_intel_display(dig_port); struct intel_dp *intel_dp = &dig_port->dp; enum port port = dig_port->base.port; int ret; - if (!HAS_DP_MST(i915) || intel_dp_is_edp(intel_dp)) + if (!HAS_DP_MST(display) || intel_dp_is_edp(intel_dp)) return 0; - if (DISPLAY_VER(i915) < 12 && port == PORT_A) + if (DISPLAY_VER(display) < 12 && port == PORT_A) return 0; - if (DISPLAY_VER(i915) < 11 && port == PORT_E) + if (DISPLAY_VER(display) < 11 && port == PORT_E) return 0; - intel_dp->mst_mgr.cbs = &mst_cbs; + intel_dp->mst_mgr.cbs = &mst_topology_cbs; /* create encoders */ - intel_dp_create_fake_mst_encoders(dig_port); - ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, &i915->drm, + mst_stream_encoders_create(dig_port); + ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, display->drm, &intel_dp->aux, 16, 3, conn_base_id); if (ret) { intel_dp->mst_mgr.cbs = NULL; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h index 8343804ce3f8..c6bdc1d190a4 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.h +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h @@ -8,6 +8,7 @@ #include <linux/types.h> +struct drm_connector_state; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; @@ -30,4 +31,10 @@ bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, void intel_dp_mst_prepare_probe(struct intel_dp *intel_dp); bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp); +int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, + int max_bpp, int min_bpp, + struct drm_connector_state *conn_state, + int step, bool dsc); + #endif /* __INTEL_DP_MST_H__ */ diff --git 
a/drivers/gpu/drm/i915/display/intel_dp_test.c b/drivers/gpu/drm/i915/display/intel_dp_test.c index e05819300d77..380b359b0420 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_test.c +++ b/drivers/gpu/drm/i915/display/intel_dp_test.c @@ -8,7 +8,6 @@ #include <drm/drm_edid.h> #include <drm/drm_probe_helper.h> -#include "i915_drv.h" #include "i915_reg.h" #include "intel_ddi.h" #include "intel_de.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c index 94198bc04939..589872babdd7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c +++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c @@ -3,11 +3,10 @@ * Copyright © 2023 Intel Corporation */ -#include "i915_drv.h" - #include <drm/display/drm_dp_tunnel.h> #include "intel_atomic.h" +#include "intel_display_core.h" #include "intel_display_limits.h" #include "intel_display_types.h" #include "intel_dp.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 0f12f2c3467c..52a36a2281e6 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -22,6 +22,7 @@ */ #include "bxt_dpio_phy_regs.h" +#include "i915_drv.h" #include "i915_reg.h" #include "intel_ddi.h" #include "intel_ddi_buf_trans.h" @@ -855,6 +856,7 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, void chv_phy_pre_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -871,7 +873,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, */ if (ch == DPIO_CH0 && pipe == PIPE_B) dig_port->release_cl2_override = - !chv_phy_powergate_ch(dev_priv, DPIO_PHY0, DPIO_CH1, true); + 
!chv_phy_powergate_ch(display, DPIO_PHY0, DPIO_CH1, true); chv_phy_powergate_lanes(encoder, true, lane_mask); @@ -1013,11 +1015,11 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, void chv_phy_release_cl2_override(struct intel_encoder *encoder) { + struct intel_display *display = to_intel_display(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (dig_port->release_cl2_override) { - chv_phy_powergate_ch(dev_priv, DPIO_PHY0, DPIO_CH1, false); + chv_phy_powergate_ch(display, DPIO_PHY0, DPIO_CH1, false); dig_port->release_cl2_override = false; } } diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 198ceda790d2..3256b1293f7f 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/string_helpers.h> +#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_crtc.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index e60497bb8a94..d86cc9ffd4ac 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -25,6 +25,7 @@ #include <linux/string_helpers.h> #include "bxt_dpio_phy_regs.h" +#include "i915_drv.h" #include "i915_reg.h" #include "intel_de.h" #include "intel_display_types.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index ce8c76e44e6a..8b1f0e92a11c 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -205,7 +205,7 @@ void intel_dpt_resume(struct drm_i915_private *i915) struct intel_framebuffer *fb = to_intel_framebuffer(drm_fb); if (fb->dpt_vm) - i915_ggtt_resume_vm(fb->dpt_vm); + i915_ggtt_resume_vm(fb->dpt_vm, true); } 
mutex_unlock(&i915->drm.mode_config.fb_lock); } @@ -233,7 +233,7 @@ void intel_dpt_suspend(struct drm_i915_private *i915) struct intel_framebuffer *fb = to_intel_framebuffer(drm_fb); if (fb->dpt_vm) - i915_ggtt_suspend_vm(fb->dpt_vm); + i915_ggtt_suspend_vm(fb->dpt_vm, true); } mutex_unlock(&i915->drm.mode_config.fb_lock); diff --git a/drivers/gpu/drm/i915/display/intel_dpt_common.c b/drivers/gpu/drm/i915/display/intel_dpt_common.c index 573f72068899..d2dede0a5229 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt_common.c +++ b/drivers/gpu/drm/i915/display/intel_dpt_common.c @@ -3,6 +3,7 @@ * Copyright © 2023 Intel Corporation */ +#include "i915_drv.h" #include "i915_reg.h" #include "intel_de.h" #include "intel_display_types.h" diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c index bb39eb96e812..0fec01b79b23 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.c +++ b/drivers/gpu/drm/i915/display/intel_drrs.c @@ -68,7 +68,9 @@ const char *intel_drrs_type_str(enum drrs_type drrs_type) bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915, enum transcoder cpu_transcoder) { - if (HAS_DOUBLE_BUFFERED_M_N(i915)) + struct intel_display *display = &i915->display; + + if (HAS_DOUBLE_BUFFERED_M_N(display)) return true; return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder); diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index b7b44399adaa..e6f8fc743fb4 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -256,15 +256,6 @@ static bool intel_dsb_prev_ins_is_write(struct intel_dsb *dsb, return prev_opcode == opcode && prev_reg == i915_mmio_reg_offset(reg); } -static bool intel_dsb_prev_ins_is_mmio_write(struct intel_dsb *dsb, i915_reg_t reg) -{ - /* only full byte-enables can be converted to indexed writes */ - return intel_dsb_prev_ins_is_write(dsb, - DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT | - DSB_BYTE_EN 
<< DSB_BYTE_EN_SHIFT, - reg); -} - static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_t reg) { return intel_dsb_prev_ins_is_write(dsb, @@ -273,16 +264,20 @@ static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_ } /** - * intel_dsb_reg_write() - Emit register wriite to the DSB context + * intel_dsb_reg_write_indexed() - Emit indexed register write to the DSB context * @dsb: DSB context * @reg: register address. * @val: value. * * This function is used for writing register-value pair in command * buffer of DSB. + * + * Note that indexed writes are slower than normal MMIO writes + * for a small number (less than 5 or so) of writes to the same + * register. */ -void intel_dsb_reg_write(struct intel_dsb *dsb, - i915_reg_t reg, u32 val) +void intel_dsb_reg_write_indexed(struct intel_dsb *dsb, + i915_reg_t reg, u32 val) { /* * For example the buffer will look like below for 3 dwords for auto @@ -300,44 +295,32 @@ void intel_dsb_reg_write(struct intel_dsb *dsb, * we are writing odd no of dwords, Zeros will be added in the end for * padding. */ - if (!intel_dsb_prev_ins_is_mmio_write(dsb, reg) && - !intel_dsb_prev_ins_is_indexed_write(dsb, reg)) { - intel_dsb_emit(dsb, val, - (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) | - (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | + if (!intel_dsb_prev_ins_is_indexed_write(dsb, reg)) + intel_dsb_emit(dsb, 0, /* count */ + (DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) | i915_mmio_reg_offset(reg)); - } else { - if (!assert_dsb_has_room(dsb)) - return; - - /* convert to indexed write? 
*/ - if (intel_dsb_prev_ins_is_mmio_write(dsb, reg)) { - u32 prev_val = dsb->ins[0]; - - dsb->ins[0] = 1; /* count */ - dsb->ins[1] = (DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) | - i915_mmio_reg_offset(reg); - intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 0, - dsb->ins[0]); - intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 1, - dsb->ins[1]); - intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 2, - prev_val); + if (!assert_dsb_has_room(dsb)) + return; - dsb->free_pos++; - } + /* Update the count */ + dsb->ins[0]++; + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 0, + dsb->ins[0]); - intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, val); - /* Update the count */ - dsb->ins[0]++; - intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 0, - dsb->ins[0]); + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, val); + /* if number of data words is odd, then the last dword should be 0.*/ + if (dsb->free_pos & 0x1) + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos, 0); +} - /* if number of data words is odd, then the last dword should be 0.*/ - if (dsb->free_pos & 0x1) - intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos, 0); - } +void intel_dsb_reg_write(struct intel_dsb *dsb, + i915_reg_t reg, u32 val) +{ + intel_dsb_emit(dsb, val, + (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) | + (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | + i915_mmio_reg_offset(reg)); } static u32 intel_dsb_mask_to_byte_en(u32 mask) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index 33e0fc2ab380..da6df07a3c83 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -34,6 +34,8 @@ void intel_dsb_finish(struct intel_dsb *dsb); void intel_dsb_cleanup(struct intel_dsb *dsb); void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val); +void intel_dsb_reg_write_indexed(struct intel_dsb *dsb, + i915_reg_t reg, u32 val); 
void intel_dsb_reg_write_masked(struct intel_dsb *dsb, i915_reg_t reg, u32 mask, u32 val); void intel_dsb_noop(struct intel_dsb *dsb, int count); diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index e8129a720210..b2b78f39cfd3 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -745,6 +745,23 @@ void intel_dsi_log_params(struct intel_dsi *intel_dsi) str_enabled_disabled(!(intel_dsi->video_frmt_cfg_bits & DISABLE_VIDEO_BTA))); } +static enum mipi_dsi_pixel_format vbt_to_dsi_pixel_format(unsigned int format) +{ + switch (format) { + case PIXEL_FORMAT_RGB888: + return MIPI_DSI_FMT_RGB888; + case PIXEL_FORMAT_RGB666_LOOSELY_PACKED: + return MIPI_DSI_FMT_RGB666; + case PIXEL_FORMAT_RGB666: + return MIPI_DSI_FMT_RGB666_PACKED; + case PIXEL_FORMAT_RGB565: + return MIPI_DSI_FMT_RGB565; + default: + MISSING_CASE(format); + return MIPI_DSI_FMT_RGB666; + } +} + bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) { struct drm_device *dev = intel_dsi->base.base.dev; @@ -762,8 +779,7 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) intel_dsi->clock_stop = mipi_config->enable_clk_stop ? 
1 : 0; intel_dsi->lane_count = mipi_config->lane_cnt + 1; intel_dsi->pixel_format = - pixel_format_from_register_bits( - mipi_config->videomode_color_format << 7); + vbt_to_dsi_pixel_format(mipi_config->videomode_color_format); intel_dsi->dual_link = mipi_config->dual_link; intel_dsi->pixel_overlap = mipi_config->pixel_overlap; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index f425bea748c4..c310698a1a86 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -318,6 +318,7 @@ static void intel_dvo_pre_enable(struct intel_atomic_state *state, static enum drm_connector_status intel_dvo_detect(struct drm_connector *_connector, bool force) { + struct intel_display *display = to_intel_display(_connector->dev); struct intel_connector *connector = to_intel_connector(_connector); struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dvo *intel_dvo = intel_attached_dvo(connector); @@ -325,10 +326,10 @@ intel_dvo_detect(struct drm_connector *_connector, bool force) drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s]\n", connector->base.base.id, connector->base.name); - if (!intel_display_device_enabled(i915)) + if (!intel_display_device_enabled(display)) return connector_status_disconnected; - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return connector->base.status; return intel_dvo->dev.dev_ops->detect(&intel_dvo->dev); @@ -336,11 +337,11 @@ intel_dvo_detect(struct drm_connector *_connector, bool force) static int intel_dvo_get_modes(struct drm_connector *_connector) { + struct intel_display *display = to_intel_display(_connector->dev); struct intel_connector *connector = to_intel_connector(_connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); int num_modes; - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return 
drm_edid_connector_add_modes(&connector->base); /* diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 6a7060889f40..223c4218c019 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1694,7 +1694,7 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer * * arithmetic related to alignment and offset calculation. */ if (is_gen12_ccs_cc_plane(&fb->base, i)) { - if (IS_ALIGNED(fb->base.offsets[i], PAGE_SIZE)) + if (IS_ALIGNED(fb->base.offsets[i], 64)) continue; else return -EINVAL; diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index 98e1a3606227..37cdfa9c692a 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -7,6 +7,7 @@ #include <drm/drm_fixed.h> +#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_crtc.h" diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c index cbcd1e91b7be..8a49e2bb37fa 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.c +++ b/drivers/gpu/drm/i915/display/intel_global_state.c @@ -75,7 +75,7 @@ intel_atomic_global_state_get(struct intel_global_state *obj_state) return obj_state; } -void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv, +void intel_atomic_global_obj_init(struct intel_display *display, struct intel_global_obj *obj, struct intel_global_state *state, const struct intel_global_state_funcs *funcs) @@ -88,26 +88,26 @@ void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv, obj->state = state; obj->funcs = funcs; - list_add_tail(&obj->head, &dev_priv->display.global.obj_list); + list_add_tail(&obj->head, &display->global.obj_list); } -void intel_atomic_global_obj_cleanup(struct drm_i915_private *dev_priv) +void intel_atomic_global_obj_cleanup(struct intel_display *display) { struct 
intel_global_obj *obj, *next; - list_for_each_entry_safe(obj, next, &dev_priv->display.global.obj_list, head) { + list_for_each_entry_safe(obj, next, &display->global.obj_list, head) { list_del(&obj->head); - drm_WARN_ON(&dev_priv->drm, kref_read(&obj->state->ref) != 1); + drm_WARN_ON(display->drm, kref_read(&obj->state->ref) != 1); intel_atomic_global_state_put(obj->state); } } -static void assert_global_state_write_locked(struct drm_i915_private *dev_priv) +static void assert_global_state_write_locked(struct intel_display *display) { struct intel_crtc *crtc; - for_each_intel_crtc(&dev_priv->drm, crtc) + for_each_intel_crtc(display->drm, crtc) drm_modeset_lock_assert_held(&crtc->base.mutex); } @@ -126,23 +126,23 @@ static bool modeset_lock_is_held(struct drm_modeset_acquire_ctx *ctx, static void assert_global_state_read_locked(struct intel_atomic_state *state) { + struct intel_display *display = to_intel_display(state); struct drm_modeset_acquire_ctx *ctx = state->base.acquire_ctx; - struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; - for_each_intel_crtc(&dev_priv->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { if (modeset_lock_is_held(ctx, &crtc->base.mutex)) return; } - drm_WARN(&dev_priv->drm, 1, "Global state not read locked\n"); + drm_WARN(display->drm, 1, "Global state not read locked\n"); } struct intel_global_state * intel_atomic_get_global_obj_state(struct intel_atomic_state *state, struct intel_global_obj *obj) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); int index, num_objs, i; size_t size; struct __intel_global_objs_state *arr; @@ -184,7 +184,7 @@ intel_atomic_get_global_obj_state(struct intel_atomic_state *state, state->num_global_objs = num_objs; - drm_dbg_atomic(&i915->drm, "Added new global object %p state %p to %p\n", + drm_dbg_atomic(display->drm, "Added new global object %p state %p to %p\n", obj, obj_state, state); return 
obj_state; @@ -218,14 +218,14 @@ intel_atomic_get_new_global_obj_state(struct intel_atomic_state *state, void intel_atomic_swap_global_state(struct intel_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_global_state *old_obj_state, *new_obj_state; struct intel_global_obj *obj; int i; for_each_oldnew_global_obj_in_state(state, obj, old_obj_state, new_obj_state, i) { - drm_WARN_ON(&dev_priv->drm, obj->state != old_obj_state); + drm_WARN_ON(display->drm, obj->state != old_obj_state); /* * If the new state wasn't modified (and properly @@ -234,7 +234,7 @@ void intel_atomic_swap_global_state(struct intel_atomic_state *state) if (!new_obj_state->changed) continue; - assert_global_state_write_locked(dev_priv); + assert_global_state_write_locked(display); old_obj_state->state = state; new_obj_state->state = NULL; @@ -265,10 +265,10 @@ void intel_atomic_clear_global_state(struct intel_atomic_state *state) int intel_atomic_lock_global_state(struct intel_global_state *obj_state) { struct intel_atomic_state *state = obj_state->state; - struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_crtc *crtc; - for_each_intel_crtc(&dev_priv->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { int ret; ret = drm_modeset_lock(&crtc->base.mutex, @@ -298,10 +298,10 @@ int intel_atomic_serialize_global_state(struct intel_global_state *obj_state) bool intel_atomic_global_state_is_serialized(struct intel_atomic_state *state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_crtc *crtc; - for_each_intel_crtc(&i915->drm, crtc) + for_each_intel_crtc(display->drm, crtc) if (!intel_atomic_get_new_crtc_state(state, crtc)) return false; return true; @@ -344,7 +344,7 @@ intel_atomic_global_state_setup_commit(struct 
intel_atomic_state *state) int intel_atomic_global_state_wait_for_dependencies(struct intel_atomic_state *state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); const struct intel_global_state *old_obj_state; struct intel_global_obj *obj; int i; @@ -358,7 +358,7 @@ intel_atomic_global_state_wait_for_dependencies(struct intel_atomic_state *state ret = wait_for_completion_timeout(&commit->done, 10 * HZ); if (ret == 0) { - drm_err(&i915->drm, "global state timed out\n"); + drm_err(display->drm, "global state timed out\n"); return -ETIMEDOUT; } } diff --git a/drivers/gpu/drm/i915/display/intel_global_state.h b/drivers/gpu/drm/i915/display/intel_global_state.h index 6506a8e32972..d42fb2547ee9 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.h +++ b/drivers/gpu/drm/i915/display/intel_global_state.h @@ -9,8 +9,8 @@ #include <linux/kref.h> #include <linux/list.h> -struct drm_i915_private; struct intel_atomic_state; +struct intel_display; struct intel_global_obj; struct intel_global_state; @@ -69,11 +69,11 @@ struct __intel_global_objs_state { struct intel_global_state *state, *old_state, *new_state; }; -void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv, +void intel_atomic_global_obj_init(struct intel_display *display, struct intel_global_obj *obj, struct intel_global_state *state, const struct intel_global_state_funcs *funcs); -void intel_atomic_global_obj_cleanup(struct drm_i915_private *dev_priv); +void intel_atomic_global_obj_cleanup(struct intel_display *display); struct intel_global_state * intel_atomic_get_global_obj_state(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index e3d938c7f83e..807cf606e7a8 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -496,14 +496,13 @@ static int gmbus_xfer_read(struct intel_display 
*display, struct i2c_msg *msg, u32 gmbus0_reg, u32 gmbus1_index) { - struct drm_i915_private *i915 = to_i915(display->drm); u8 *buf = msg->buf; unsigned int rx_size = msg->len; unsigned int len; int ret; do { - if (HAS_GMBUS_BURST_READ(i915)) + if (HAS_GMBUS_BURST_READ(display)) len = min(rx_size, INTEL_GMBUS_BURST_READ_MAX_LEN); else len = min(rx_size, gmbus_max_xfer_size(display)); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index f57e4dba2873..7464b44c8bb3 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -31,27 +31,33 @@ #define KEY_LOAD_TRIES 5 #define HDCP2_LC_RETRY_CNT 3 -/* WA: 16022217614 */ static void -intel_hdcp_disable_hdcp_line_rekeying(struct intel_encoder *encoder, - struct intel_hdcp *hdcp) +intel_hdcp_adjust_hdcp_line_rekeying(struct intel_encoder *encoder, + struct intel_hdcp *hdcp, + bool enable) { struct intel_display *display = to_intel_display(encoder); + i915_reg_t rekey_reg; + u32 rekey_bit = 0; /* Here we assume HDMI is in TMDS mode of operation */ if (encoder->type != INTEL_OUTPUT_HDMI) return; - if (DISPLAY_VER(display) >= 14) { - if (IS_DISPLAY_VERx100_STEP(display, 1400, STEP_D0, STEP_FOREVER)) - intel_de_rmw(display, MTL_CHICKEN_TRANS(hdcp->cpu_transcoder), - 0, HDCP_LINE_REKEY_DISABLE); - else if (IS_DISPLAY_VERx100_STEP(display, 1401, STEP_B0, STEP_FOREVER) || - IS_DISPLAY_VERx100_STEP(display, 2000, STEP_B0, STEP_FOREVER)) - intel_de_rmw(display, - TRANS_DDI_FUNC_CTL(display, hdcp->cpu_transcoder), - 0, TRANS_DDI_HDCP_LINE_REKEY_DISABLE); + if (DISPLAY_VER(display) >= 30) { + rekey_reg = TRANS_DDI_FUNC_CTL(display, hdcp->cpu_transcoder); + rekey_bit = XE3_TRANS_DDI_HDCP_LINE_REKEY_DISABLE; + } else if (IS_DISPLAY_VERx100_STEP(display, 1401, STEP_B0, STEP_FOREVER) || + IS_DISPLAY_VERx100_STEP(display, 2000, STEP_B0, STEP_FOREVER)) { + rekey_reg = TRANS_DDI_FUNC_CTL(display, hdcp->cpu_transcoder); + rekey_bit = 
TRANS_DDI_HDCP_LINE_REKEY_DISABLE; + } else if (IS_DISPLAY_VERx100_STEP(display, 1400, STEP_D0, STEP_FOREVER)) { + rekey_reg = CHICKEN_TRANS(display, hdcp->cpu_transcoder); + rekey_bit = HDCP_LINE_REKEY_DISABLE; } + + if (rekey_bit) + intel_de_rmw(display, rekey_reg, rekey_bit, enable ? 0 : rekey_bit); } static int intel_conn_to_vcpi(struct intel_atomic_state *state, @@ -343,7 +349,7 @@ static bool hdcp_key_loadable(struct intel_display *display) /* PG1 (power well #1) needs to be enabled */ with_intel_runtime_pm(&i915->runtime_pm, wakeref) - enabled = intel_display_power_well_is_enabled(i915, id); + enabled = intel_display_power_well_is_enabled(display, id); /* * Another req for hdcp key loadability is enabled state of pll for @@ -1048,6 +1054,8 @@ static int intel_hdcp1_enable(struct intel_connector *connector) return ret; } + intel_hdcp_adjust_hdcp_line_rekeying(connector->encoder, hdcp, true); + /* Incase of authentication failures, HDCP spec expects reauth. */ for (i = 0; i < tries; i++) { ret = intel_hdcp_auth(connector); @@ -1158,9 +1166,15 @@ static int intel_hdcp_check_link(struct intel_connector *connector) goto out; } - intel_hdcp_update_value(connector, - DRM_MODE_CONTENT_PROTECTION_DESIRED, - true); + ret = intel_hdcp1_enable(connector); + if (ret) { + drm_err(display->drm, "Failed to enable hdcp (%d)\n", ret); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); + goto out; + } + out: mutex_unlock(&dig_port->hdcp_mutex); mutex_unlock(&hdcp->mutex); @@ -2069,7 +2083,7 @@ static int _intel_hdcp2_enable(struct intel_atomic_state *state, connector->base.base.id, connector->base.name, hdcp->content_type); - intel_hdcp_disable_hdcp_line_rekeying(connector->encoder, hdcp); + intel_hdcp_adjust_hdcp_line_rekeying(connector->encoder, hdcp, false); ret = hdcp2_authenticate_and_encrypt(state, connector); if (ret) { diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 
ae7600ea4a19..5ae678f4eaa7 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1600,14 +1600,12 @@ static bool intel_hdmi_hdcp_check_link(struct intel_digital_port *dig_port, struct intel_connector *connector) { - struct intel_display *display = to_intel_display(dig_port); int retry; for (retry = 0; retry < 3; retry++) if (intel_hdmi_hdcp_check_link_once(dig_port, connector)) return true; - drm_err(display->drm, "Link check failed\n"); return false; } @@ -2556,10 +2554,10 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (!intel_display_device_enabled(dev_priv)) + if (!intel_display_device_enabled(display)) return connector_status_disconnected; - if (!intel_display_driver_check_access(dev_priv)) + if (!intel_display_driver_check_access(display)) return connector->status; wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); @@ -2586,12 +2584,11 @@ static void intel_hdmi_force(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *i915 = to_i915(connector->dev); drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return; intel_hdmi_unset_edid(connector); @@ -3042,7 +3039,7 @@ void intel_infoframe_init(struct intel_digital_port *dig_port) } } -void intel_hdmi_init_connector(struct intel_digital_port *dig_port, +bool intel_hdmi_init_connector(struct intel_digital_port *dig_port, struct intel_connector *intel_connector) { struct intel_display *display = to_intel_display(dig_port); @@ -3059,17 +3056,17 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port, intel_encoder->base.base.id, intel_encoder->base.name); if (DISPLAY_VER(display) < 12 && drm_WARN_ON(dev, port == PORT_A)) - 
return; + return false; if (drm_WARN(dev, dig_port->max_lanes < 4, "Not enough lanes (%d) for HDMI on [ENCODER:%d:%s]\n", dig_port->max_lanes, intel_encoder->base.base.id, intel_encoder->base.name)) - return; + return false; ddc_pin = intel_hdmi_ddc_pin(intel_encoder); if (!ddc_pin) - return; + return false; drm_connector_init_with_ddc(dev, connector, &intel_hdmi_connector_funcs, @@ -3114,6 +3111,8 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port, &conn_info); if (!intel_hdmi->cec_notifier) drm_dbg_kms(display->drm, "CEC notifier get failed\n"); + + return true; } /* diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h index 466f48df8a74..38deaeb302a2 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.h +++ b/drivers/gpu/drm/i915/display/intel_hdmi.h @@ -22,7 +22,7 @@ struct intel_encoder; struct intel_hdmi; union hdmi_infoframe; -void intel_hdmi_init_connector(struct intel_digital_port *dig_port, +bool intel_hdmi_init_connector(struct intel_digital_port *dig_port, struct intel_connector *intel_connector); bool intel_hdmi_compute_has_hdmi_sink(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index a013b0e0ef54..3adc791d3776 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -813,8 +813,10 @@ static void i915_hpd_poll_init_work(struct work_struct *work) */ void intel_hpd_poll_enable(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; + if (!HAS_DISPLAY(dev_priv) || - !intel_display_device_enabled(dev_priv)) + !intel_display_device_enabled(display)) return; WRITE_ONCE(dev_priv->display.hotplug.poll_enabled, true); diff --git a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c index cb64c6f0ad1b..476ac88087e0 100644 --- 
a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c @@ -1457,7 +1457,11 @@ void intel_hpd_enable_detection(struct intel_encoder *encoder) void intel_hpd_irq_setup(struct drm_i915_private *i915) { - if (i915->display.irq.display_irqs_enabled && i915->display.funcs.hotplug) + if ((IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) && + !i915->display.irq.vlv_display_irqs_enabled) + return; + + if (i915->display.funcs.hotplug) i915->display.funcs.hotplug->hpd_irq_setup(i915); } diff --git a/drivers/gpu/drm/i915/display/intel_hti.c b/drivers/gpu/drm/i915/display/intel_hti.c index 19d1f196d9fb..fb6b84f6a81d 100644 --- a/drivers/gpu/drm/i915/display/intel_hti.c +++ b/drivers/gpu/drm/i915/display/intel_hti.c @@ -3,7 +3,8 @@ * Copyright © 2022 Intel Corporation */ -#include "i915_drv.h" +#include <drm/drm_device.h> + #include "intel_de.h" #include "intel_display.h" #include "intel_hti.h" diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c index c87cd1d16d0a..29705c159119 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.c +++ b/drivers/gpu/drm/i915/display/intel_link_bw.c @@ -5,10 +5,9 @@ #include <drm/drm_fixed.h> -#include "i915_drv.h" - #include "intel_atomic.h" #include "intel_crtc.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_dp_mst.h" #include "intel_dp_tunnel.h" diff --git a/drivers/gpu/drm/i915/display/intel_load_detect.c b/drivers/gpu/drm/i915/display/intel_load_detect.c index b457c69dc0be..86cc03a4413c 100644 --- a/drivers/gpu/drm/i915/display/intel_load_detect.c +++ b/drivers/gpu/drm/i915/display/intel_load_detect.c @@ -7,9 +7,9 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic_uapi.h> -#include "i915_drv.h" #include "intel_atomic.h" #include "intel_crtc.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_load_detect.h" diff --git 
a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index f9db867fae89..d75dd17fad32 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -29,11 +29,12 @@ #include <drm/drm_edid.h> #include "i915_reg.h" +#include "i915_utils.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" -#include "intel_lspcon.h" #include "intel_hdmi.h" +#include "intel_lspcon.h" /* LSPCON OUI Vendor ID(signatures) */ #define LSPCON_VENDOR_PARADE_OUI 0x001CF8 diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index ef2f5c8948a7..4b0dce169d4e 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -57,12 +57,7 @@ /* Private structure for the integrated LVDS support */ struct intel_lvds_pps { - /* 100us units */ - int t1_t2; - int t3; - int t4; - int t5; - int tx; + struct intel_pps_delays delays; int divider; @@ -168,12 +163,12 @@ static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, val = intel_de_read(dev_priv, PP_ON_DELAYS(dev_priv, 0)); pps->port = REG_FIELD_GET(PANEL_PORT_SELECT_MASK, val); - pps->t1_t2 = REG_FIELD_GET(PANEL_POWER_UP_DELAY_MASK, val); - pps->t5 = REG_FIELD_GET(PANEL_LIGHT_ON_DELAY_MASK, val); + pps->delays.power_up = REG_FIELD_GET(PANEL_POWER_UP_DELAY_MASK, val); + pps->delays.backlight_on = REG_FIELD_GET(PANEL_LIGHT_ON_DELAY_MASK, val); val = intel_de_read(dev_priv, PP_OFF_DELAYS(dev_priv, 0)); - pps->t3 = REG_FIELD_GET(PANEL_POWER_DOWN_DELAY_MASK, val); - pps->tx = REG_FIELD_GET(PANEL_LIGHT_OFF_DELAY_MASK, val); + pps->delays.power_down = REG_FIELD_GET(PANEL_POWER_DOWN_DELAY_MASK, val); + pps->delays.backlight_off = REG_FIELD_GET(PANEL_LIGHT_OFF_DELAY_MASK, val); val = intel_de_read(dev_priv, PP_DIVISOR(dev_priv, 0)); pps->divider = REG_FIELD_GET(PP_REFERENCE_DIVIDER_MASK, val); @@ -186,25 +181,30 @@ static void 
intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, if (val) val--; /* Convert from 100ms to 100us units */ - pps->t4 = val * 1000; + pps->delays.power_cycle = val * 1000; if (DISPLAY_VER(dev_priv) < 5 && - pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) { + pps->delays.power_up == 0 && + pps->delays.backlight_on == 0 && + pps->delays.power_down == 0 && + pps->delays.backlight_off == 0) { drm_dbg_kms(&dev_priv->drm, "Panel power timings uninitialized, " "setting defaults\n"); /* Set T2 to 40ms and T5 to 200ms in 100 usec units */ - pps->t1_t2 = 40 * 10; - pps->t5 = 200 * 10; + pps->delays.power_up = 40 * 10; + pps->delays.backlight_on = 200 * 10; /* Set T3 to 35ms and Tx to 200ms in 100 usec units */ - pps->t3 = 35 * 10; - pps->tx = 200 * 10; + pps->delays.power_down = 35 * 10; + pps->delays.backlight_off = 200 * 10; } - drm_dbg(&dev_priv->drm, "LVDS PPS:t1+t2 %d t3 %d t4 %d t5 %d tx %d " + drm_dbg(&dev_priv->drm, "LVDS PPS:power_up %d power_down %d power_cycle %d backlight_on %d backlight_off %d " "divider %d port %d powerdown_on_reset %d\n", - pps->t1_t2, pps->t3, pps->t4, pps->t5, pps->tx, - pps->divider, pps->port, pps->powerdown_on_reset); + pps->delays.power_up, pps->delays.power_down, + pps->delays.power_cycle, pps->delays.backlight_on, + pps->delays.backlight_off, pps->divider, + pps->port, pps->powerdown_on_reset); } static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv, @@ -221,16 +221,17 @@ static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv, intel_de_write(dev_priv, PP_ON_DELAYS(dev_priv, 0), REG_FIELD_PREP(PANEL_PORT_SELECT_MASK, pps->port) | - REG_FIELD_PREP(PANEL_POWER_UP_DELAY_MASK, pps->t1_t2) | - REG_FIELD_PREP(PANEL_LIGHT_ON_DELAY_MASK, pps->t5)); + REG_FIELD_PREP(PANEL_POWER_UP_DELAY_MASK, pps->delays.power_up) | + REG_FIELD_PREP(PANEL_LIGHT_ON_DELAY_MASK, pps->delays.backlight_on)); intel_de_write(dev_priv, PP_OFF_DELAYS(dev_priv, 0), - REG_FIELD_PREP(PANEL_POWER_DOWN_DELAY_MASK, 
pps->t3) | - REG_FIELD_PREP(PANEL_LIGHT_OFF_DELAY_MASK, pps->tx)); + REG_FIELD_PREP(PANEL_POWER_DOWN_DELAY_MASK, pps->delays.power_down) | + REG_FIELD_PREP(PANEL_LIGHT_OFF_DELAY_MASK, pps->delays.backlight_off)); intel_de_write(dev_priv, PP_DIVISOR(dev_priv, 0), REG_FIELD_PREP(PP_REFERENCE_DIVIDER_MASK, pps->divider) | - REG_FIELD_PREP(PANEL_POWER_CYCLE_DELAY_MASK, DIV_ROUND_UP(pps->t4, 1000) + 1)); + REG_FIELD_PREP(PANEL_POWER_CYCLE_DELAY_MASK, + DIV_ROUND_UP(pps->delays.power_cycle, 1000) + 1)); } static void intel_pre_enable_lvds(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 2c8668b1ebae..9a2bea19f17b 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -116,6 +116,7 @@ static void set_encoder_for_connector(struct intel_connector *connector, static void reset_encoder_connector_state(struct intel_encoder *encoder) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_pmdemand_state *pmdemand_state = to_intel_pmdemand_state(i915->display.pmdemand.obj.state); @@ -128,7 +129,7 @@ static void reset_encoder_connector_state(struct intel_encoder *encoder) continue; /* Clear the corresponding bit in pmdemand active phys mask */ - intel_pmdemand_update_phys_mask(i915, encoder, + intel_pmdemand_update_phys_mask(display, encoder, pmdemand_state, false); set_encoder_for_connector(connector, NULL); @@ -152,6 +153,7 @@ static void reset_crtc_encoder_state(struct intel_crtc *crtc) static void intel_crtc_disable_noatomic_complete(struct intel_crtc *crtc) { + struct intel_display *display = to_intel_display(crtc); struct drm_i915_private *i915 = to_i915(crtc->base.dev); struct intel_bw_state *bw_state = to_intel_bw_state(i915->display.bw.obj.state); @@ -185,7 +187,7 @@ static void 
intel_crtc_disable_noatomic_complete(struct intel_crtc *crtc) bw_state->data_rate[pipe] = 0; bw_state->num_active_planes[pipe] = 0; - intel_pmdemand_update_port_clock(i915, pmdemand_state, pipe, 0); + intel_pmdemand_update_port_clock(display, pmdemand_state, pipe, 0); } /* @@ -582,6 +584,7 @@ static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) static void intel_sanitize_encoder(struct intel_encoder *encoder) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_connector *connector; struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); @@ -613,7 +616,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) encoder->base.name); /* Clear the corresponding bit in pmdemand active phys mask */ - intel_pmdemand_update_phys_mask(i915, encoder, + intel_pmdemand_update_phys_mask(display, encoder, pmdemand_state, false); /* @@ -770,11 +773,11 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) } } - intel_pmdemand_update_phys_mask(i915, encoder, + intel_pmdemand_update_phys_mask(display, encoder, pmdemand_state, true); } else { - intel_pmdemand_update_phys_mask(i915, encoder, + intel_pmdemand_update_phys_mask(display, encoder, pmdemand_state, false); @@ -899,13 +902,13 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) cdclk_state->min_voltage_level[crtc->pipe] = crtc_state->min_voltage_level; - intel_pmdemand_update_port_clock(i915, pmdemand_state, pipe, + intel_pmdemand_update_port_clock(display, pmdemand_state, pipe, crtc_state->port_clock); intel_bw_crtc_update(bw_state, crtc_state); } - intel_pmdemand_init_pmdemand_params(i915, pmdemand_state); + intel_pmdemand_init_pmdemand_params(display, pmdemand_state); } static void @@ -1024,5 +1027,5 @@ void intel_modeset_setup_hw_state(struct drm_i915_private *i915, intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref); - 
intel_power_domains_sanitize_state(i915); + intel_power_domains_sanitize_state(display); } diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 2ec14096ba9c..ca30fff61876 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -183,7 +183,7 @@ struct overlay_registers { }; struct intel_overlay { - struct drm_i915_private *i915; + struct intel_display *display; struct intel_context *context; struct intel_crtc *crtc; struct i915_vma *vma; @@ -205,17 +205,17 @@ struct intel_overlay { void (*flip_complete)(struct intel_overlay *ovl); }; -static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv, +static void i830_overlay_clock_gating(struct intel_display *display, bool enable) { - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); u8 val; /* WA_OVERLAY_CLKGATE:alm */ if (enable) - intel_de_write(dev_priv, DSPCLK_GATE_D(dev_priv), 0); + intel_de_write(display, DSPCLK_GATE_D(display), 0); else - intel_de_write(dev_priv, DSPCLK_GATE_D(dev_priv), + intel_de_write(display, DSPCLK_GATE_D(display), OVRUNIT_CLOCK_GATE_DISABLE); /* WA_DISABLE_L2CACHE_CLOCK_GATING:alm */ @@ -253,11 +253,11 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *)) /* overlay needs to be disable in OCMD reg */ static int intel_overlay_on(struct intel_overlay *overlay) { - struct drm_i915_private *dev_priv = overlay->i915; + struct intel_display *display = overlay->display; struct i915_request *rq; u32 *cs; - drm_WARN_ON(&dev_priv->drm, overlay->active); + drm_WARN_ON(display->drm, overlay->active); rq = alloc_request(overlay, NULL); if (IS_ERR(rq)) @@ -271,8 +271,8 @@ static int intel_overlay_on(struct intel_overlay *overlay) overlay->active = true; - if (IS_I830(dev_priv)) - i830_overlay_clock_gating(dev_priv, false); + if (display->platform.i830) + i830_overlay_clock_gating(display, 
false); *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_ON; *cs++ = overlay->flip_addr | OFC_UPDATE; @@ -288,10 +288,12 @@ static int intel_overlay_on(struct intel_overlay *overlay) static void intel_overlay_flip_prepare(struct intel_overlay *overlay, struct i915_vma *vma) { + struct intel_display *display = overlay->display; + struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe = overlay->crtc->pipe; struct intel_frontbuffer *frontbuffer = NULL; - drm_WARN_ON(&overlay->i915->drm, overlay->old_vma); + drm_WARN_ON(display->drm, overlay->old_vma); if (vma) frontbuffer = intel_frontbuffer_get(intel_bo_to_drm_bo(vma->obj)); @@ -303,8 +305,7 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay, intel_frontbuffer_put(overlay->frontbuffer); overlay->frontbuffer = frontbuffer; - intel_frontbuffer_flip_prepare(overlay->i915, - INTEL_FRONTBUFFER_OVERLAY(pipe)); + intel_frontbuffer_flip_prepare(i915, INTEL_FRONTBUFFER_OVERLAY(pipe)); overlay->old_vma = overlay->vma; if (vma) @@ -318,20 +319,20 @@ static int intel_overlay_continue(struct intel_overlay *overlay, struct i915_vma *vma, bool load_polyphase_filter) { - struct drm_i915_private *dev_priv = overlay->i915; + struct intel_display *display = overlay->display; struct i915_request *rq; u32 flip_addr = overlay->flip_addr; u32 tmp, *cs; - drm_WARN_ON(&dev_priv->drm, !overlay->active); + drm_WARN_ON(display->drm, !overlay->active); if (load_polyphase_filter) flip_addr |= OFC_UPDATE; /* check for underruns */ - tmp = intel_de_read(dev_priv, DOVSTA); + tmp = intel_de_read(display, DOVSTA); if (tmp & (1 << 17)) - drm_dbg(&dev_priv->drm, "overlay underrun, DOVSTA: %x\n", tmp); + drm_dbg(display->drm, "overlay underrun, DOVSTA: %x\n", tmp); rq = alloc_request(overlay, NULL); if (IS_ERR(rq)) @@ -355,14 +356,15 @@ static int intel_overlay_continue(struct intel_overlay *overlay, static void intel_overlay_release_old_vma(struct intel_overlay *overlay) { + struct intel_display *display = overlay->display; + 
struct drm_i915_private *i915 = to_i915(display->drm); struct i915_vma *vma; vma = fetch_and_zero(&overlay->old_vma); - if (drm_WARN_ON(&overlay->i915->drm, !vma)) + if (drm_WARN_ON(display->drm, !vma)) return; - intel_frontbuffer_flip_complete(overlay->i915, - INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); + intel_frontbuffer_flip_complete(i915, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); i915_vma_unpin(vma); i915_vma_put(vma); @@ -376,7 +378,7 @@ intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) static void intel_overlay_off_tail(struct intel_overlay *overlay) { - struct drm_i915_private *dev_priv = overlay->i915; + struct intel_display *display = overlay->display; intel_overlay_release_old_vma(overlay); @@ -384,8 +386,8 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) overlay->crtc = NULL; overlay->active = false; - if (IS_I830(dev_priv)) - i830_overlay_clock_gating(dev_priv, true); + if (display->platform.i830) + i830_overlay_clock_gating(display, true); } static void intel_overlay_last_flip_retire(struct i915_active *active) @@ -400,10 +402,11 @@ static void intel_overlay_last_flip_retire(struct i915_active *active) /* overlay needs to be disabled in OCMD reg */ static int intel_overlay_off(struct intel_overlay *overlay) { + struct intel_display *display = overlay->display; struct i915_request *rq; u32 *cs, flip_addr = overlay->flip_addr; - drm_WARN_ON(&overlay->i915->drm, !overlay->active); + drm_WARN_ON(display->drm, !overlay->active); /* According to intel docs the overlay hw may hang (when switching * off) without loading the filter coeffs. 
It is however unclear whether @@ -452,7 +455,7 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) */ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { - struct drm_i915_private *dev_priv = overlay->i915; + struct intel_display *display = overlay->display; struct i915_request *rq; u32 *cs; @@ -463,7 +466,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (!overlay->old_vma) return 0; - if (!(intel_de_read(dev_priv, GEN2_ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT)) { + if (!(intel_de_read(display, GEN2_ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT)) { intel_overlay_release_old_vid_tail(overlay); return 0; } @@ -487,9 +490,9 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) return i915_active_wait(&overlay->last_flip); } -void intel_overlay_reset(struct drm_i915_private *dev_priv) +void intel_overlay_reset(struct intel_display *display) { - struct intel_overlay *overlay = dev_priv->display.overlay; + struct intel_overlay *overlay = display->overlay; if (!overlay) return; @@ -550,11 +553,11 @@ static int uv_vsubsampling(u32 format) } } -static u32 calc_swidthsw(struct drm_i915_private *dev_priv, u32 offset, u32 width) +static u32 calc_swidthsw(struct intel_display *display, u32 offset, u32 width) { u32 sw; - if (DISPLAY_VER(dev_priv) == 2) + if (DISPLAY_VER(display) == 2) sw = ALIGN((offset & 31) + width, 32); else sw = ALIGN((offset & 63) + width, 64); @@ -789,16 +792,17 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, struct drm_i915_gem_object *new_bo, struct drm_intel_overlay_put_image *params) { + struct intel_display *display = overlay->display; + struct drm_i915_private *dev_priv = to_i915(display->drm); struct overlay_registers __iomem *regs = overlay->regs; - struct drm_i915_private *dev_priv = overlay->i915; u32 swidth, swidthsw, sheight, ostride; enum pipe pipe = overlay->crtc->pipe; bool scale_changed = false; struct 
i915_vma *vma; int ret, tmp_width; - drm_WARN_ON(&dev_priv->drm, - !drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); + drm_WARN_ON(display->drm, + !drm_modeset_is_locked(&display->drm->mode_config.connection_mutex)); ret = intel_overlay_release_old_vid(overlay); if (ret != 0) @@ -824,7 +828,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, oconfig |= OCONF_CC_OUT_8BIT; if (crtc_state->gamma_enable) oconfig |= OCONF_GAMMA2_ENABLE; - if (DISPLAY_VER(dev_priv) == 4) + if (DISPLAY_VER(display) == 4) oconfig |= OCONF_CSC_MODE_BT709; oconfig |= pipe == 0 ? OCONF_PIPE_A : OCONF_PIPE_B; @@ -845,7 +849,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, tmp_width = params->src_width; swidth = params->src_width; - swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width); + swidthsw = calc_swidthsw(display, params->offset_Y, tmp_width); sheight = params->src_height; iowrite32(i915_ggtt_offset(vma) + params->offset_Y, ®s->OBUF_0Y); ostride = params->stride_Y; @@ -858,9 +862,9 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, swidth |= (params->src_width / uv_hscale) << 16; sheight |= (params->src_height / uv_vscale) << 16; - tmp_U = calc_swidthsw(dev_priv, params->offset_U, + tmp_U = calc_swidthsw(display, params->offset_U, params->src_width / uv_hscale); - tmp_V = calc_swidthsw(dev_priv, params->offset_V, + tmp_V = calc_swidthsw(display, params->offset_V, params->src_width / uv_hscale); swidthsw |= max(tmp_U, tmp_V) << 16; @@ -899,11 +903,11 @@ out_pin_section: int intel_overlay_switch_off(struct intel_overlay *overlay) { - struct drm_i915_private *dev_priv = overlay->i915; + struct intel_display *display = overlay->display; int ret; - drm_WARN_ON(&dev_priv->drm, - !drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); + drm_WARN_ON(display->drm, + !drm_modeset_is_locked(&display->drm->mode_config.connection_mutex)); ret = 
intel_overlay_recover_from_interrupt(overlay); if (ret != 0) @@ -936,26 +940,24 @@ static int check_overlay_possible_on_crtc(struct intel_overlay *overlay, static void update_pfit_vscale_ratio(struct intel_overlay *overlay) { - struct drm_i915_private *dev_priv = overlay->i915; + struct intel_display *display = overlay->display; u32 ratio; /* XXX: This is not the same logic as in the xorg driver, but more in * line with the intel documentation for the i965 */ - if (DISPLAY_VER(dev_priv) >= 4) { - u32 tmp = intel_de_read(dev_priv, PFIT_PGM_RATIOS(dev_priv)); + if (DISPLAY_VER(display) >= 4) { + u32 tmp = intel_de_read(display, PFIT_PGM_RATIOS(display)); /* on i965 use the PGM reg to read out the autoscaler values */ ratio = REG_FIELD_GET(PFIT_VERT_SCALE_MASK_965, tmp); } else { u32 tmp; - if (intel_de_read(dev_priv, PFIT_CONTROL(dev_priv)) & PFIT_VERT_AUTO_SCALE) - tmp = intel_de_read(dev_priv, - PFIT_AUTO_RATIOS(dev_priv)); + if (intel_de_read(display, PFIT_CONTROL(display)) & PFIT_VERT_AUTO_SCALE) + tmp = intel_de_read(display, PFIT_AUTO_RATIOS(display)); else - tmp = intel_de_read(dev_priv, - PFIT_PGM_RATIOS(dev_priv)); + tmp = intel_de_read(display, PFIT_PGM_RATIOS(display)); ratio = REG_FIELD_GET(PFIT_VERT_SCALE_MASK, tmp); } @@ -1000,7 +1002,7 @@ static int check_overlay_scaling(struct drm_intel_overlay_put_image *rec) return 0; } -static int check_overlay_src(struct drm_i915_private *dev_priv, +static int check_overlay_src(struct intel_display *display, struct drm_intel_overlay_put_image *rec, struct drm_i915_gem_object *new_bo) { @@ -1011,7 +1013,7 @@ static int check_overlay_src(struct drm_i915_private *dev_priv, u32 tmp; /* check src dimensions */ - if (IS_I845G(dev_priv) || IS_I830(dev_priv)) { + if (display->platform.i845g || display->platform.i830) { if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY || rec->src_width > IMAGE_MAX_WIDTH_LEGACY) return -EINVAL; @@ -1063,14 +1065,14 @@ static int check_overlay_src(struct drm_i915_private *dev_priv, return 
-EINVAL; /* stride checking */ - if (IS_I830(dev_priv) || IS_I845G(dev_priv)) + if (display->platform.i830 || display->platform.i845g) stride_mask = 255; else stride_mask = 63; if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask) return -EINVAL; - if (DISPLAY_VER(dev_priv) == 4 && rec->stride_Y < 512) + if (DISPLAY_VER(display) == 4 && rec->stride_Y < 512) return -EINVAL; tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ? @@ -1114,17 +1116,17 @@ static int check_overlay_src(struct drm_i915_private *dev_priv, int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct intel_display *display = to_intel_display(dev); struct drm_intel_overlay_put_image *params = data; - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_overlay *overlay; struct drm_crtc *drmmode_crtc; struct intel_crtc *crtc; struct drm_i915_gem_object *new_bo; int ret; - overlay = dev_priv->display.overlay; + overlay = display->overlay; if (!overlay) { - drm_dbg(&dev_priv->drm, "userspace bug: no overlay\n"); + drm_dbg(display->drm, "userspace bug: no overlay\n"); return -ENODEV; } @@ -1148,7 +1150,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, drm_modeset_lock_all(dev); if (i915_gem_object_is_tiled(new_bo)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "buffer used for overlay image can not be tiled\n"); ret = -EINVAL; goto out_unlock; @@ -1197,7 +1199,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, goto out_unlock; } - ret = check_overlay_src(dev_priv, params, new_bo); + ret = check_overlay_src(display, params, new_bo); if (ret != 0) goto out_unlock; @@ -1277,14 +1279,14 @@ static int check_gamma(struct drm_intel_overlay_attrs *attrs) int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct intel_display *display = to_intel_display(dev); struct drm_intel_overlay_attrs *attrs = data; - struct 
drm_i915_private *dev_priv = to_i915(dev); struct intel_overlay *overlay; int ret; - overlay = dev_priv->display.overlay; + overlay = display->overlay; if (!overlay) { - drm_dbg(&dev_priv->drm, "userspace bug: no overlay\n"); + drm_dbg(display->drm, "userspace bug: no overlay\n"); return -ENODEV; } @@ -1297,13 +1299,13 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, attrs->contrast = overlay->contrast; attrs->saturation = overlay->saturation; - if (DISPLAY_VER(dev_priv) != 2) { - attrs->gamma0 = intel_de_read(dev_priv, OGAMC0); - attrs->gamma1 = intel_de_read(dev_priv, OGAMC1); - attrs->gamma2 = intel_de_read(dev_priv, OGAMC2); - attrs->gamma3 = intel_de_read(dev_priv, OGAMC3); - attrs->gamma4 = intel_de_read(dev_priv, OGAMC4); - attrs->gamma5 = intel_de_read(dev_priv, OGAMC5); + if (DISPLAY_VER(display) != 2) { + attrs->gamma0 = intel_de_read(display, OGAMC0); + attrs->gamma1 = intel_de_read(display, OGAMC1); + attrs->gamma2 = intel_de_read(display, OGAMC2); + attrs->gamma3 = intel_de_read(display, OGAMC3); + attrs->gamma4 = intel_de_read(display, OGAMC4); + attrs->gamma5 = intel_de_read(display, OGAMC5); } } else { if (attrs->brightness < -128 || attrs->brightness > 127) @@ -1321,7 +1323,7 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, update_reg_attrs(overlay, overlay->regs); if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) { - if (DISPLAY_VER(dev_priv) == 2) + if (DISPLAY_VER(display) == 2) goto out_unlock; if (overlay->active) { @@ -1333,12 +1335,12 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, if (ret) goto out_unlock; - intel_de_write(dev_priv, OGAMC0, attrs->gamma0); - intel_de_write(dev_priv, OGAMC1, attrs->gamma1); - intel_de_write(dev_priv, OGAMC2, attrs->gamma2); - intel_de_write(dev_priv, OGAMC3, attrs->gamma3); - intel_de_write(dev_priv, OGAMC4, attrs->gamma4); - intel_de_write(dev_priv, OGAMC5, attrs->gamma5); + intel_de_write(display, OGAMC0, attrs->gamma0); + intel_de_write(display, 
OGAMC1, attrs->gamma1); + intel_de_write(display, OGAMC2, attrs->gamma2); + intel_de_write(display, OGAMC3, attrs->gamma3); + intel_de_write(display, OGAMC4, attrs->gamma4); + intel_de_write(display, OGAMC5, attrs->gamma5); } } overlay->color_key_enabled = (attrs->flags & I915_OVERLAY_DISABLE_DEST_COLORKEY) == 0; @@ -1352,12 +1354,13 @@ out_unlock: static int get_registers(struct intel_overlay *overlay, bool use_phys) { - struct drm_i915_private *i915 = overlay->i915; + struct intel_display *display = overlay->display; + struct drm_i915_private *i915 = to_i915(display->drm); struct drm_i915_gem_object *obj = ERR_PTR(-ENODEV); struct i915_vma *vma; int err; - if (!IS_METEORLAKE(i915)) /* Wa_22018444074 */ + if (!display->platform.meteorlake) /* Wa_22018444074 */ obj = i915_gem_object_create_stolen(i915, PAGE_SIZE); if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); @@ -1390,13 +1393,14 @@ err_put_bo: return err; } -void intel_overlay_setup(struct drm_i915_private *dev_priv) +void intel_overlay_setup(struct intel_display *display) { + struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_overlay *overlay; struct intel_engine_cs *engine; int ret; - if (!HAS_OVERLAY(dev_priv)) + if (!HAS_OVERLAY(display)) return; engine = to_gt(dev_priv)->engine[RCS0]; @@ -1407,7 +1411,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) if (!overlay) return; - overlay->i915 = dev_priv; + overlay->display = display; overlay->context = engine->kernel_context; overlay->color_key = 0x0101fe; overlay->color_key_enabled = true; @@ -1418,7 +1422,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) i915_active_init(&overlay->last_flip, NULL, intel_overlay_last_flip_retire, 0); - ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv)); + ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(display)); if (ret) goto out_free; @@ -1426,19 +1430,24 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) 
update_polyphase_filter(overlay->regs); update_reg_attrs(overlay, overlay->regs); - dev_priv->display.overlay = overlay; - drm_info(&dev_priv->drm, "Initialized overlay support.\n"); + display->overlay = overlay; + drm_info(display->drm, "Initialized overlay support.\n"); return; out_free: kfree(overlay); } -void intel_overlay_cleanup(struct drm_i915_private *dev_priv) +bool intel_overlay_available(struct intel_display *display) +{ + return display->overlay; +} + +void intel_overlay_cleanup(struct intel_display *display) { struct intel_overlay *overlay; - overlay = fetch_and_zero(&dev_priv->display.overlay); + overlay = fetch_and_zero(&display->overlay); if (!overlay) return; @@ -1447,7 +1456,7 @@ void intel_overlay_cleanup(struct drm_i915_private *dev_priv) * Furthermore modesetting teardown happens beforehand so the * hardware should be off already. */ - drm_WARN_ON(&dev_priv->drm, overlay->active); + drm_WARN_ON(display->drm, overlay->active); i915_gem_object_put(overlay->reg_bo); i915_active_fini(&overlay->last_flip); @@ -1467,8 +1476,7 @@ struct intel_overlay_snapshot { struct intel_overlay_snapshot * intel_overlay_snapshot_capture(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - struct intel_overlay *overlay = dev_priv->display.overlay; + struct intel_overlay *overlay = display->overlay; struct intel_overlay_snapshot *error; if (!overlay || !overlay->active) @@ -1478,8 +1486,8 @@ intel_overlay_snapshot_capture(struct intel_display *display) if (error == NULL) return NULL; - error->dovsta = intel_de_read(dev_priv, DOVSTA); - error->isr = intel_de_read(dev_priv, GEN2_ISR); + error->dovsta = intel_de_read(display, DOVSTA); + error->isr = intel_de_read(display, GEN2_ISR); error->base = overlay->flip_addr; memcpy_fromio(&error->regs, overlay->regs, sizeof(error->regs)); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.h b/drivers/gpu/drm/i915/display/intel_overlay.h index eafac24d1de8..45a42fce754e 100644 --- 
a/drivers/gpu/drm/i915/display/intel_overlay.h +++ b/drivers/gpu/drm/i915/display/intel_overlay.h @@ -17,19 +17,24 @@ struct intel_overlay; struct intel_overlay_snapshot; #ifdef I915 -void intel_overlay_setup(struct drm_i915_private *dev_priv); -void intel_overlay_cleanup(struct drm_i915_private *dev_priv); +void intel_overlay_setup(struct intel_display *display); +bool intel_overlay_available(struct intel_display *display); +void intel_overlay_cleanup(struct intel_display *display); int intel_overlay_switch_off(struct intel_overlay *overlay); int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void intel_overlay_reset(struct drm_i915_private *dev_priv); +void intel_overlay_reset(struct intel_display *display); #else -static inline void intel_overlay_setup(struct drm_i915_private *dev_priv) +static inline void intel_overlay_setup(struct intel_display *display) { } -static inline void intel_overlay_cleanup(struct drm_i915_private *dev_priv) +static inline bool intel_overlay_available(struct intel_display *display) +{ + return false; +} +static inline void intel_overlay_cleanup(struct intel_display *display) { } static inline int intel_overlay_switch_off(struct intel_overlay *overlay) @@ -37,7 +42,7 @@ static inline int intel_overlay_switch_off(struct intel_overlay *overlay) return 0; } static inline int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) + struct drm_file *file_priv) { return 0; } @@ -46,7 +51,7 @@ static inline int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, { return 0; } -static inline void intel_overlay_reset(struct drm_i915_private *dev_priv) +static inline void intel_overlay_reset(struct intel_display *display) { } #endif diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 
313bd3f35ace..4e6c5592c7ae 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -33,7 +33,6 @@ #include <drm/drm_edid.h> -#include "i915_drv.h" #include "intel_backlight.h" #include "intel_connector.h" #include "intel_display_core.h" @@ -383,12 +382,12 @@ void intel_panel_add_encoder_fixed_mode(struct intel_connector *connector, enum drm_connector_status intel_panel_detect(struct drm_connector *connector, bool force) { - struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_display *display = to_intel_display(connector->dev); - if (!intel_display_device_enabled(i915)) + if (!intel_display_device_enabled(display)) return connector_status_disconnected; - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return connector->status; return connector_status_connected; diff --git a/drivers/gpu/drm/i915/display/intel_pch_display.c b/drivers/gpu/drm/i915/display/intel_pch_display.c index 4210de87a0a2..8fa5a6334d10 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_display.c +++ b/drivers/gpu/drm/i915/display/intel_pch_display.c @@ -4,8 +4,10 @@ */ #include "g4x_dp.h" +#include "i915_drv.h" #include "i915_reg.h" #include "intel_crt.h" +#include "intel_crt_regs.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dpll.h" diff --git a/drivers/gpu/drm/i915/display/intel_pch_refclk.c b/drivers/gpu/drm/i915/display/intel_pch_refclk.c index 84c55971e91a..71471c1d7dc9 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_refclk.c +++ b/drivers/gpu/drm/i915/display/intel_pch_refclk.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */ +#include "i915_drv.h" #include "i915_reg.h" #include "intel_de.h" #include "intel_display_types.h" @@ -108,13 +109,13 @@ void lpt_disable_iclkip(struct drm_i915_private *dev_priv) intel_de_write(dev_priv, PIXCLK_GATE, PIXCLK_GATE_GATE); - mutex_lock(&dev_priv->sb_lock); + intel_sbi_lock(dev_priv); temp = 
intel_sbi_read(dev_priv, SBI_SSCCTL6, SBI_ICLK); temp |= SBI_SSCCTL_DISABLE; intel_sbi_write(dev_priv, SBI_SSCCTL6, temp, SBI_ICLK); - mutex_unlock(&dev_priv->sb_lock); + intel_sbi_unlock(dev_priv); } struct iclkip_params { @@ -195,7 +196,7 @@ void lpt_program_iclkip(const struct intel_crtc_state *crtc_state) "iCLKIP clock: found settings for %dKHz refresh rate: auxdiv=%x, divsel=%x, phasedir=%x, phaseinc=%x\n", clock, p.auxdiv, p.divsel, p.phasedir, p.phaseinc); - mutex_lock(&dev_priv->sb_lock); + intel_sbi_lock(dev_priv); /* Program SSCDIVINTPHASE6 */ temp = intel_sbi_read(dev_priv, SBI_SSCDIVINTPHASE6, SBI_ICLK); @@ -218,7 +219,7 @@ void lpt_program_iclkip(const struct intel_crtc_state *crtc_state) temp &= ~SBI_SSCCTL_DISABLE; intel_sbi_write(dev_priv, SBI_SSCCTL6, temp, SBI_ICLK); - mutex_unlock(&dev_priv->sb_lock); + intel_sbi_unlock(dev_priv); /* Wait for initialization time */ udelay(24); @@ -236,11 +237,11 @@ int lpt_get_iclkip(struct drm_i915_private *dev_priv) iclkip_params_init(&p); - mutex_lock(&dev_priv->sb_lock); + intel_sbi_lock(dev_priv); temp = intel_sbi_read(dev_priv, SBI_SSCCTL6, SBI_ICLK); if (temp & SBI_SSCCTL_DISABLE) { - mutex_unlock(&dev_priv->sb_lock); + intel_sbi_unlock(dev_priv); return 0; } @@ -254,7 +255,7 @@ int lpt_get_iclkip(struct drm_i915_private *dev_priv) p.auxdiv = (temp & SBI_SSCAUXDIV_FINALDIV2SEL_MASK) >> SBI_SSCAUXDIV_FINALDIV2SEL_SHIFT; - mutex_unlock(&dev_priv->sb_lock); + intel_sbi_unlock(dev_priv); p.desired_divisor = (p.divsel + 2) * p.iclk_pi_range + p.phaseinc; @@ -279,7 +280,7 @@ static void lpt_enable_clkout_dp(struct drm_i915_private *dev_priv, with_fdi, "LP PCH doesn't have FDI\n")) with_fdi = false; - mutex_lock(&dev_priv->sb_lock); + intel_sbi_lock(dev_priv); tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK); tmp &= ~SBI_SSCCTL_DISABLE; @@ -302,7 +303,7 @@ static void lpt_enable_clkout_dp(struct drm_i915_private *dev_priv, tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE; intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK); 
- mutex_unlock(&dev_priv->sb_lock); + intel_sbi_unlock(dev_priv); } /* Sequence to disable CLKOUT_DP */ @@ -310,7 +311,7 @@ void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv) { u32 reg, tmp; - mutex_lock(&dev_priv->sb_lock); + intel_sbi_lock(dev_priv); reg = HAS_PCH_LPT_LP(dev_priv) ? SBI_GEN0 : SBI_DBUFF0; tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK); @@ -328,7 +329,7 @@ void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv) intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK); } - mutex_unlock(&dev_priv->sb_lock); + intel_sbi_unlock(dev_priv); } #define BEND_IDX(steps) ((50 + (steps)) / 5) @@ -374,7 +375,7 @@ static void lpt_bend_clkout_dp(struct drm_i915_private *dev_priv, int steps) if (drm_WARN_ON(&dev_priv->drm, idx >= ARRAY_SIZE(sscdivintphase))) return; - mutex_lock(&dev_priv->sb_lock); + intel_sbi_lock(dev_priv); if (steps % 10 != 0) tmp = 0xAAAAAAAB; @@ -387,7 +388,7 @@ static void lpt_bend_clkout_dp(struct drm_i915_private *dev_priv, int steps) tmp |= sscdivintphase[idx]; intel_sbi_write(dev_priv, SBI_SSCDIVINTPHASE, tmp, SBI_ICLK); - mutex_unlock(&dev_priv->sb_lock); + intel_sbi_unlock(dev_priv); } #undef BEND_IDX diff --git a/drivers/gpu/drm/i915/display/intel_pfit.c b/drivers/gpu/drm/i915/display/intel_pfit.c index 50861aa78a89..4ee03d9d14ad 100644 --- a/drivers/gpu/drm/i915/display/intel_pfit.c +++ b/drivers/gpu/drm/i915/display/intel_pfit.c @@ -3,8 +3,8 @@ * Copyright © 2024 Intel Corporation */ -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_display_core.h" #include "intel_display_driver.h" #include "intel_display_types.h" diff --git a/drivers/gpu/drm/i915/display/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c index 304da826dee1..90efc6f64e52 100644 --- a/drivers/gpu/drm/i915/display/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c @@ -28,6 +28,7 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> +#include "i915_drv.h" #include "i915_irq.h" 
#include "i915_reg.h" #include "intel_atomic.h" diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index 62401f6a04e4..6789b7f14095 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -20,10 +20,10 @@ intel_reuse_initial_plane_obj(struct intel_crtc *this, struct drm_framebuffer **fb, struct i915_vma **vma) { - struct drm_i915_private *i915 = to_i915(this->base.dev); + struct intel_display *display = to_intel_display(this); struct intel_crtc *crtc; - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_plane *plane = to_intel_plane(crtc->base.primary); const struct intel_plane_state *plane_state = @@ -48,9 +48,10 @@ intel_reuse_initial_plane_obj(struct intel_crtc *this, } static bool -initial_plane_phys_lmem(struct drm_i915_private *i915, +initial_plane_phys_lmem(struct intel_display *display, struct intel_initial_plane_config *plane_config) { + struct drm_i915_private *i915 = to_i915(display->drm); gen8_pte_t __iomem *gte = to_gt(i915)->ggtt->gsm; struct intel_memory_region *mem; dma_addr_t dma_addr; @@ -63,7 +64,7 @@ initial_plane_phys_lmem(struct drm_i915_private *i915, pte = ioread64(gte); if (!(pte & GEN12_GGTT_PTE_LM)) { - drm_err(&i915->drm, + drm_err(display->drm, "Initial plane programming missing PTE_LM bit\n"); return false; } @@ -75,7 +76,7 @@ initial_plane_phys_lmem(struct drm_i915_private *i915, else mem = i915->mm.stolen_region; if (!mem) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Initial plane memory region not initialized\n"); return false; } @@ -85,13 +86,13 @@ initial_plane_phys_lmem(struct drm_i915_private *i915, * ever be placed in the stolen portion. 
*/ if (dma_addr < mem->region.start || dma_addr > mem->region.end) { - drm_err(&i915->drm, + drm_err(display->drm, "Initial plane programming using invalid range, dma_addr=%pa (%s [%pa-%pa])\n", &dma_addr, mem->region.name, &mem->region.start, &mem->region.end); return false; } - drm_dbg(&i915->drm, + drm_dbg(display->drm, "Using dma_addr=%pa, based on initial plane programming\n", &dma_addr); @@ -102,9 +103,10 @@ initial_plane_phys_lmem(struct drm_i915_private *i915, } static bool -initial_plane_phys_smem(struct drm_i915_private *i915, +initial_plane_phys_smem(struct intel_display *display, struct intel_initial_plane_config *plane_config) { + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_memory_region *mem; u32 base; @@ -112,7 +114,7 @@ initial_plane_phys_smem(struct drm_i915_private *i915, mem = i915->mm.stolen_region; if (!mem) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Initial plane memory region not initialized\n"); return false; } @@ -125,19 +127,22 @@ initial_plane_phys_smem(struct drm_i915_private *i915, } static bool -initial_plane_phys(struct drm_i915_private *i915, +initial_plane_phys(struct intel_display *display, struct intel_initial_plane_config *plane_config) { + struct drm_i915_private *i915 = to_i915(display->drm); + if (IS_DGFX(i915) || HAS_LMEMBAR_SMEM_STOLEN(i915)) - return initial_plane_phys_lmem(i915, plane_config); + return initial_plane_phys_lmem(display, plane_config); else - return initial_plane_phys_smem(i915, plane_config); + return initial_plane_phys_smem(display, plane_config); } static struct i915_vma * -initial_plane_vma(struct drm_i915_private *i915, +initial_plane_vma(struct intel_display *display, struct intel_initial_plane_config *plane_config) { + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_memory_region *mem; struct drm_i915_gem_object *obj; struct drm_mm_node orig_mm = {}; @@ -149,7 +154,7 @@ initial_plane_vma(struct drm_i915_private *i915, if (plane_config->size == 
0) return NULL; - if (!initial_plane_phys(i915, plane_config)) + if (!initial_plane_phys(display, plane_config)) return NULL; phys_base = plane_config->phys_base; @@ -168,7 +173,7 @@ initial_plane_vma(struct drm_i915_private *i915, if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) && mem == i915->mm.stolen_region && size * 2 > i915->dsm.usable_size) { - drm_dbg_kms(&i915->drm, "Initial FB size exceeds half of stolen, discarding\n"); + drm_dbg_kms(display->drm, "Initial FB size exceeds half of stolen, discarding\n"); return NULL; } @@ -176,7 +181,7 @@ initial_plane_vma(struct drm_i915_private *i915, I915_BO_ALLOC_USER | I915_BO_PREALLOC); if (IS_ERR(obj)) { - drm_dbg_kms(&i915->drm, "Failed to preallocate initial FB in %s\n", + drm_dbg_kms(display->drm, "Failed to preallocate initial FB in %s\n", mem->region.name); return NULL; } @@ -254,7 +259,7 @@ retry: if (drm_mm_node_allocated(&orig_mm)) drm_mm_remove_node(&orig_mm); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Initial plane fb bound to 0x%x in the ggtt (original 0x%x)\n", i915_ggtt_offset(vma), plane_config->base); @@ -271,8 +276,7 @@ static bool intel_alloc_initial_plane_obj(struct intel_crtc *crtc, struct intel_initial_plane_config *plane_config) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(crtc); struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_framebuffer *fb = &plane_config->fb->base; struct i915_vma *vma; @@ -284,13 +288,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, case I915_FORMAT_MOD_4_TILED: break; default: - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Unsupported modifier for initial FB: 0x%llx\n", fb->modifier); return false; } - vma = initial_plane_vma(dev_priv, plane_config); + vma = initial_plane_vma(display, plane_config); if (!vma) return false; @@ -303,7 +307,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, if 
(intel_framebuffer_init(to_intel_framebuffer(fb), intel_bo_to_drm_bo(vma->obj), &mode_cmd)) { - drm_dbg_kms(&dev_priv->drm, "intel fb init failed\n"); + drm_dbg_kms(display->drm, "intel fb init failed\n"); goto err_vma; } @@ -410,12 +414,12 @@ static void plane_config_fini(struct intel_initial_plane_config *plane_config) i915_vma_put(plane_config->vma); } -void intel_initial_plane_config(struct drm_i915_private *i915) +void intel_initial_plane_config(struct intel_display *display) { struct intel_initial_plane_config plane_configs[I915_MAX_PIPES] = {}; struct intel_crtc *crtc; - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_initial_plane_config *plane_config = &plane_configs[crtc->pipe]; @@ -429,7 +433,7 @@ void intel_initial_plane_config(struct drm_i915_private *i915) * can even allow for smooth boot transitions if the BIOS * fb is large enough for the active pipe configuration. */ - i915->display.funcs.display->get_initial_plane_config(crtc, plane_config); + display->funcs.display->get_initial_plane_config(crtc, plane_config); /* * If the fb is shared between multiple heads, we'll @@ -437,7 +441,7 @@ void intel_initial_plane_config(struct drm_i915_private *i915) */ intel_find_initial_plane_obj(crtc, plane_configs); - if (i915->display.funcs.display->fixup_initial_plane_config(crtc, plane_config)) + if (display->funcs.display->fixup_initial_plane_config(crtc, plane_config)) intel_crtc_wait_for_next_vblank(crtc); plane_config_fini(plane_config); diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.h b/drivers/gpu/drm/i915/display/intel_plane_initial.h index 64ab95239cd4..6c6aa717ed21 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.h +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.h @@ -6,8 +6,8 @@ #ifndef __INTEL_PLANE_INITIAL_H__ #define __INTEL_PLANE_INITIAL_H__ -struct drm_i915_private; +struct intel_display; -void intel_initial_plane_config(struct drm_i915_private *i915); +void 
intel_initial_plane_config(struct intel_display *display); #endif diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.c b/drivers/gpu/drm/i915/display/intel_pmdemand.c index cdd314956a31..975520322136 100644 --- a/drivers/gpu/drm/i915/display/intel_pmdemand.c +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.c @@ -5,16 +5,50 @@ #include <linux/bitops.h> -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_atomic.h" #include "intel_bw.h" #include "intel_cdclk.h" #include "intel_de.h" #include "intel_display_trace.h" #include "intel_pmdemand.h" +#include "intel_step.h" #include "skl_watermark.h" +struct pmdemand_params { + u16 qclk_gv_bw; + u8 voltage_index; + u8 qclk_gv_index; + u8 active_pipes; + u8 active_dbufs; /* pre-Xe3 only */ + /* Total number of non type C active phys from active_phys_mask */ + u8 active_phys; + u8 plls; + u16 cdclk_freq_mhz; + /* max from ddi_clocks[] */ + u16 ddiclk_max; + u8 scalers; /* pre-Xe3 only */ +}; + +struct intel_pmdemand_state { + struct intel_global_state base; + + /* Maintain a persistent list of port clocks across all crtcs */ + int ddi_clocks[I915_MAX_PIPES]; + + /* Maintain a persistent list of non type C phys mask */ + u16 active_combo_phys_mask; + + /* Parameters to be configured in the pmdemand registers */ + struct pmdemand_params params; +}; + +struct intel_pmdemand_state *to_intel_pmdemand_state(struct intel_global_state *obj_state) +{ + return container_of(obj_state, struct intel_pmdemand_state, base); +} + static struct intel_global_state * intel_pmdemand_duplicate_state(struct intel_global_obj *obj) { @@ -41,10 +75,10 @@ static const struct intel_global_state_funcs intel_pmdemand_funcs = { static struct intel_pmdemand_state * intel_atomic_get_pmdemand_state(struct intel_atomic_state *state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_global_state *pmdemand_state = 
intel_atomic_get_global_obj_state(state, - &i915->display.pmdemand.obj); + &display->pmdemand.obj); if (IS_ERR(pmdemand_state)) return ERR_CAST(pmdemand_state); @@ -55,10 +89,10 @@ intel_atomic_get_pmdemand_state(struct intel_atomic_state *state) static struct intel_pmdemand_state * intel_atomic_get_old_pmdemand_state(struct intel_atomic_state *state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_global_state *pmdemand_state = intel_atomic_get_old_global_obj_state(state, - &i915->display.pmdemand.obj); + &display->pmdemand.obj); if (!pmdemand_state) return NULL; @@ -69,10 +103,10 @@ intel_atomic_get_old_pmdemand_state(struct intel_atomic_state *state) static struct intel_pmdemand_state * intel_atomic_get_new_pmdemand_state(struct intel_atomic_state *state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_global_state *pmdemand_state = intel_atomic_get_new_global_obj_state(state, - &i915->display.pmdemand.obj); + &display->pmdemand.obj); if (!pmdemand_state) return NULL; @@ -80,7 +114,7 @@ intel_atomic_get_new_pmdemand_state(struct intel_atomic_state *state) return to_intel_pmdemand_state(pmdemand_state); } -int intel_pmdemand_init(struct drm_i915_private *i915) +int intel_pmdemand_init(struct intel_display *display) { struct intel_pmdemand_state *pmdemand_state; @@ -88,32 +122,32 @@ int intel_pmdemand_init(struct drm_i915_private *i915) if (!pmdemand_state) return -ENOMEM; - intel_atomic_global_obj_init(i915, &i915->display.pmdemand.obj, + intel_atomic_global_obj_init(display, &display->pmdemand.obj, &pmdemand_state->base, &intel_pmdemand_funcs); - if (IS_DISPLAY_VERx100_STEP(i915, 1400, STEP_A0, STEP_C0)) + if (IS_DISPLAY_VERx100_STEP(display, 1400, STEP_A0, STEP_C0)) /* Wa_14016740474 */ - intel_de_rmw(i915, XELPD_CHICKEN_DCPR_3, 0, DMD_RSP_TIMEOUT_DISABLE); + intel_de_rmw(display, 
XELPD_CHICKEN_DCPR_3, 0, DMD_RSP_TIMEOUT_DISABLE); return 0; } -void intel_pmdemand_init_early(struct drm_i915_private *i915) +void intel_pmdemand_init_early(struct intel_display *display) { - mutex_init(&i915->display.pmdemand.lock); - init_waitqueue_head(&i915->display.pmdemand.waitqueue); + mutex_init(&display->pmdemand.lock); + init_waitqueue_head(&display->pmdemand.waitqueue); } void -intel_pmdemand_update_phys_mask(struct drm_i915_private *i915, +intel_pmdemand_update_phys_mask(struct intel_display *display, struct intel_encoder *encoder, struct intel_pmdemand_state *pmdemand_state, bool set_bit) { enum phy phy; - if (DISPLAY_VER(i915) < 14) + if (DISPLAY_VER(display) < 14) return; if (!encoder) @@ -131,18 +165,18 @@ intel_pmdemand_update_phys_mask(struct drm_i915_private *i915, } void -intel_pmdemand_update_port_clock(struct drm_i915_private *i915, +intel_pmdemand_update_port_clock(struct intel_display *display, struct intel_pmdemand_state *pmdemand_state, enum pipe pipe, int port_clock) { - if (DISPLAY_VER(i915) < 14) + if (DISPLAY_VER(display) < 14) return; pmdemand_state->ddi_clocks[pipe] = port_clock; } static void -intel_pmdemand_update_max_ddiclk(struct drm_i915_private *i915, +intel_pmdemand_update_max_ddiclk(struct intel_display *display, struct intel_atomic_state *state, struct intel_pmdemand_state *pmdemand_state) { @@ -152,7 +186,7 @@ intel_pmdemand_update_max_ddiclk(struct drm_i915_private *i915, int i; for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) - intel_pmdemand_update_port_clock(i915, pmdemand_state, + intel_pmdemand_update_port_clock(display, pmdemand_state, crtc->pipe, new_crtc_state->port_clock); @@ -163,7 +197,7 @@ intel_pmdemand_update_max_ddiclk(struct drm_i915_private *i915, } static void -intel_pmdemand_update_connector_phys(struct drm_i915_private *i915, +intel_pmdemand_update_connector_phys(struct intel_display *display, struct intel_atomic_state *state, struct drm_connector_state *conn_state, bool set_bit, @@ 
-184,12 +218,12 @@ intel_pmdemand_update_connector_phys(struct drm_i915_private *i915, if (!crtc_state->hw.active) return; - intel_pmdemand_update_phys_mask(i915, encoder, pmdemand_state, + intel_pmdemand_update_phys_mask(display, encoder, pmdemand_state, set_bit); } static void -intel_pmdemand_update_active_non_tc_phys(struct drm_i915_private *i915, +intel_pmdemand_update_active_non_tc_phys(struct intel_display *display, struct intel_atomic_state *state, struct intel_pmdemand_state *pmdemand_state) { @@ -204,12 +238,12 @@ intel_pmdemand_update_active_non_tc_phys(struct drm_i915_private *i915, continue; /* First clear the active phys in the old connector state */ - intel_pmdemand_update_connector_phys(i915, state, + intel_pmdemand_update_connector_phys(display, state, old_conn_state, false, pmdemand_state); /* Then set the active phys in new connector state */ - intel_pmdemand_update_connector_phys(i915, state, + intel_pmdemand_update_connector_phys(display, state, new_conn_state, true, pmdemand_state); } @@ -220,7 +254,7 @@ intel_pmdemand_update_active_non_tc_phys(struct drm_i915_private *i915, } static bool -intel_pmdemand_encoder_has_tc_phy(struct drm_i915_private *i915, +intel_pmdemand_encoder_has_tc_phy(struct intel_display *display, struct intel_encoder *encoder) { return encoder && intel_encoder_is_tc(encoder); @@ -229,7 +263,7 @@ intel_pmdemand_encoder_has_tc_phy(struct drm_i915_private *i915, static bool intel_pmdemand_connector_needs_update(struct intel_atomic_state *state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct drm_connector_state *old_conn_state; struct drm_connector_state *new_conn_state; struct drm_connector *connector; @@ -246,8 +280,8 @@ intel_pmdemand_connector_needs_update(struct intel_atomic_state *state) continue; if (old_encoder == new_encoder || - (intel_pmdemand_encoder_has_tc_phy(i915, old_encoder) && - intel_pmdemand_encoder_has_tc_phy(i915, 
new_encoder))) + (intel_pmdemand_encoder_has_tc_phy(display, old_encoder) && + intel_pmdemand_encoder_has_tc_phy(display, new_encoder))) continue; return true; @@ -304,13 +338,13 @@ static bool intel_pmdemand_needs_update(struct intel_atomic_state *state) int intel_pmdemand_atomic_check(struct intel_atomic_state *state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); const struct intel_bw_state *new_bw_state; const struct intel_cdclk_state *new_cdclk_state; const struct intel_dbuf_state *new_dbuf_state; struct intel_pmdemand_state *new_pmdemand_state; - if (DISPLAY_VER(i915) < 14) + if (DISPLAY_VER(display) < 14) return 0; if (!intel_pmdemand_needs_update(state)) @@ -332,14 +366,14 @@ int intel_pmdemand_atomic_check(struct intel_atomic_state *state) if (IS_ERR(new_dbuf_state)) return PTR_ERR(new_dbuf_state); - if (DISPLAY_VER(i915) < 30) { + if (DISPLAY_VER(display) < 30) { new_pmdemand_state->params.active_dbufs = min_t(u8, hweight8(new_dbuf_state->enabled_slices), 3); new_pmdemand_state->params.active_pipes = min_t(u8, hweight8(new_dbuf_state->active_pipes), 3); } else { new_pmdemand_state->params.active_pipes = - min_t(u8, hweight8(new_dbuf_state->active_pipes), INTEL_NUM_PIPES(i915)); + min_t(u8, hweight8(new_dbuf_state->active_pipes), INTEL_NUM_PIPES(display)); } new_cdclk_state = intel_atomic_get_cdclk_state(state); @@ -351,9 +385,9 @@ int intel_pmdemand_atomic_check(struct intel_atomic_state *state) new_pmdemand_state->params.cdclk_freq_mhz = DIV_ROUND_UP(new_cdclk_state->actual.cdclk, 1000); - intel_pmdemand_update_max_ddiclk(i915, state, new_pmdemand_state); + intel_pmdemand_update_max_ddiclk(display, state, new_pmdemand_state); - intel_pmdemand_update_active_non_tc_phys(i915, state, new_pmdemand_state); + intel_pmdemand_update_active_non_tc_phys(display, state, new_pmdemand_state); /* * Active_PLLs starts with 1 because of CDCLK PLL. 
@@ -374,36 +408,36 @@ int intel_pmdemand_atomic_check(struct intel_atomic_state *state) return intel_atomic_lock_global_state(&new_pmdemand_state->base); } -static bool intel_pmdemand_check_prev_transaction(struct drm_i915_private *i915) +static bool intel_pmdemand_check_prev_transaction(struct intel_display *display) { - return !(intel_de_wait_for_clear(i915, + return !(intel_de_wait_for_clear(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1), XELPDP_PMDEMAND_REQ_ENABLE, 10) || - intel_de_wait_for_clear(i915, + intel_de_wait_for_clear(display, GEN12_DCPR_STATUS_1, XELPDP_PMDEMAND_INFLIGHT_STATUS, 10)); } void -intel_pmdemand_init_pmdemand_params(struct drm_i915_private *i915, +intel_pmdemand_init_pmdemand_params(struct intel_display *display, struct intel_pmdemand_state *pmdemand_state) { u32 reg1, reg2; - if (DISPLAY_VER(i915) < 14) + if (DISPLAY_VER(display) < 14) return; - mutex_lock(&i915->display.pmdemand.lock); - if (drm_WARN_ON(&i915->drm, - !intel_pmdemand_check_prev_transaction(i915))) { + mutex_lock(&display->pmdemand.lock); + if (drm_WARN_ON(display->drm, + !intel_pmdemand_check_prev_transaction(display))) { memset(&pmdemand_state->params, 0, sizeof(pmdemand_state->params)); goto unlock; } - reg1 = intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0)); + reg1 = intel_de_read(display, XELPDP_INITIATE_PMDEMAND_REQUEST(0)); - reg2 = intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1)); + reg2 = intel_de_read(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1)); pmdemand_state->params.qclk_gv_bw = REG_FIELD_GET(XELPDP_PMDEMAND_QCLK_GV_BW_MASK, reg1); @@ -419,7 +453,7 @@ intel_pmdemand_init_pmdemand_params(struct drm_i915_private *i915, pmdemand_state->params.ddiclk_max = REG_FIELD_GET(XELPDP_PMDEMAND_DDICLK_FREQ_MASK, reg2); - if (DISPLAY_VER(i915) >= 30) { + if (DISPLAY_VER(display) >= 30) { pmdemand_state->params.active_pipes = REG_FIELD_GET(XE3_PMDEMAND_PIPES_MASK, reg1); } else { @@ -433,49 +467,49 @@ intel_pmdemand_init_pmdemand_params(struct 
drm_i915_private *i915, } unlock: - mutex_unlock(&i915->display.pmdemand.lock); + mutex_unlock(&display->pmdemand.lock); } -static bool intel_pmdemand_req_complete(struct drm_i915_private *i915) +static bool intel_pmdemand_req_complete(struct intel_display *display) { - return !(intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1)) & + return !(intel_de_read(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1)) & XELPDP_PMDEMAND_REQ_ENABLE); } -static void intel_pmdemand_wait(struct drm_i915_private *i915) +static void intel_pmdemand_wait(struct intel_display *display) { - if (!wait_event_timeout(i915->display.pmdemand.waitqueue, - intel_pmdemand_req_complete(i915), + if (!wait_event_timeout(display->pmdemand.waitqueue, + intel_pmdemand_req_complete(display), msecs_to_jiffies_timeout(10))) - drm_err(&i915->drm, + drm_err(display->drm, "timed out waiting for Punit PM Demand Response\n"); } /* Required to be programmed during Display Init Sequences. */ -void intel_pmdemand_program_dbuf(struct drm_i915_private *i915, +void intel_pmdemand_program_dbuf(struct intel_display *display, u8 dbuf_slices) { u32 dbufs = min_t(u32, hweight8(dbuf_slices), 3); /* PM Demand only tracks active dbufs on pre-Xe3 platforms */ - if (DISPLAY_VER(i915) >= 30) + if (DISPLAY_VER(display) >= 30) return; - mutex_lock(&i915->display.pmdemand.lock); - if (drm_WARN_ON(&i915->drm, - !intel_pmdemand_check_prev_transaction(i915))) + mutex_lock(&display->pmdemand.lock); + if (drm_WARN_ON(display->drm, + !intel_pmdemand_check_prev_transaction(display))) goto unlock; - intel_de_rmw(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0), + intel_de_rmw(display, XELPDP_INITIATE_PMDEMAND_REQUEST(0), XELPDP_PMDEMAND_DBUFS_MASK, REG_FIELD_PREP(XELPDP_PMDEMAND_DBUFS_MASK, dbufs)); - intel_de_rmw(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1), 0, + intel_de_rmw(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1), 0, XELPDP_PMDEMAND_REQ_ENABLE); - intel_pmdemand_wait(i915); + intel_pmdemand_wait(display); unlock: - 
mutex_unlock(&i915->display.pmdemand.lock); + mutex_unlock(&display->pmdemand.lock); } static void @@ -535,38 +569,37 @@ intel_pmdemand_update_params(struct intel_display *display, } static void -intel_pmdemand_program_params(struct drm_i915_private *i915, +intel_pmdemand_program_params(struct intel_display *display, const struct intel_pmdemand_state *new, const struct intel_pmdemand_state *old, bool serialized) { - struct intel_display *display = &i915->display; bool changed = false; u32 reg1, mod_reg1; u32 reg2, mod_reg2; - mutex_lock(&i915->display.pmdemand.lock); - if (drm_WARN_ON(&i915->drm, - !intel_pmdemand_check_prev_transaction(i915))) + mutex_lock(&display->pmdemand.lock); + if (drm_WARN_ON(display->drm, + !intel_pmdemand_check_prev_transaction(display))) goto unlock; - reg1 = intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0)); + reg1 = intel_de_read(display, XELPDP_INITIATE_PMDEMAND_REQUEST(0)); mod_reg1 = reg1; - reg2 = intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1)); + reg2 = intel_de_read(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1)); mod_reg2 = reg2; intel_pmdemand_update_params(display, new, old, &mod_reg1, &mod_reg2, serialized); if (reg1 != mod_reg1) { - intel_de_write(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0), + intel_de_write(display, XELPDP_INITIATE_PMDEMAND_REQUEST(0), mod_reg1); changed = true; } if (reg2 != mod_reg2) { - intel_de_write(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1), + intel_de_write(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1), mod_reg2); changed = true; } @@ -575,17 +608,17 @@ intel_pmdemand_program_params(struct drm_i915_private *i915, if (!changed) goto unlock; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "initate pmdemand request values: (0x%x 0x%x)\n", mod_reg1, mod_reg2); - intel_de_rmw(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1), 0, + intel_de_rmw(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1), 0, XELPDP_PMDEMAND_REQ_ENABLE); - intel_pmdemand_wait(i915); + intel_pmdemand_wait(display); unlock: - 
mutex_unlock(&i915->display.pmdemand.lock); + mutex_unlock(&display->pmdemand.lock); } static bool @@ -597,13 +630,13 @@ intel_pmdemand_state_changed(const struct intel_pmdemand_state *new, void intel_pmdemand_pre_plane_update(struct intel_atomic_state *state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); const struct intel_pmdemand_state *new_pmdemand_state = intel_atomic_get_new_pmdemand_state(state); const struct intel_pmdemand_state *old_pmdemand_state = intel_atomic_get_old_pmdemand_state(state); - if (DISPLAY_VER(i915) < 14) + if (DISPLAY_VER(display) < 14) return; if (!new_pmdemand_state || @@ -613,20 +646,20 @@ void intel_pmdemand_pre_plane_update(struct intel_atomic_state *state) WARN_ON(!new_pmdemand_state->base.changed); - intel_pmdemand_program_params(i915, new_pmdemand_state, + intel_pmdemand_program_params(display, new_pmdemand_state, old_pmdemand_state, intel_atomic_global_state_is_serialized(state)); } void intel_pmdemand_post_plane_update(struct intel_atomic_state *state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); const struct intel_pmdemand_state *new_pmdemand_state = intel_atomic_get_new_pmdemand_state(state); const struct intel_pmdemand_state *old_pmdemand_state = intel_atomic_get_old_pmdemand_state(state); - if (DISPLAY_VER(i915) < 14) + if (DISPLAY_VER(display) < 14) return; if (!new_pmdemand_state || @@ -636,6 +669,6 @@ void intel_pmdemand_post_plane_update(struct intel_atomic_state *state) WARN_ON(!new_pmdemand_state->base.changed); - intel_pmdemand_program_params(i915, new_pmdemand_state, NULL, + intel_pmdemand_program_params(display, new_pmdemand_state, NULL, intel_atomic_global_state_is_serialized(state)); } diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.h b/drivers/gpu/drm/i915/display/intel_pmdemand.h index a1c49efdc493..821ef2c4134a 100644 --- 
a/drivers/gpu/drm/i915/display/intel_pmdemand.h +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.h @@ -6,58 +6,31 @@ #ifndef __INTEL_PMDEMAND_H__ #define __INTEL_PMDEMAND_H__ -#include "intel_display_limits.h" -#include "intel_global_state.h" +#include <linux/types.h> -struct drm_i915_private; +enum pipe; struct intel_atomic_state; struct intel_crtc_state; +struct intel_display; struct intel_encoder; +struct intel_global_state; struct intel_plane_state; +struct intel_pmdemand_state; -struct pmdemand_params { - u16 qclk_gv_bw; - u8 voltage_index; - u8 qclk_gv_index; - u8 active_pipes; - u8 active_dbufs; /* pre-Xe3 only */ - /* Total number of non type C active phys from active_phys_mask */ - u8 active_phys; - u8 plls; - u16 cdclk_freq_mhz; - /* max from ddi_clocks[] */ - u16 ddiclk_max; - u8 scalers; /* pre-Xe3 only */ -}; +struct intel_pmdemand_state *to_intel_pmdemand_state(struct intel_global_state *obj_state); -struct intel_pmdemand_state { - struct intel_global_state base; - - /* Maintain a persistent list of port clocks across all crtcs */ - int ddi_clocks[I915_MAX_PIPES]; - - /* Maintain a persistent list of non type C phys mask */ - u16 active_combo_phys_mask; - - /* Parameters to be configured in the pmdemand registers */ - struct pmdemand_params params; -}; - -#define to_intel_pmdemand_state(global_state) \ - container_of_const((global_state), struct intel_pmdemand_state, base) - -void intel_pmdemand_init_early(struct drm_i915_private *i915); -int intel_pmdemand_init(struct drm_i915_private *i915); -void intel_pmdemand_init_pmdemand_params(struct drm_i915_private *i915, +void intel_pmdemand_init_early(struct intel_display *display); +int intel_pmdemand_init(struct intel_display *display); +void intel_pmdemand_init_pmdemand_params(struct intel_display *display, struct intel_pmdemand_state *pmdemand_state); -void intel_pmdemand_update_port_clock(struct drm_i915_private *i915, +void intel_pmdemand_update_port_clock(struct intel_display *display, struct 
intel_pmdemand_state *pmdemand_state, enum pipe pipe, int port_clock); -void intel_pmdemand_update_phys_mask(struct drm_i915_private *i915, +void intel_pmdemand_update_phys_mask(struct intel_display *display, struct intel_encoder *encoder, struct intel_pmdemand_state *pmdemand_state, bool clear_bit); -void intel_pmdemand_program_dbuf(struct drm_i915_private *i915, +void intel_pmdemand_program_dbuf(struct intel_display *display, u8 dbuf_slices); void intel_pmdemand_pre_plane_update(struct intel_atomic_state *state); void intel_pmdemand_post_plane_update(struct intel_atomic_state *state); diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 093fe37a3983..eb35f0249f2b 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -134,7 +134,7 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) */ if (!pll_enabled) { release_cl_override = display->platform.cherryview && - !chv_phy_powergate_ch(dev_priv, phy, ch, true); + !chv_phy_powergate_ch(display, phy, ch, true); if (vlv_force_pll_on(dev_priv, pipe, vlv_get_dpll(dev_priv))) { drm_err(display->drm, @@ -163,7 +163,7 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) vlv_force_pll_off(dev_priv, pipe); if (release_cl_override) - chv_phy_powergate_ch(dev_priv, phy, ch, false); + chv_phy_powergate_ch(display, phy, ch, false); } } @@ -668,23 +668,24 @@ static void wait_panel_power_cycle(struct intel_dp *intel_dp) struct intel_display *display = to_intel_display(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); ktime_t panel_power_on_time; - s64 panel_power_off_duration; - - drm_dbg_kms(display->drm, - "[ENCODER:%d:%s] %s wait for panel power cycle\n", - dig_port->base.base.base.id, dig_port->base.base.name, - pps_name(intel_dp)); + s64 panel_power_off_duration, remaining; /* take the difference of current time and panel power off time - * and then make panel wait for t11_t12 if needed. 
*/ + * and then make panel wait for power_cycle if needed. */ panel_power_on_time = ktime_get_boottime(); panel_power_off_duration = ktime_ms_delta(panel_power_on_time, intel_dp->pps.panel_power_off_time); + remaining = max(0, intel_dp->pps.panel_power_cycle_delay - panel_power_off_duration); + + drm_dbg_kms(display->drm, + "[ENCODER:%d:%s] %s wait for panel power cycle (%lld ms remaining)\n", + dig_port->base.base.base.id, dig_port->base.base.name, + pps_name(intel_dp), remaining); + /* When we disable the VDD override bit last we have to do the manual * wait. */ - if (panel_power_off_duration < (s64)intel_dp->pps.panel_power_cycle_delay) - wait_remaining_ms_from_jiffies(jiffies, - intel_dp->pps.panel_power_cycle_delay - panel_power_off_duration); + if (remaining) + wait_remaining_ms_from_jiffies(jiffies, remaining); wait_panel_status(intel_dp, IDLE_CYCLE_MASK, IDLE_CYCLE_VALUE); } @@ -1387,10 +1388,10 @@ static void pps_init_timestamps(struct intel_dp *intel_dp) } static void -intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq) +intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct intel_pps_delays *seq) { struct intel_display *display = to_intel_display(intel_dp); - u32 pp_on, pp_off, pp_ctl; + u32 pp_on, pp_off, pp_ctl, power_cycle_delay; struct pps_registers regs; intel_pps_get_registers(intel_dp, ®s); @@ -1405,59 +1406,77 @@ intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq) pp_off = intel_de_read(display, regs.pp_off); /* Pull timing values out of registers */ - seq->t1_t3 = REG_FIELD_GET(PANEL_POWER_UP_DELAY_MASK, pp_on); - seq->t8 = REG_FIELD_GET(PANEL_LIGHT_ON_DELAY_MASK, pp_on); - seq->t9 = REG_FIELD_GET(PANEL_LIGHT_OFF_DELAY_MASK, pp_off); - seq->t10 = REG_FIELD_GET(PANEL_POWER_DOWN_DELAY_MASK, pp_off); + seq->power_up = REG_FIELD_GET(PANEL_POWER_UP_DELAY_MASK, pp_on); + seq->backlight_on = REG_FIELD_GET(PANEL_LIGHT_ON_DELAY_MASK, pp_on); + seq->backlight_off = 
REG_FIELD_GET(PANEL_LIGHT_OFF_DELAY_MASK, pp_off); + seq->power_down = REG_FIELD_GET(PANEL_POWER_DOWN_DELAY_MASK, pp_off); if (i915_mmio_reg_valid(regs.pp_div)) { u32 pp_div; pp_div = intel_de_read(display, regs.pp_div); - seq->t11_t12 = REG_FIELD_GET(PANEL_POWER_CYCLE_DELAY_MASK, pp_div) * 1000; + power_cycle_delay = REG_FIELD_GET(PANEL_POWER_CYCLE_DELAY_MASK, pp_div); } else { - seq->t11_t12 = REG_FIELD_GET(BXT_POWER_CYCLE_DELAY_MASK, pp_ctl) * 1000; + power_cycle_delay = REG_FIELD_GET(BXT_POWER_CYCLE_DELAY_MASK, pp_ctl); } + + /* hardware wants <delay>+1 in 100ms units */ + seq->power_cycle = power_cycle_delay ? (power_cycle_delay - 1) * 1000 : 0; } static void intel_pps_dump_state(struct intel_dp *intel_dp, const char *state_name, - const struct edp_power_seq *seq) + const struct intel_pps_delays *seq) { struct intel_display *display = to_intel_display(intel_dp); drm_dbg_kms(display->drm, - "%s t1_t3 %d t8 %d t9 %d t10 %d t11_t12 %d\n", - state_name, - seq->t1_t3, seq->t8, seq->t9, seq->t10, seq->t11_t12); + "%s power_up %d backlight_on %d backlight_off %d power_down %d power_cycle %d\n", + state_name, seq->power_up, seq->backlight_on, + seq->backlight_off, seq->power_down, seq->power_cycle); } static void intel_pps_verify_state(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct edp_power_seq hw; - struct edp_power_seq *sw = &intel_dp->pps.pps_delays; + struct intel_pps_delays hw; + struct intel_pps_delays *sw = &intel_dp->pps.pps_delays; intel_pps_readout_hw_state(intel_dp, &hw); - if (hw.t1_t3 != sw->t1_t3 || hw.t8 != sw->t8 || hw.t9 != sw->t9 || - hw.t10 != sw->t10 || hw.t11_t12 != sw->t11_t12) { + if (hw.power_up != sw->power_up || + hw.backlight_on != sw->backlight_on || + hw.backlight_off != sw->backlight_off || + hw.power_down != sw->power_down || + hw.power_cycle != sw->power_cycle) { drm_err(display->drm, "PPS state mismatch\n"); intel_pps_dump_state(intel_dp, "sw", sw); intel_pps_dump_state(intel_dp, 
"hw", &hw); } } -static bool pps_delays_valid(struct edp_power_seq *delays) +static bool pps_delays_valid(struct intel_pps_delays *delays) +{ + return delays->power_up || delays->backlight_on || delays->backlight_off || + delays->power_down || delays->power_cycle; +} + +static int msecs_to_pps_units(int msecs) { - return delays->t1_t3 || delays->t8 || delays->t9 || - delays->t10 || delays->t11_t12; + /* PPS uses 100us units */ + return msecs * 10; +} + +static int pps_units_to_msecs(int val) +{ + /* PPS uses 100us units */ + return DIV_ROUND_UP(val, 10); } static void pps_init_delays_bios(struct intel_dp *intel_dp, - struct edp_power_seq *bios) + struct intel_pps_delays *bios) { struct intel_display *display = to_intel_display(intel_dp); @@ -1472,7 +1491,7 @@ static void pps_init_delays_bios(struct intel_dp *intel_dp, } static void pps_init_delays_vbt(struct intel_dp *intel_dp, - struct edp_power_seq *vbt) + struct intel_pps_delays *vbt) { struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; @@ -1488,39 +1507,28 @@ static void pps_init_delays_vbt(struct intel_dp *intel_dp, * seems sufficient to avoid this problem. */ if (intel_has_quirk(display, QUIRK_INCREASE_T12_DELAY)) { - vbt->t11_t12 = max_t(u16, vbt->t11_t12, 1300 * 10); + vbt->power_cycle = max_t(u16, vbt->power_cycle, msecs_to_pps_units(1300)); drm_dbg_kms(display->drm, "Increasing T12 panel delay as per the quirk to %d\n", - vbt->t11_t12); + vbt->power_cycle); } - /* T11_T12 delay is special and actually in units of 100ms, but zero - * based in the hw (so we need to add 100 ms). But the sw vbt - * table multiplies it with 1000 to make it in units of 100usec, - * too. 
*/ - vbt->t11_t12 += 100 * 10; - intel_pps_dump_state(intel_dp, "vbt", vbt); } static void pps_init_delays_spec(struct intel_dp *intel_dp, - struct edp_power_seq *spec) + struct intel_pps_delays *spec) { struct intel_display *display = to_intel_display(intel_dp); lockdep_assert_held(&display->pps.mutex); - /* Upper limits from eDP 1.3 spec. Note that we use the clunky units of - * our hw here, which are all in 100usec. */ - spec->t1_t3 = 210 * 10; - spec->t8 = 50 * 10; /* no limit for t8, use t7 instead */ - spec->t9 = 50 * 10; /* no limit for t9, make it symmetric with t8 */ - spec->t10 = 500 * 10; - /* This one is special and actually in units of 100ms, but zero - * based in the hw (so we need to add 100 ms). But the sw vbt - * table multiplies it with 1000 to make it in units of 100usec, - * too. */ - spec->t11_t12 = (510 + 100) * 10; + /* Upper limits from eDP 1.3 spec */ + spec->power_up = msecs_to_pps_units(10 + 200); /* T1+T3 */ + spec->backlight_on = msecs_to_pps_units(50); /* no limit for T8, use T7 instead */ + spec->backlight_off = msecs_to_pps_units(50); /* no limit for T9, make it symmetric with T8 */ + spec->power_down = msecs_to_pps_units(500); /* T10 */ + spec->power_cycle = msecs_to_pps_units(10 + 500); /* T11+T12 */ intel_pps_dump_state(intel_dp, "spec", spec); } @@ -1528,7 +1536,7 @@ static void pps_init_delays_spec(struct intel_dp *intel_dp, static void pps_init_delays(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct edp_power_seq cur, vbt, spec, + struct intel_pps_delays cur, vbt, spec, *final = &intel_dp->pps.pps_delays; lockdep_assert_held(&display->pps.mutex); @@ -1546,20 +1554,18 @@ static void pps_init_delays(struct intel_dp *intel_dp) #define assign_final(field) final->field = (max(cur.field, vbt.field) == 0 ? 
\ spec.field : \ max(cur.field, vbt.field)) - assign_final(t1_t3); - assign_final(t8); - assign_final(t9); - assign_final(t10); - assign_final(t11_t12); + assign_final(power_up); + assign_final(backlight_on); + assign_final(backlight_off); + assign_final(power_down); + assign_final(power_cycle); #undef assign_final -#define get_delay(field) (DIV_ROUND_UP(final->field, 10)) - intel_dp->pps.panel_power_up_delay = get_delay(t1_t3); - intel_dp->pps.backlight_on_delay = get_delay(t8); - intel_dp->pps.backlight_off_delay = get_delay(t9); - intel_dp->pps.panel_power_down_delay = get_delay(t10); - intel_dp->pps.panel_power_cycle_delay = get_delay(t11_t12); -#undef get_delay + intel_dp->pps.panel_power_up_delay = pps_units_to_msecs(final->power_up); + intel_dp->pps.backlight_on_delay = pps_units_to_msecs(final->backlight_on); + intel_dp->pps.backlight_off_delay = pps_units_to_msecs(final->backlight_off); + intel_dp->pps.panel_power_down_delay = pps_units_to_msecs(final->power_down); + intel_dp->pps.panel_power_cycle_delay = pps_units_to_msecs(final->power_cycle); drm_dbg_kms(display->drm, "panel power up delay %d, power down delay %d, power cycle delay %d\n", @@ -1573,19 +1579,20 @@ static void pps_init_delays(struct intel_dp *intel_dp) /* * We override the HW backlight delays to 1 because we do manual waits - * on them. For T8, even BSpec recommends doing it. For T9, if we - * don't do this, we'll end up waiting for the backlight off delay - * twice: once when we do the manual sleep, and once when we disable - * the panel and wait for the PP_STATUS bit to become zero. + * on them. For backlight_on, even BSpec recommends doing it. For + * backlight_off, if we don't do this, we'll end up waiting for the + * backlight off delay twice: once when we do the manual sleep, and + * once when we disable the panel and wait for the PP_STATUS bit to + * become zero. 
*/ - final->t8 = 1; - final->t9 = 1; + final->backlight_on = 1; + final->backlight_off = 1; /* - * HW has only a 100msec granularity for t11_t12 so round it up + * HW has only a 100msec granularity for power_cycle so round it up * accordingly. */ - final->t11_t12 = roundup(final->t11_t12, 100 * 10); + final->power_cycle = roundup(final->power_cycle, msecs_to_pps_units(100)); } static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd) @@ -1596,7 +1603,7 @@ static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd int div = DISPLAY_RUNTIME_INFO(display)->rawclk_freq / 1000; struct pps_registers regs; enum port port = dp_to_dig_port(intel_dp)->base.port; - const struct edp_power_seq *seq = &intel_dp->pps.pps_delays; + const struct intel_pps_delays *seq = &intel_dp->pps.pps_delays; lockdep_assert_held(&display->pps.mutex); @@ -1629,10 +1636,10 @@ static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd intel_de_write(display, regs.pp_ctrl, pp); } - pp_on = REG_FIELD_PREP(PANEL_POWER_UP_DELAY_MASK, seq->t1_t3) | - REG_FIELD_PREP(PANEL_LIGHT_ON_DELAY_MASK, seq->t8); - pp_off = REG_FIELD_PREP(PANEL_LIGHT_OFF_DELAY_MASK, seq->t9) | - REG_FIELD_PREP(PANEL_POWER_DOWN_DELAY_MASK, seq->t10); + pp_on = REG_FIELD_PREP(PANEL_POWER_UP_DELAY_MASK, seq->power_up) | + REG_FIELD_PREP(PANEL_LIGHT_ON_DELAY_MASK, seq->backlight_on); + pp_off = REG_FIELD_PREP(PANEL_LIGHT_OFF_DELAY_MASK, seq->backlight_off) | + REG_FIELD_PREP(PANEL_POWER_DOWN_DELAY_MASK, seq->power_down); /* Haswell doesn't have any port selection bits for the panel * power sequencer any more. 
*/ @@ -1665,11 +1672,14 @@ static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd */ if (i915_mmio_reg_valid(regs.pp_div)) intel_de_write(display, regs.pp_div, - REG_FIELD_PREP(PP_REFERENCE_DIVIDER_MASK, (100 * div) / 2 - 1) | REG_FIELD_PREP(PANEL_POWER_CYCLE_DELAY_MASK, DIV_ROUND_UP(seq->t11_t12, 1000))); + REG_FIELD_PREP(PP_REFERENCE_DIVIDER_MASK, + (100 * div) / 2 - 1) | + REG_FIELD_PREP(PANEL_POWER_CYCLE_DELAY_MASK, + DIV_ROUND_UP(seq->power_cycle, 1000) + 1)); else intel_de_rmw(display, regs.pp_ctrl, BXT_POWER_CYCLE_DELAY_MASK, REG_FIELD_PREP(BXT_POWER_CYCLE_DELAY_MASK, - DIV_ROUND_UP(seq->t11_t12, 1000))); + DIV_ROUND_UP(seq->power_cycle, 1000) + 1)); drm_dbg_kms(display->drm, "panel power sequencer register settings: PP_ON %#x, PP_OFF %#x, PP_DIV %#x\n", @@ -1810,6 +1820,8 @@ static int intel_pps_show(struct seq_file *m, void *data) intel_dp->pps.panel_power_up_delay); seq_printf(m, "Panel power down delay: %d\n", intel_dp->pps.panel_power_down_delay); + seq_printf(m, "Panel power cycle delay: %d\n", + intel_dp->pps.panel_power_cycle_delay); seq_printf(m, "Backlight on delay: %d\n", intel_dp->pps.backlight_on_delay); seq_printf(m, "Backlight off delay: %d\n", diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index a784c0b81556..0b021acb330f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -871,7 +871,7 @@ static u32 intel_psr1_get_tp_time(struct intel_dp *intel_dp) val |= EDP_PSR_TP2_TP3_TIME_100us; check_tp3_sel: - if (intel_dp_source_supports_tps3(dev_priv) && + if (intel_dp_source_supports_tps3(display) && drm_dp_tps3_supported(intel_dp->dpcd)) val |= EDP_PSR_TP_TP1_TP3; else @@ -1130,18 +1130,16 @@ static void psr2_program_idle_frames(struct intel_dp *intel_dp, static void tgl_psr2_enable_dc3co(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = 
to_i915(display->drm); psr2_program_idle_frames(intel_dp, 0); - intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO); + intel_display_power_set_target_dc_state(display, DC_STATE_EN_DC3CO); } static void tgl_psr2_disable_dc3co(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); - intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + intel_display_power_set_target_dc_state(display, DC_STATE_EN_UPTO_DC6); psr2_program_idle_frames(intel_dp, psr_compute_idle_frames(intel_dp)); } @@ -1564,13 +1562,6 @@ static bool _psr_compute_config(struct intel_dp *intel_dp, const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; int entry_setup_frames; - /* - * Current PSR panels don't work reliably with VRR enabled - * So if VRR is enabled, do not enable PSR. - */ - if (crtc_state->vrr.enable) - return false; - if (!CAN_PSR(intel_dp)) return false; @@ -1644,6 +1635,15 @@ _panel_replay_compute_config(struct intel_dp *intel_dp, return true; } +static bool intel_psr_needs_wa_18037818876(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(intel_dp); + + return (DISPLAY_VER(display) == 20 && intel_dp->psr.entry_setup_frames > 0 && + !crtc_state->has_sel_update); +} + void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) @@ -1679,6 +1679,12 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; } + /* + * Currently PSR/PR doesn't work reliably with VRR enabled. 
+ */ + if (crtc_state->vrr.enable) + return; + crtc_state->has_panel_replay = _panel_replay_compute_config(intel_dp, crtc_state, conn_state); @@ -1690,6 +1696,13 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; crtc_state->has_sel_update = intel_sel_update_config_valid(intel_dp, crtc_state); + + /* Wa_18037818876 */ + if (intel_psr_needs_wa_18037818876(intel_dp, crtc_state)) { + crtc_state->has_psr = false; + drm_dbg_kms(display->drm, + "PSR disabled to workaround PSR FSM hang issue\n"); + } } void intel_psr_get_config(struct intel_encoder *encoder, @@ -1773,23 +1786,6 @@ static void intel_psr_activate(struct intel_dp *intel_dp) intel_dp->psr.active = true; } -static u32 wa_16013835468_bit_get(struct intel_dp *intel_dp) -{ - switch (intel_dp->psr.pipe) { - case PIPE_A: - return LATENCY_REPORTING_REMOVED_PIPE_A; - case PIPE_B: - return LATENCY_REPORTING_REMOVED_PIPE_B; - case PIPE_C: - return LATENCY_REPORTING_REMOVED_PIPE_C; - case PIPE_D: - return LATENCY_REPORTING_REMOVED_PIPE_D; - default: - MISSING_CASE(intel_dp->psr.pipe); - return 0; - } -} - /* * Wa_16013835468 * Wa_14015648006 @@ -1798,23 +1794,25 @@ static void wm_optimization_wa(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(intel_dp); - bool set_wa_bit = false; + enum pipe pipe = intel_dp->psr.pipe; + bool activate = false; /* Wa_14015648006 */ - if (IS_DISPLAY_VER(display, 11, 14)) - set_wa_bit |= crtc_state->wm_level_disabled; + if (IS_DISPLAY_VER(display, 11, 14) && crtc_state->wm_level_disabled) + activate = true; /* Wa_16013835468 */ - if (DISPLAY_VER(display) == 12) - set_wa_bit |= crtc_state->hw.adjusted_mode.crtc_vblank_start != - crtc_state->hw.adjusted_mode.crtc_vdisplay; + if (DISPLAY_VER(display) == 12 && + crtc_state->hw.adjusted_mode.crtc_vblank_start != + crtc_state->hw.adjusted_mode.crtc_vdisplay) + activate = true; - if (set_wa_bit) + if (activate) intel_de_rmw(display, GEN8_CHICKEN_DCPR_1, - 0, 
wa_16013835468_bit_get(intel_dp)); + 0, LATENCY_REPORTING_REMOVED(pipe)); else intel_de_rmw(display, GEN8_CHICKEN_DCPR_1, - wa_16013835468_bit_get(intel_dp), 0); + LATENCY_REPORTING_REMOVED(pipe), 0); } static void intel_psr_enable_source(struct intel_dp *intel_dp, @@ -1908,7 +1906,7 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, if (intel_dp->psr.sel_update_enabled) { if (DISPLAY_VER(display) == 9) - intel_de_rmw(display, CHICKEN_TRANS(cpu_transcoder), 0, + intel_de_rmw(display, CHICKEN_TRANS(display, cpu_transcoder), 0, PSR2_VSC_ENABLE_PROG_HEADER | PSR2_ADD_VERTICAL_LINE_COUNT); @@ -1920,7 +1918,7 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, if (!intel_dp->psr.panel_replay_enabled && (IS_DISPLAY_VERx100_STEP(display, 1400, STEP_A0, STEP_B0) || IS_ALDERLAKE_P(dev_priv))) - intel_de_rmw(display, hsw_chicken_trans_reg(dev_priv, cpu_transcoder), + intel_de_rmw(display, CHICKEN_TRANS(display, cpu_transcoder), 0, ADLP_1_BASED_X_GRANULARITY); /* Wa_16012604467:adlp,mtl[a0,b0] */ @@ -2114,7 +2112,7 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) */ if (DISPLAY_VER(display) >= 11) intel_de_rmw(display, GEN8_CHICKEN_DCPR_1, - wa_16013835468_bit_get(intel_dp), 0); + LATENCY_REPORTING_REMOVED(intel_dp->psr.pipe), 0); if (intel_dp->psr.sel_update_enabled) { /* Wa_16012604467:adlp,mtl[a0,b0] */ @@ -3335,11 +3333,10 @@ unlock: void intel_psr_init(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_connector *connector = intel_dp->attached_connector; struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - if (!(HAS_PSR(display) || HAS_DP20(dev_priv))) + if (!(HAS_PSR(display) || HAS_DP20(display))) return; /* @@ -3357,7 +3354,7 @@ void intel_psr_init(struct intel_dp *intel_dp) return; } - if ((HAS_DP20(dev_priv) && !intel_dp_is_edp(intel_dp)) || + if ((HAS_DP20(display) && !intel_dp_is_edp(intel_dp)) 
|| DISPLAY_VER(display) >= 20) intel_dp->psr.source_panel_replay_support = true; @@ -3974,7 +3971,6 @@ DEFINE_SHOW_ATTRIBUTE(i915_psr_status); void intel_psr_connector_debugfs_add(struct intel_connector *connector) { struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); struct dentry *root = connector->base.debugfs_entry; if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP && @@ -3984,7 +3980,7 @@ void intel_psr_connector_debugfs_add(struct intel_connector *connector) debugfs_create_file("i915_psr_sink_status", 0444, root, connector, &i915_psr_sink_status_fops); - if (HAS_PSR(display) || HAS_DP20(i915)) + if (HAS_PSR(display) || HAS_DP20(display)) debugfs_create_file("i915_psr_status", 0444, root, connector, &i915_psr_status_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index 28f497ae785b..8b30e9fd936e 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -5,7 +5,7 @@ #include <linux/dmi.h> -#include "i915_drv.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_quirks.h" diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 3dcb48879e81..1b6040892c40 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2136,6 +2136,7 @@ intel_sdvo_connector_matches_edid(struct intel_sdvo_connector *sdvo, static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector, bool force) { + struct intel_display *display = to_intel_display(connector->dev); struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); @@ -2145,10 +2146,10 @@ intel_sdvo_detect(struct 
drm_connector *connector, bool force) drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (!intel_display_device_enabled(i915)) + if (!intel_display_device_enabled(display)) return connector_status_disconnected; - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return connector->status; if (!intel_sdvo_set_target_output(intel_sdvo, @@ -2196,14 +2197,14 @@ intel_sdvo_detect(struct drm_connector *connector, bool force) static int intel_sdvo_get_ddc_modes(struct drm_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_display *display = to_intel_display(connector->dev); int num_modes = 0; const struct drm_edid *drm_edid; drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return drm_edid_connector_add_modes(connector); /* set the bus switch and get the modes */ @@ -2297,6 +2298,7 @@ static const struct drm_display_mode sdvo_tv_modes[] = { static int intel_sdvo_get_tv_modes(struct drm_connector *connector) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct drm_i915_private *i915 = to_i915(intel_sdvo->base.base.dev); struct intel_sdvo_connector *intel_sdvo_connector = @@ -2310,7 +2312,7 @@ static int intel_sdvo_get_tv_modes(struct drm_connector *connector) drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return 0; /* diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c index 4b3a32736fd6..41fe26dc200b 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_phy.c +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ 
-5,6 +5,7 @@ #include <linux/math.h> +#include "i915_drv.h" #include "i915_reg.h" #include "intel_ddi.h" #include "intel_ddi_buf_trans.h" diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index b16c4d2d4077..13811244c82b 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -390,7 +390,7 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); struct intel_tc_port *tc = to_tc_port(dig_port); - bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; + bool lane_reversal = dig_port->lane_reversal; u32 val; if (DISPLAY_VER(i915) >= 14) @@ -1013,21 +1013,52 @@ xelpdp_tc_phy_wait_for_tcss_power(struct intel_tc_port *tc, bool enabled) return true; } +/* + * Gfx driver WA 14020908590 for PTL tcss_rxdetect_clkswb_req/ack + * handshake violation when pwwreq= 0->1 during TC7/10 entry + */ +static void xelpdp_tc_power_request_wa(struct intel_display *display, bool enable) +{ + /* check if mailbox is running busy */ + if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD, + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { + drm_dbg_kms(display->drm, + "Timeout waiting for TCSS mailbox run/busy bit to clear\n"); + return; + } + + intel_de_write(display, TCSS_DISP_MAILBOX_IN_DATA, enable ? 1 : 0); + intel_de_write(display, TCSS_DISP_MAILBOX_IN_CMD, + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY | + TCSS_DISP_MAILBOX_IN_CMD_DATA(0x1)); + + /* wait to clear mailbox running busy bit before continuing */ + if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD, + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { + drm_dbg_kms(display->drm, + "Timeout after writing data to mailbox. 
Mailbox run/busy bit did not clear\n"); + return; + } +} + static void __xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enable) { - struct drm_i915_private *i915 = tc_to_i915(tc); + struct intel_display *display = to_intel_display(tc->dig_port); enum port port = tc->dig_port->base.port; - i915_reg_t reg = XELPDP_PORT_BUF_CTL1(i915, port); + i915_reg_t reg = XELPDP_PORT_BUF_CTL1(display, port); u32 val; assert_tc_cold_blocked(tc); - val = intel_de_read(i915, reg); + if (DISPLAY_VER(display) == 30) + xelpdp_tc_power_request_wa(display, enable); + + val = intel_de_read(display, reg); if (enable) val |= XELPDP_TCSS_POWER_REQUEST; else val &= ~XELPDP_TCSS_POWER_REQUEST; - intel_de_write(i915, reg, val); + intel_de_write(display, reg, val); } static bool xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enable) diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index fa2634aa574a..1c50732a099d 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1714,7 +1714,6 @@ intel_tv_detect(struct drm_connector *connector, bool force) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector)); enum drm_connector_status status; int type; @@ -1722,10 +1721,10 @@ intel_tv_detect(struct drm_connector *connector, drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] force=%d\n", connector->base.id, connector->name, force); - if (!intel_display_device_enabled(i915)) + if (!intel_display_device_enabled(display)) return connector_status_disconnected; - if (!intel_display_driver_check_access(i915)) + if (!intel_display_driver_check_access(display)) return connector->status; if (force) { diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 42022756bbd5..e9b809568cd4 100644 --- 
a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -1014,6 +1014,14 @@ struct bdb_tv_options { * Block 27 - eDP VBT Block */ +struct edp_power_seq { + u16 t1_t3; + u16 t8; + u16 t9; + u16 t10; + u16 t11_t12; +} __packed; + #define EDP_18BPP 0 #define EDP_24BPP 1 #define EDP_30BPP 2 diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 40525f5c4c42..b355c479eda3 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -14,6 +14,7 @@ #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" +#include "intel_dp.h" #include "intel_dsi.h" #include "intel_qp_tables.h" #include "intel_vdsc.h" @@ -379,7 +380,7 @@ intel_dsc_power_domain(struct intel_crtc *crtc, enum transcoder cpu_transcoder) static int intel_dsc_get_vdsc_per_pipe(const struct intel_crtc_state *crtc_state) { - return crtc_state->dsc.dsc_split ? 2 : 1; + return crtc_state->dsc.num_streams; } int intel_dsc_get_num_vdsc_instances(const struct intel_crtc_state *crtc_state) @@ -402,8 +403,10 @@ static void intel_dsc_get_pps_reg(const struct intel_crtc_state *crtc_state, int pipe_dsc = is_pipe_dsc(crtc, cpu_transcoder); - if (dsc_reg_num >= 3) + if (dsc_reg_num >= 4) MISSING_CASE(dsc_reg_num); + if (dsc_reg_num >= 3) + dsc_reg[2] = BMG_DSC2_PPS(pipe, pps); if (dsc_reg_num >= 2) dsc_reg[1] = pipe_dsc ? 
ICL_DSC1_PPS(pipe, pps) : DSCC_PPS(pps); if (dsc_reg_num >= 1) @@ -415,7 +418,7 @@ static void intel_dsc_pps_write(const struct intel_crtc_state *crtc_state, { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *i915 = to_i915(crtc->base.dev); - i915_reg_t dsc_reg[2]; + i915_reg_t dsc_reg[3]; int i, vdsc_per_pipe, dsc_reg_num; vdsc_per_pipe = intel_dsc_get_vdsc_per_pipe(crtc_state); @@ -770,11 +773,17 @@ void intel_dsc_enable(const struct intel_crtc_state *crtc_state) intel_dsc_pps_configure(crtc_state); - dss_ctl2_val |= LEFT_BRANCH_VDSC_ENABLE; + dss_ctl2_val |= VDSC0_ENABLE; if (vdsc_instances_per_pipe > 1) { - dss_ctl2_val |= RIGHT_BRANCH_VDSC_ENABLE; + dss_ctl2_val |= VDSC1_ENABLE; dss_ctl1_val |= JOINER_ENABLE; } + + if (vdsc_instances_per_pipe > 2) { + dss_ctl2_val |= VDSC2_ENABLE; + dss_ctl2_val |= SMALL_JOINER_CONFIG_3_ENGINES; + } + if (crtc_state->joiner_pipes) { if (intel_crtc_ultrajoiner_enable_needed(crtc_state)) dss_ctl1_val |= ULTRA_JOINER_ENABLE; @@ -809,7 +818,7 @@ static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps, { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *i915 = to_i915(crtc->base.dev); - i915_reg_t dsc_reg[2]; + i915_reg_t dsc_reg[3]; int i, vdsc_per_pipe, dsc_reg_num; u32 val; @@ -972,12 +981,16 @@ void intel_dsc_get_config(struct intel_crtc_state *crtc_state) dss_ctl1 = intel_de_read(dev_priv, dss_ctl1_reg(crtc, cpu_transcoder)); dss_ctl2 = intel_de_read(dev_priv, dss_ctl2_reg(crtc, cpu_transcoder)); - crtc_state->dsc.compression_enable = dss_ctl2 & LEFT_BRANCH_VDSC_ENABLE; + crtc_state->dsc.compression_enable = dss_ctl2 & VDSC0_ENABLE; if (!crtc_state->dsc.compression_enable) goto out; - crtc_state->dsc.dsc_split = (dss_ctl2 & RIGHT_BRANCH_VDSC_ENABLE) && - (dss_ctl1 & JOINER_ENABLE); + if (dss_ctl1 & JOINER_ENABLE && dss_ctl2 & (VDSC2_ENABLE | SMALL_JOINER_CONFIG_3_ENGINES)) + crtc_state->dsc.num_streams = 3; + else if (dss_ctl1 & 
JOINER_ENABLE && dss_ctl2 & VDSC1_ENABLE) + crtc_state->dsc.num_streams = 2; + else + crtc_state->dsc.num_streams = 1; intel_dsc_get_pps_config(crtc_state); out: @@ -988,10 +1001,10 @@ static void intel_vdsc_dump_state(struct drm_printer *p, int indent, const struct intel_crtc_state *crtc_state) { drm_printf_indent(p, indent, - "dsc-dss: compressed-bpp:" FXP_Q4_FMT ", slice-count: %d, split: %s\n", + "dsc-dss: compressed-bpp:" FXP_Q4_FMT ", slice-count: %d, num_streams: %d\n", FXP_Q4_ARGS(crtc_state->dsc.compressed_bpp_x16), crtc_state->dsc.slice_count, - str_yes_no(crtc_state->dsc.dsc_split)); + crtc_state->dsc.num_streams); } void intel_vdsc_state_dump(struct drm_printer *p, int indent, @@ -1003,3 +1016,48 @@ void intel_vdsc_state_dump(struct drm_printer *p, int indent, intel_vdsc_dump_state(p, indent, crtc_state); drm_dsc_dump_config(p, indent, &crtc_state->dsc.config); } + +int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_display *display = to_intel_display(crtc); + int num_vdsc_instances = intel_dsc_get_num_vdsc_instances(crtc_state); + int min_cdclk; + + if (!crtc_state->dsc.compression_enable) + return 0; + + /* + * When we decide to use only one VDSC engine, since + * each VDSC operates with 1 ppc throughput, pixel clock + * cannot be higher than the VDSC clock (cdclk) + * If there 2 VDSC engines, then pixel clock can't be higher than + * VDSC clock(cdclk) * 2 and so on. + */ + min_cdclk = DIV_ROUND_UP(crtc_state->pixel_rate, num_vdsc_instances); + + if (crtc_state->joiner_pipes) { + int pixel_clock = intel_dp_mode_to_fec_clock(crtc_state->hw.adjusted_mode.clock); + + /* + * According to Bigjoiner bw check: + * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock + * + * We have already computed compressed_bpp, so now compute the min CDCLK that + * is required to support this compressed_bpp. 
+ * + * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits) + * + * Since PPC = 2 with bigjoiner + * => CDCLK >= compressed_bpp * Pixel clock / 2 * Bigjoiner Interface bits + */ + int bigjoiner_interface_bits = DISPLAY_VER(display) >= 14 ? 36 : 24; + int min_cdclk_bj = + (fxp_q4_to_int_roundup(crtc_state->dsc.compressed_bpp_x16) * + pixel_clock) / (2 * bigjoiner_interface_bits); + + min_cdclk = max(min_cdclk, min_cdclk_bj); + } + + return min_cdclk; +} diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h index 290b2e9b3482..9e2812f99dd7 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc.h @@ -31,5 +31,6 @@ void intel_dsc_dp_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_vdsc_state_dump(struct drm_printer *p, int indent, const struct intel_crtc_state *crtc_state); +int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_VDSC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h index bf32a3b46fb1..2d478a84b07c 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h @@ -21,8 +21,10 @@ #define MAX_DL_BUFFER_TARGET_DEPTH 0x5a0 #define DSS_CTL2 _MMIO(0x67404) -#define LEFT_BRANCH_VDSC_ENABLE (1 << 31) -#define RIGHT_BRANCH_VDSC_ENABLE (1 << 15) +#define VDSC0_ENABLE REG_BIT(31) +#define VDSC2_ENABLE REG_BIT(30) +#define SMALL_JOINER_CONFIG_3_ENGINES REG_BIT(23) +#define VDSC1_ENABLE REG_BIT(15) #define RIGHT_DL_BUF_TARGET_DEPTH_MASK (0xfff << 0) #define RIGHT_DL_BUF_TARGET_DEPTH(pixels) ((pixels) << 0) @@ -57,8 +59,10 @@ #define DSCC_PPS(pps) _MMIO(_DSCC_PPS_0 + ((pps) < 12 ? 
(pps) : (pps) + 12) * 4) #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PB 0x78270 #define _ICL_DSC1_PICTURE_PARAMETER_SET_0_PB 0x78370 +#define _BMG_DSC2_PICTURE_PARAMETER_SET_0_PB 0x78970 #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PC 0x78470 #define _ICL_DSC1_PICTURE_PARAMETER_SET_0_PC 0x78570 +#define _BMG_DSC2_PICTURE_PARAMETER_SET_0_PC 0x78A70 #define ICL_DSC0_PICTURE_PARAMETER_SET_0(pipe) _MMIO_PIPE((pipe) - PIPE_B, \ _ICL_DSC0_PICTURE_PARAMETER_SET_0_PB, \ _ICL_DSC0_PICTURE_PARAMETER_SET_0_PC) @@ -71,8 +75,12 @@ #define _ICL_DSC1_PPS_0(pipe) _PICK_EVEN((pipe) - PIPE_B, \ _ICL_DSC1_PICTURE_PARAMETER_SET_0_PB, \ _ICL_DSC1_PICTURE_PARAMETER_SET_0_PC) +#define _BMG_DSC2_PPS_0(pipe) _PICK_EVEN((pipe) - PIPE_B, \ + _BMG_DSC2_PICTURE_PARAMETER_SET_0_PB, \ + _BMG_DSC2_PICTURE_PARAMETER_SET_0_PC) #define ICL_DSC0_PPS(pipe, pps) _MMIO(_ICL_DSC0_PPS_0(pipe) + ((pps) * 4)) #define ICL_DSC1_PPS(pipe, pps) _MMIO(_ICL_DSC1_PPS_0(pipe) + ((pps) * 4)) +#define BMG_DSC2_PPS(pipe, pps) _MMIO(_BMG_DSC2_PPS_0(pipe) + ((pps) * 4)) /* PPS 0 */ #define DSC_PPS0_NATIVE_422_ENABLE REG_BIT(23) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 19a5d0076bb8..70088e355055 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -4,7 +4,6 @@ * */ -#include "i915_drv.h" #include "i915_reg.h" #include "intel_de.h" #include "intel_display_types.h" @@ -288,7 +287,7 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR */ if (IS_DISPLAY_VER(display, 12, 13)) - intel_de_rmw(display, CHICKEN_TRANS(cpu_transcoder), + intel_de_rmw(display, CHICKEN_TRANS(display, cpu_transcoder), 0, PIPE_VBLANK_WITH_DELAY); if (!intel_vrr_possible(crtc_state)) { diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 7dbc99b02eaa..ae21fce534dc 100644 --- 
a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -3,6 +3,7 @@ * Copyright © 2020 Intel Corporation */ +#include "i915_drv.h" #include "i915_reg.h" #include "intel_de.h" #include "intel_display_types.h" @@ -105,10 +106,10 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, const struct drm_format_info *format, u64 modifier, bool need_scaler) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; int pipe_src_w = drm_rect_width(&crtc_state->pipe_src); @@ -130,9 +131,9 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, * Once NV12 is enabled, handle it here while allocating scaler * for NV12. */ - if (DISPLAY_VER(dev_priv) >= 9 && crtc_state->hw.enable && + if (DISPLAY_VER(display) >= 9 && crtc_state->hw.enable && need_scaler && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Pipe/Plane scaling not supported with IF-ID mode\n"); return -EINVAL; } @@ -150,9 +151,9 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, if (force_detach || !need_scaler) { if (*scaler_id >= 0) { scaler_state->scaler_users &= ~(1 << scaler_user); - scaler_state->scalers[*scaler_id].in_use = 0; + scaler_state->scalers[*scaler_id].in_use = false; - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "scaler_user index %u.%u: " "Staged freeing scaler id %d scaler_users = 0x%x\n", crtc->pipe, scaler_user, *scaler_id, @@ -164,7 +165,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, if (format && intel_format_info_is_yuv_semiplanar(format, modifier) && (src_h < SKL_MIN_YUV_420_SRC_H || src_w < 
SKL_MIN_YUV_420_SRC_W)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Planar YUV: src dimensions not met\n"); return -EINVAL; } @@ -174,17 +175,17 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, min_dst_w = SKL_MIN_DST_W; min_dst_h = SKL_MIN_DST_H; - if (DISPLAY_VER(dev_priv) < 11) { + if (DISPLAY_VER(display) < 11) { max_src_w = SKL_MAX_SRC_W; max_src_h = SKL_MAX_SRC_H; max_dst_w = SKL_MAX_DST_W; max_dst_h = SKL_MAX_DST_H; - } else if (DISPLAY_VER(dev_priv) < 12) { + } else if (DISPLAY_VER(display) < 12) { max_src_w = ICL_MAX_SRC_W; max_src_h = ICL_MAX_SRC_H; max_dst_w = ICL_MAX_DST_W; max_dst_h = ICL_MAX_DST_H; - } else if (DISPLAY_VER(dev_priv) < 14) { + } else if (DISPLAY_VER(display) < 14) { max_src_w = TGL_MAX_SRC_W; max_src_h = TGL_MAX_SRC_H; max_dst_w = TGL_MAX_DST_W; @@ -201,7 +202,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, dst_w < min_dst_w || dst_h < min_dst_h || src_w > max_src_w || src_h > max_src_h || dst_w > max_dst_w || dst_h > max_dst_h) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "scaler_user index %u.%u: src %ux%u dst %ux%u " "size is out of scaler range\n", crtc->pipe, scaler_user, src_w, src_h, @@ -218,7 +219,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, * now. 
*/ if (pipe_src_w > max_dst_w || pipe_src_h > max_dst_h) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "scaler_user index %u.%u: pipe src size %ux%u " "is out of scaler range\n", crtc->pipe, scaler_user, pipe_src_w, pipe_src_h); @@ -227,7 +228,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, /* mark this plane as a scaler user in crtc_state */ scaler_state->scaler_users |= (1 << scaler_user); - drm_dbg_kms(&dev_priv->drm, "scaler_user index %u.%u: " + drm_dbg_kms(display->drm, "scaler_user index %u.%u: " "staged scaling request for %ux%u->%ux%u scaler_users = 0x%x\n", crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h, scaler_state->scaler_users); @@ -268,20 +269,19 @@ int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state) int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { - struct intel_plane *intel_plane = - to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct drm_framebuffer *fb = plane_state->hw.fb; bool force_detach = !fb || !plane_state->uapi.visible; bool need_scaler = false; /* Pre-gen11 and SDR planes always need a scaler for planar formats. 
*/ - if (!icl_is_hdr_plane(dev_priv, intel_plane->id) && + if (!icl_is_hdr_plane(dev_priv, plane->id) && fb && intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) need_scaler = true; return skl_update_scaler(crtc_state, force_detach, - drm_plane_index(&intel_plane->base), + drm_plane_index(&plane->base), &plane_state->scaler_id, drm_rect_width(&plane_state->uapi.src) >> 16, drm_rect_height(&plane_state->uapi.src) >> 16, @@ -292,29 +292,37 @@ int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, need_scaler); } +static int intel_allocate_scaler(struct intel_crtc_scaler_state *scaler_state, + struct intel_crtc *crtc) +{ + int i; + + for (i = 0; i < crtc->num_scalers; i++) { + if (scaler_state->scalers[i].in_use) + continue; + + scaler_state->scalers[i].in_use = true; + + return i; + } + + return -1; +} + static int intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_state, - int num_scalers_need, struct intel_crtc *intel_crtc, + int num_scalers_need, struct intel_crtc *crtc, const char *name, int idx, struct intel_plane_state *plane_state, int *scaler_id) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - int j; + struct intel_display *display = to_intel_display(crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 mode; - if (*scaler_id < 0) { - /* find a free scaler */ - for (j = 0; j < intel_crtc->num_scalers; j++) { - if (scaler_state->scalers[j].in_use) - continue; - - *scaler_id = j; - scaler_state->scalers[*scaler_id].in_use = 1; - break; - } - } + if (*scaler_id < 0) + *scaler_id = intel_allocate_scaler(scaler_state, crtc); - if (drm_WARN(&dev_priv->drm, *scaler_id < 0, + if (drm_WARN(display->drm, *scaler_id < 0, "Cannot find scaler for %s:%d\n", name, idx)) return -EINVAL; @@ -324,7 +332,7 @@ static int intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_stat plane_state->hw.fb->format->num_planes > 1) { struct intel_plane *plane = 
to_intel_plane(plane_state->uapi.plane); - if (DISPLAY_VER(dev_priv) == 9) { + if (DISPLAY_VER(display) == 9) { mode = SKL_PS_SCALER_MODE_NV12; } else if (icl_is_hdr_plane(dev_priv, plane->id)) { /* @@ -342,17 +350,17 @@ static int intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_stat if (linked) mode |= PS_BINDING_Y_PLANE(linked->id); } - } else if (DISPLAY_VER(dev_priv) >= 10) { + } else if (DISPLAY_VER(display) >= 10) { mode = PS_SCALER_MODE_NORMAL; - } else if (num_scalers_need == 1 && intel_crtc->num_scalers > 1) { + } else if (num_scalers_need == 1 && crtc->num_scalers > 1) { /* * when only 1 scaler is in use on a pipe with 2 scalers * scaler 0 operates in high quality (HQ) mode. * In this case use scaler 0 to take advantage of HQ mode */ - scaler_state->scalers[*scaler_id].in_use = 0; + scaler_state->scalers[*scaler_id].in_use = false; *scaler_id = 0; - scaler_state->scalers[0].in_use = 1; + scaler_state->scalers[0].in_use = true; mode = SKL_PS_SCALER_MODE_HQ; } else { mode = SKL_PS_SCALER_MODE_DYN; @@ -376,7 +384,7 @@ static int intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_stat * unnecessarily. 
*/ - if (DISPLAY_VER(dev_priv) >= 14) { + if (DISPLAY_VER(display) >= 14) { /* * On versions 14 and up, only the first * scaler supports a vertical scaling factor @@ -389,7 +397,7 @@ static int intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_stat else max_vscale = 0x10000; - } else if (DISPLAY_VER(dev_priv) >= 10 || + } else if (DISPLAY_VER(display) >= 10 || !intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) { max_hscale = 0x30000 - 1; max_vscale = 0x30000 - 1; @@ -408,7 +416,7 @@ static int intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_stat vscale = drm_rect_calc_vscale(src, dst, 1, max_vscale); if (hscale < 0 || vscale < 0) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Scaler %d doesn't support required plane scaling\n", *scaler_id); drm_rect_debug_print("src: ", src, true); @@ -418,18 +426,66 @@ static int intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_stat } } - drm_dbg_kms(&dev_priv->drm, "Attached scaler id %u.%u to %s:%d\n", - intel_crtc->pipe, *scaler_id, name, idx); + drm_dbg_kms(display->drm, "Attached scaler id %u.%u to %s:%d\n", + crtc->pipe, *scaler_id, name, idx); scaler_state->scalers[*scaler_id].mode = mode; return 0; } +static int setup_crtc_scaler(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct intel_crtc_scaler_state *scaler_state = + &crtc_state->scaler_state; + + return intel_atomic_setup_scaler(scaler_state, + hweight32(scaler_state->scaler_users), + crtc, "CRTC", crtc->base.base.id, + NULL, &scaler_state->scaler_id); +} + +static int setup_plane_scaler(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_plane *plane) +{ + struct intel_display *display = to_intel_display(state); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct intel_crtc_scaler_state *scaler_state = + 
&crtc_state->scaler_state; + struct intel_plane_state *plane_state; + + /* plane on different crtc cannot be a scaler user of this crtc */ + if (drm_WARN_ON(display->drm, plane->pipe != crtc->pipe)) + return 0; + + plane_state = intel_atomic_get_new_plane_state(state, plane); + + /* + * GLK+ scalers don't have a HQ mode so it + * isn't necessary to change between HQ and dyn mode + * on those platforms. + */ + if (!plane_state && DISPLAY_VER(display) >= 10) + return 0; + + plane_state = intel_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + + return intel_atomic_setup_scaler(scaler_state, + hweight32(scaler_state->scaler_users), + crtc, "PLANE", plane->base.base.id, + plane_state, &plane_state->scaler_id); +} + /** * intel_atomic_setup_scalers() - setup scalers for crtc per staged requests - * @dev_priv: i915 device - * @intel_crtc: intel crtc - * @crtc_state: incoming crtc_state to validate and setup scalers + * @state: atomic state + * @crtc: crtc * * This function sets up scalers based on staged scaling requests for * a @crtc and its planes. It is called from crtc level check path. 
If request @@ -442,16 +498,14 @@ static int intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_stat * 0 - scalers were setup successfully * error code - otherwise */ -int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) +int intel_atomic_setup_scalers(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_plane *plane = NULL; - struct intel_plane *intel_plane; + struct intel_display *display = to_intel_display(crtc); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; - struct drm_atomic_state *drm_state = crtc_state->uapi.state; - struct intel_atomic_state *intel_state = to_intel_atomic_state(drm_state); int num_scalers_need; int i; @@ -470,80 +524,33 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, */ /* fail if required scalers > available scalers */ - if (num_scalers_need > intel_crtc->num_scalers) { - drm_dbg_kms(&dev_priv->drm, + if (num_scalers_need > crtc->num_scalers) { + drm_dbg_kms(display->drm, "Too many scaling requests %d > %d\n", - num_scalers_need, intel_crtc->num_scalers); + num_scalers_need, crtc->num_scalers); return -EINVAL; } /* walkthrough scaler_users bits and start assigning scalers */ for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) { - struct intel_plane_state *plane_state = NULL; - int *scaler_id; - const char *name; - int idx, ret; + int ret; /* skip if scaler not required */ if (!(scaler_state->scaler_users & (1 << i))) continue; if (i == SKL_CRTC_INDEX) { - name = "CRTC"; - idx = intel_crtc->base.base.id; - - /* panel fitter case: assign as a crtc scaler */ - scaler_id = &scaler_state->scaler_id; + ret = setup_crtc_scaler(state, crtc); + if (ret) + return ret; } else { - name = "PLANE"; - - /* plane scaler case: assign as a plane scaler */ - /* find the plane that set the bit as 
scaler_user */ - plane = drm_state->planes[i].ptr; + struct intel_plane *plane = + to_intel_plane(drm_plane_from_index(display->drm, i)); - /* - * to enable/disable hq mode, add planes that are using scaler - * into this transaction - */ - if (!plane) { - struct drm_plane_state *state; - - /* - * GLK+ scalers don't have a HQ mode so it - * isn't necessary to change between HQ and dyn mode - * on those platforms. - */ - if (DISPLAY_VER(dev_priv) >= 10) - continue; - - plane = drm_plane_from_index(&dev_priv->drm, i); - state = drm_atomic_get_plane_state(drm_state, plane); - if (IS_ERR(state)) { - drm_dbg_kms(&dev_priv->drm, - "Failed to add [PLANE:%d] to drm_state\n", - plane->base.id); - return PTR_ERR(state); - } - } - - intel_plane = to_intel_plane(plane); - idx = plane->base.id; - - /* plane on different crtc cannot be a scaler user of this crtc */ - if (drm_WARN_ON(&dev_priv->drm, - intel_plane->pipe != intel_crtc->pipe)) - continue; - - plane_state = intel_atomic_get_new_plane_state(intel_state, - intel_plane); - scaler_id = &plane_state->scaler_id; + ret = setup_plane_scaler(state, crtc, plane); + if (ret) + return ret; } - - ret = intel_atomic_setup_scaler(scaler_state, num_scalers_need, - intel_crtc, name, idx, - plane_state, scaler_id); - if (ret < 0) - return ret; } return 0; @@ -596,12 +603,12 @@ static u16 glk_nearest_filter_coef(int t) * */ -static void glk_program_nearest_filter_coefs(struct drm_i915_private *dev_priv, +static void glk_program_nearest_filter_coefs(struct intel_display *display, enum pipe pipe, int id, int set) { int i; - intel_de_write_fw(dev_priv, GLK_PS_COEF_INDEX_SET(pipe, id, set), + intel_de_write_fw(display, GLK_PS_COEF_INDEX_SET(pipe, id, set), PS_COEF_INDEX_AUTO_INC); for (i = 0; i < 17 * 7; i += 2) { @@ -614,11 +621,11 @@ static void glk_program_nearest_filter_coefs(struct drm_i915_private *dev_priv, t = glk_coef_tap(i + 1); tmp |= glk_nearest_filter_coef(t) << 16; - intel_de_write_fw(dev_priv, GLK_PS_COEF_DATA_SET(pipe, id, 
set), + intel_de_write_fw(display, GLK_PS_COEF_DATA_SET(pipe, id, set), tmp); } - intel_de_write_fw(dev_priv, GLK_PS_COEF_INDEX_SET(pipe, id, set), 0); + intel_de_write_fw(display, GLK_PS_COEF_INDEX_SET(pipe, id, set), 0); } static u32 skl_scaler_get_filter_select(enum drm_scaling_filter filter, int set) @@ -634,14 +641,14 @@ static u32 skl_scaler_get_filter_select(enum drm_scaling_filter filter, int set) return PS_FILTER_MEDIUM; } -static void skl_scaler_setup_filter(struct drm_i915_private *dev_priv, enum pipe pipe, +static void skl_scaler_setup_filter(struct intel_display *display, enum pipe pipe, int id, int set, enum drm_scaling_filter filter) { switch (filter) { case DRM_SCALING_FILTER_DEFAULT: break; case DRM_SCALING_FILTER_NEAREST_NEIGHBOR: - glk_program_nearest_filter_coefs(dev_priv, pipe, id, set); + glk_program_nearest_filter_coefs(display, pipe, id, set); break; default: MISSING_CASE(filter); @@ -650,8 +657,8 @@ static void skl_scaler_setup_filter(struct drm_i915_private *dev_priv, enum pipe void skl_pfit_enable(const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; const struct drm_rect *dst = &crtc_state->pch_pfit.dst; @@ -669,7 +676,7 @@ void skl_pfit_enable(const struct intel_crtc_state *crtc_state) if (!crtc_state->pch_pfit.enabled) return; - if (drm_WARN_ON(&dev_priv->drm, + if (drm_WARN_ON(display->drm, crtc_state->scaler_state.scaler_id < 0)) return; @@ -688,18 +695,18 @@ void skl_pfit_enable(const struct intel_crtc_state *crtc_state) ps_ctrl = PS_SCALER_EN | PS_BINDING_PIPE | scaler_state->scalers[id].mode | skl_scaler_get_filter_select(crtc_state->hw.scaling_filter, 0); - skl_scaler_setup_filter(dev_priv, pipe, id, 0, + skl_scaler_setup_filter(display, pipe, id, 0, 
crtc_state->hw.scaling_filter); - intel_de_write_fw(dev_priv, SKL_PS_CTRL(pipe, id), ps_ctrl); + intel_de_write_fw(display, SKL_PS_CTRL(pipe, id), ps_ctrl); - intel_de_write_fw(dev_priv, SKL_PS_VPHASE(pipe, id), + intel_de_write_fw(display, SKL_PS_VPHASE(pipe, id), PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_vphase)); - intel_de_write_fw(dev_priv, SKL_PS_HPHASE(pipe, id), + intel_de_write_fw(display, SKL_PS_HPHASE(pipe, id), PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_hphase)); - intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(pipe, id), + intel_de_write_fw(display, SKL_PS_WIN_POS(pipe, id), PS_WIN_XPOS(x) | PS_WIN_YPOS(y)); - intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(pipe, id), + intel_de_write_fw(display, SKL_PS_WIN_SZ(pipe, id), PS_WIN_XSIZE(width) | PS_WIN_YSIZE(height)); } @@ -708,6 +715,7 @@ skl_program_plane_scaler(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; @@ -751,28 +759,27 @@ skl_program_plane_scaler(struct intel_plane *plane, ps_ctrl = PS_SCALER_EN | PS_BINDING_PLANE(plane->id) | scaler->mode | skl_scaler_get_filter_select(plane_state->hw.scaling_filter, 0); - skl_scaler_setup_filter(dev_priv, pipe, scaler_id, 0, + skl_scaler_setup_filter(display, pipe, scaler_id, 0, plane_state->hw.scaling_filter); - intel_de_write_fw(dev_priv, SKL_PS_CTRL(pipe, scaler_id), ps_ctrl); - intel_de_write_fw(dev_priv, SKL_PS_VPHASE(pipe, scaler_id), + intel_de_write_fw(display, SKL_PS_CTRL(pipe, scaler_id), ps_ctrl); + intel_de_write_fw(display, SKL_PS_VPHASE(pipe, scaler_id), PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase)); - intel_de_write_fw(dev_priv, SKL_PS_HPHASE(pipe, scaler_id), + intel_de_write_fw(display, SKL_PS_HPHASE(pipe, scaler_id), PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase)); - 
intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(pipe, scaler_id), + intel_de_write_fw(display, SKL_PS_WIN_POS(pipe, scaler_id), PS_WIN_XPOS(crtc_x) | PS_WIN_YPOS(crtc_y)); - intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(pipe, scaler_id), + intel_de_write_fw(display, SKL_PS_WIN_SZ(pipe, scaler_id), PS_WIN_XSIZE(crtc_w) | PS_WIN_YSIZE(crtc_h)); } static void skl_detach_scaler(struct intel_crtc *crtc, int id) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(crtc); - intel_de_write_fw(dev_priv, SKL_PS_CTRL(crtc->pipe, id), 0); - intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(crtc->pipe, id), 0); - intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(crtc->pipe, id), 0); + intel_de_write_fw(display, SKL_PS_CTRL(crtc->pipe, id), 0); + intel_de_write_fw(display, SKL_PS_WIN_POS(crtc->pipe, id), 0); + intel_de_write_fw(display, SKL_PS_WIN_SZ(crtc->pipe, id), 0); } /* @@ -803,8 +810,8 @@ void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state) void skl_scaler_get_config(struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; int id = -1; int i; @@ -813,15 +820,15 @@ void skl_scaler_get_config(struct intel_crtc_state *crtc_state) for (i = 0; i < crtc->num_scalers; i++) { u32 ctl, pos, size; - ctl = intel_de_read(dev_priv, SKL_PS_CTRL(crtc->pipe, i)); + ctl = intel_de_read(display, SKL_PS_CTRL(crtc->pipe, i)); if ((ctl & (PS_SCALER_EN | PS_BINDING_MASK)) != (PS_SCALER_EN | PS_BINDING_PIPE)) continue; id = i; crtc_state->pch_pfit.enabled = true; - pos = intel_de_read(dev_priv, SKL_PS_WIN_POS(crtc->pipe, i)); - size = intel_de_read(dev_priv, SKL_PS_WIN_SZ(crtc->pipe, i)); + pos = intel_de_read(display, SKL_PS_WIN_POS(crtc->pipe, i)); + size = 
intel_de_read(display, SKL_PS_WIN_SZ(crtc->pipe, i)); drm_rect_init(&crtc_state->pch_pfit.dst, REG_FIELD_GET(PS_WIN_XPOS_MASK, pos), diff --git a/drivers/gpu/drm/i915/display/skl_scaler.h b/drivers/gpu/drm/i915/display/skl_scaler.h index 63f93ca03c89..4d2e2dbb1666 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.h +++ b/drivers/gpu/drm/i915/display/skl_scaler.h @@ -5,11 +5,7 @@ #ifndef INTEL_SCALER_H #define INTEL_SCALER_H -#include <linux/types.h> - -enum drm_scaling_filter; -enum pipe; -struct drm_i915_private; +struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; struct intel_plane; @@ -20,9 +16,8 @@ int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); -int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state); +int intel_atomic_setup_scalers(struct intel_atomic_state *state, + struct intel_crtc *crtc); void skl_pfit_enable(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 038ca2ec5d7a..ff9764cac1e7 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -239,7 +239,9 @@ int skl_format_to_fourcc(int format, bool rgb_order, bool alpha) static u8 icl_nv12_y_plane_mask(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 13 || HAS_D12_PLANE_MINIMIZATION(i915)) + struct intel_display *display = &i915->display; + + if (DISPLAY_VER(display) >= 13 || HAS_D12_PLANE_MINIMIZATION(display)) return BIT(PLANE_4) | BIT(PLANE_5); else return BIT(PLANE_6) | BIT(PLANE_7); @@ -715,6 +717,22 @@ static u32 skl_plane_ddb_reg_val(const struct skl_ddb_entry *entry) PLANE_BUF_START(entry->start); } +static u32 xe3_plane_min_ddb_reg_val(const u16 *min_ddb, + const u16 *interim_ddb) 
+{ + u32 val = 0; + + if (*min_ddb) + val |= PLANE_MIN_DBUF_BLOCKS(*min_ddb); + + if (*interim_ddb) + val |= PLANE_INTERIM_DBUF_BLOCKS(*interim_ddb); + + val |= val ? PLANE_AUTO_MIN_DBUF_EN : 0; + + return val; +} + static u32 skl_plane_wm_reg_val(const struct skl_wm_level *level) { u32 val = 0; @@ -723,6 +741,9 @@ static u32 skl_plane_wm_reg_val(const struct skl_wm_level *level) val |= PLANE_WM_EN; if (level->ignore_lines) val |= PLANE_WM_IGNORE_LINES; + if (level->auto_min_alloc_wm_enable) + val |= PLANE_WM_AUTO_MIN_ALLOC_EN; + val |= REG_FIELD_PREP(PLANE_WM_BLOCKS_MASK, level->blocks); val |= REG_FIELD_PREP(PLANE_WM_LINES_MASK, level->lines); @@ -734,7 +755,6 @@ static void skl_write_plane_wm(struct intel_dsb *dsb, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(plane->base.dev); - struct drm_i915_private *i915 = to_i915(plane->base.dev); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; @@ -742,16 +762,19 @@ static void skl_write_plane_wm(struct intel_dsb *dsb, &crtc_state->wm.skl.plane_ddb[plane_id]; const struct skl_ddb_entry *ddb_y = &crtc_state->wm.skl.plane_ddb_y[plane_id]; + const u16 *min_ddb = &crtc_state->wm.skl.plane_min_ddb[plane_id]; + const u16 *interim_ddb = + &crtc_state->wm.skl.plane_interim_ddb[plane_id]; int level; - for (level = 0; level < i915->display.wm.num_levels; level++) + for (level = 0; level < display->wm.num_levels; level++) intel_de_write_dsb(display, dsb, PLANE_WM(pipe, plane_id, level), skl_plane_wm_reg_val(skl_plane_wm_level(pipe_wm, plane_id, level))); intel_de_write_dsb(display, dsb, PLANE_WM_TRANS(pipe, plane_id), skl_plane_wm_reg_val(skl_plane_trans_wm(pipe_wm, plane_id))); - if (HAS_HW_SAGV_WM(i915)) { + if (HAS_HW_SAGV_WM(display)) { const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; intel_de_write_dsb(display, dsb, PLANE_WM_SAGV(pipe, plane_id), @@ -763,9 +786,13 @@ static void 
skl_write_plane_wm(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PLANE_BUF_CFG(pipe, plane_id), skl_plane_ddb_reg_val(ddb)); - if (DISPLAY_VER(i915) < 11) + if (DISPLAY_VER(display) < 11) intel_de_write_dsb(display, dsb, PLANE_NV12_BUF_CFG(pipe, plane_id), skl_plane_ddb_reg_val(ddb_y)); + + if (DISPLAY_VER(display) >= 30) + intel_de_write_dsb(display, dsb, PLANE_MIN_BUF_CFG(pipe, plane_id), + xe3_plane_min_ddb_reg_val(min_ddb, interim_ddb)); } static void @@ -2548,13 +2575,14 @@ static bool tgl_plane_has_mc_ccs(struct drm_i915_private *i915, static u8 skl_get_plane_caps(struct drm_i915_private *i915, enum pipe pipe, enum plane_id plane_id) { + struct intel_display *display = &i915->display; u8 caps = INTEL_PLANE_CAP_TILING_X; - if (DISPLAY_VER(i915) < 13 || IS_ALDERLAKE_P(i915)) + if (DISPLAY_VER(display) < 13 || display->platform.alderlake_p) caps |= INTEL_PLANE_CAP_TILING_Y; - if (DISPLAY_VER(i915) < 12) + if (DISPLAY_VER(display) < 12) caps |= INTEL_PLANE_CAP_TILING_Yf; - if (HAS_4TILE(i915)) + if (HAS_4TILE(display)) caps |= INTEL_PLANE_CAP_TILING_4; if (!IS_ENABLED(I915) && !HAS_FLAT_CCS(i915)) @@ -2562,14 +2590,14 @@ static u8 skl_get_plane_caps(struct drm_i915_private *i915, if (skl_plane_has_rc_ccs(i915, pipe, plane_id)) { caps |= INTEL_PLANE_CAP_CCS_RC; - if (DISPLAY_VER(i915) >= 12) + if (DISPLAY_VER(display) >= 12) caps |= INTEL_PLANE_CAP_CCS_RC_CC; } if (tgl_plane_has_mc_ccs(i915, plane_id)) caps |= INTEL_PLANE_CAP_CCS_MC; - if (DISPLAY_VER(i915) >= 14 && IS_DGFX(i915)) + if (DISPLAY_VER(display) >= 14 && display->platform.dgfx) caps |= INTEL_PLANE_CAP_NEED64K_PHYS; return caps; @@ -2743,6 +2771,7 @@ void skl_get_initial_plane_config(struct intel_crtc *crtc, struct intel_initial_plane_config *plane_config) { + struct intel_display *display = to_intel_display(crtc); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -2824,7 
+2853,7 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, fb->modifier = I915_FORMAT_MOD_Y_TILED; break; case PLANE_CTL_TILED_YF: /* aka PLANE_CTL_TILED_4 on XE_LPD+ */ - if (HAS_4TILE(dev_priv)) { + if (HAS_4TILE(display)) { u32 rc_mask = PLANE_CTL_RENDER_DECOMPRESSION_ENABLE | PLANE_CTL_CLEAR_COLOR_DISABLE; diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h b/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h index ff31a00d511e..ca9fdfbbe57c 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h +++ b/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h @@ -322,6 +322,7 @@ _PLANE_WM_2_A_0, _PLANE_WM_2_B_0) #define PLANE_WM_EN REG_BIT(31) #define PLANE_WM_IGNORE_LINES REG_BIT(30) +#define PLANE_WM_AUTO_MIN_ALLOC_EN REG_BIT(29) #define PLANE_WM_LINES_MASK REG_GENMASK(26, 14) #define PLANE_WM_BLOCKS_MASK REG_GENMASK(11, 0) @@ -373,12 +374,26 @@ #define PLANE_BUF_CFG(pipe, plane) _MMIO_SKL_PLANE((pipe), (plane), \ _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B, \ _PLANE_BUF_CFG_2_A, _PLANE_BUF_CFG_2_B) + /* skl+: 10 bits, icl+ 11 bits, adlp+ 12 bits */ #define PLANE_BUF_END_MASK REG_GENMASK(27, 16) #define PLANE_BUF_END(end) REG_FIELD_PREP(PLANE_BUF_END_MASK, (end)) #define PLANE_BUF_START_MASK REG_GENMASK(11, 0) #define PLANE_BUF_START(start) REG_FIELD_PREP(PLANE_BUF_START_MASK, (start)) +#define _PLANE_MIN_BUF_CFG_1_A 0x70274 +#define _PLANE_MIN_BUF_CFG_2_A 0x70374 +#define _PLANE_MIN_BUF_CFG_1_B 0x71274 +#define _PLANE_MIN_BUF_CFG_2_B 0x71374 +#define PLANE_MIN_BUF_CFG(pipe, plane) _MMIO_SKL_PLANE((pipe), (plane), \ + _PLANE_MIN_BUF_CFG_1_A, _PLANE_MIN_BUF_CFG_1_B, \ + _PLANE_MIN_BUF_CFG_2_A, _PLANE_MIN_BUF_CFG_2_B) +#define PLANE_AUTO_MIN_DBUF_EN REG_BIT(31) +#define PLANE_MIN_DBUF_BLOCKS_MASK REG_GENMASK(27, 16) +#define PLANE_MIN_DBUF_BLOCKS(val) REG_FIELD_PREP(PLANE_MIN_DBUF_BLOCKS_MASK, (val)) +#define PLANE_INTERIM_DBUF_BLOCKS_MASK REG_GENMASK(11, 0) +#define PLANE_INTERIM_DBUF_BLOCKS(val) 
REG_FIELD_PREP(PLANE_INTERIM_DBUF_BLOCKS_MASK, (val)) + /* tgl+ */ #define _SEL_FETCH_PLANE_CTL_1_A 0x70890 #define _SEL_FETCH_PLANE_CTL_2_A 0x708b0 diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 3b0e87edbacf..f4458d1185b3 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -77,20 +77,23 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *i915) bool intel_has_sagv(struct drm_i915_private *i915) { - return HAS_SAGV(i915) && - i915->display.sagv.status != I915_SAGV_NOT_CONTROLLED; + struct intel_display *display = &i915->display; + + return HAS_SAGV(display) && display->sagv.status != I915_SAGV_NOT_CONTROLLED; } static u32 intel_sagv_block_time(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 14) { + struct intel_display *display = &i915->display; + + if (DISPLAY_VER(display) >= 14) { u32 val; - val = intel_de_read(i915, MTL_LATENCY_SAGV); + val = intel_de_read(display, MTL_LATENCY_SAGV); return REG_FIELD_GET(MTL_LATENCY_QCLK_SAGV, val); - } else if (DISPLAY_VER(i915) >= 12) { + } else if (DISPLAY_VER(display) >= 12) { u32 val = 0; int ret; @@ -98,14 +101,14 @@ intel_sagv_block_time(struct drm_i915_private *i915) GEN12_PCODE_READ_SAGV_BLOCK_TIME_US, &val, NULL); if (ret) { - drm_dbg_kms(&i915->drm, "Couldn't read SAGV block time!\n"); + drm_dbg_kms(display->drm, "Couldn't read SAGV block time!\n"); return 0; } return val; - } else if (DISPLAY_VER(i915) == 11) { + } else if (DISPLAY_VER(display) == 11) { return 10; - } else if (HAS_SAGV(i915)) { + } else if (HAS_SAGV(display)) { return 30; } else { return 0; @@ -114,31 +117,33 @@ intel_sagv_block_time(struct drm_i915_private *i915) static void intel_sagv_init(struct drm_i915_private *i915) { - if (!HAS_SAGV(i915)) - i915->display.sagv.status = I915_SAGV_NOT_CONTROLLED; + struct intel_display *display = &i915->display; + + if (!HAS_SAGV(display)) + display->sagv.status = 
I915_SAGV_NOT_CONTROLLED; /* * Probe to see if we have working SAGV control. * For icl+ this was already determined by intel_bw_init_hw(). */ - if (DISPLAY_VER(i915) < 11) + if (DISPLAY_VER(display) < 11) skl_sagv_disable(i915); - drm_WARN_ON(&i915->drm, i915->display.sagv.status == I915_SAGV_UNKNOWN); + drm_WARN_ON(display->drm, display->sagv.status == I915_SAGV_UNKNOWN); - i915->display.sagv.block_time_us = intel_sagv_block_time(i915); + display->sagv.block_time_us = intel_sagv_block_time(i915); - drm_dbg_kms(&i915->drm, "SAGV supported: %s, original SAGV block time: %u us\n", - str_yes_no(intel_has_sagv(i915)), i915->display.sagv.block_time_us); + drm_dbg_kms(display->drm, "SAGV supported: %s, original SAGV block time: %u us\n", + str_yes_no(intel_has_sagv(i915)), display->sagv.block_time_us); /* avoid overflow when adding with wm0 latency/etc. */ - if (drm_WARN(&i915->drm, i915->display.sagv.block_time_us > U16_MAX, + if (drm_WARN(display->drm, display->sagv.block_time_us > U16_MAX, "Excessive SAGV block time %u, ignoring\n", - i915->display.sagv.block_time_us)) - i915->display.sagv.block_time_us = 0; + display->sagv.block_time_us)) + display->sagv.block_time_us = 0; if (!intel_has_sagv(i915)) - i915->display.sagv.block_time_us = 0; + display->sagv.block_time_us = 0; } /* @@ -444,6 +449,7 @@ bool intel_can_enable_sagv(struct drm_i915_private *i915, static int intel_compute_sagv_mask(struct intel_atomic_state *state) { + struct intel_display *display = to_intel_display(state); struct drm_i915_private *i915 = to_i915(state->base.dev); int ret; struct intel_crtc *crtc; @@ -479,7 +485,7 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) * other crtcs can't be allowed to use the more optimal * normal (ie. non-SAGV) watermarks. 
*/ - pipe_wm->use_sagv_wm = !HAS_HW_SAGV_WM(i915) && + pipe_wm->use_sagv_wm = !HAS_HW_SAGV_WM(display) && DISPLAY_VER(i915) >= 12 && intel_crtc_can_enable_sagv(new_crtc_state); @@ -795,30 +801,40 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *i915, const enum pipe pipe, const enum plane_id plane_id, struct skl_ddb_entry *ddb, - struct skl_ddb_entry *ddb_y) + struct skl_ddb_entry *ddb_y, + u16 *min_ddb, u16 *interim_ddb) { + struct intel_display *display = &i915->display; u32 val; /* Cursor doesn't support NV12/planar, so no extra calculation needed */ if (plane_id == PLANE_CURSOR) { - val = intel_de_read(i915, CUR_BUF_CFG(pipe)); + val = intel_de_read(display, CUR_BUF_CFG(pipe)); skl_ddb_entry_init_from_hw(ddb, val); return; } - val = intel_de_read(i915, PLANE_BUF_CFG(pipe, plane_id)); + val = intel_de_read(display, PLANE_BUF_CFG(pipe, plane_id)); skl_ddb_entry_init_from_hw(ddb, val); - if (DISPLAY_VER(i915) >= 11) + if (DISPLAY_VER(display) >= 30) { + val = intel_de_read(display, PLANE_MIN_BUF_CFG(pipe, plane_id)); + + *min_ddb = REG_FIELD_GET(PLANE_MIN_DBUF_BLOCKS_MASK, val); + *interim_ddb = REG_FIELD_GET(PLANE_INTERIM_DBUF_BLOCKS_MASK, val); + } + + if (DISPLAY_VER(display) >= 11) return; - val = intel_de_read(i915, PLANE_NV12_BUF_CFG(pipe, plane_id)); + val = intel_de_read(display, PLANE_NV12_BUF_CFG(pipe, plane_id)); skl_ddb_entry_init_from_hw(ddb_y, val); } static void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, struct skl_ddb_entry *ddb, - struct skl_ddb_entry *ddb_y) + struct skl_ddb_entry *ddb_y, + u16 *min_ddb, u16 *interim_ddb) { struct drm_i915_private *i915 = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; @@ -835,7 +851,9 @@ static void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, skl_ddb_get_hw_plane_state(i915, pipe, plane_id, &ddb[plane_id], - &ddb_y[plane_id]); + &ddb_y[plane_id], + &min_ddb[plane_id], + &interim_ddb[plane_id]); intel_display_power_put(i915, power_domain, wakeref); } @@ -1370,13 
+1388,30 @@ static bool use_minimal_wm0_only(const struct intel_crtc_state *crtc_state, struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane); - return DISPLAY_VER(i915) >= 13 && + /* Xe3+ are auto minimum DDB capble. So don't force minimal wm0 */ + return IS_DISPLAY_VER(display, 13, 20) && crtc_state->uapi.async_flip && plane->async_flip; } +unsigned int +skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, + struct intel_plane *plane, int width, int height, + int cpp) +{ + /* + * We calculate extra ddb based on ratio plane rate/total data rate + * in case, in some cases we should not allocate extra ddb for the plane, + * so do not count its data rate, if this is the case. + */ + if (use_minimal_wm0_only(crtc_state, plane)) + return 0; + + return width * height * cpp; +} + static u64 skl_total_relative_data_rate(const struct intel_crtc_state *crtc_state) { @@ -1513,6 +1548,7 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, const struct intel_dbuf_state *dbuf_state = intel_atomic_get_new_dbuf_state(state); const struct skl_ddb_entry *alloc = &dbuf_state->ddb[crtc->pipe]; + struct intel_display *display = to_intel_display(state); int num_active = hweight8(dbuf_state->active_pipes); struct skl_plane_ddb_iter iter; enum plane_id plane_id; @@ -1523,6 +1559,10 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, /* Clear the partitioning for disabled planes. 
*/ memset(crtc_state->wm.skl.plane_ddb, 0, sizeof(crtc_state->wm.skl.plane_ddb)); memset(crtc_state->wm.skl.plane_ddb_y, 0, sizeof(crtc_state->wm.skl.plane_ddb_y)); + memset(crtc_state->wm.skl.plane_min_ddb, 0, + sizeof(crtc_state->wm.skl.plane_min_ddb)); + memset(crtc_state->wm.skl.plane_interim_ddb, 0, + sizeof(crtc_state->wm.skl.plane_interim_ddb)); if (!crtc_state->hw.active) return 0; @@ -1595,6 +1635,9 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, &crtc_state->wm.skl.plane_ddb[plane_id]; struct skl_ddb_entry *ddb_y = &crtc_state->wm.skl.plane_ddb_y[plane_id]; + u16 *min_ddb = &crtc_state->wm.skl.plane_min_ddb[plane_id]; + u16 *interim_ddb = + &crtc_state->wm.skl.plane_interim_ddb[plane_id]; const struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; @@ -1611,6 +1654,11 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, skl_allocate_plane_ddb(&iter, ddb, &wm->wm[level], crtc_state->rel_data_rate[plane_id]); } + + if (DISPLAY_VER(display) >= 30) { + *min_ddb = wm->wm[0].min_ddb_alloc; + *interim_ddb = wm->sagv.wm0.min_ddb_alloc; + } } drm_WARN_ON(&i915->drm, iter.size != 0 || iter.data_rate != 0); @@ -1654,6 +1702,8 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, &crtc_state->wm.skl.plane_ddb[plane_id]; const struct skl_ddb_entry *ddb_y = &crtc_state->wm.skl.plane_ddb_y[plane_id]; + u16 *interim_ddb = + &crtc_state->wm.skl.plane_interim_ddb[plane_id]; struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; @@ -1667,6 +1717,9 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, } skl_check_wm_level(&wm->sagv.wm0, ddb); + if (DISPLAY_VER(display) >= 30) + *interim_ddb = wm->sagv.wm0.min_ddb_alloc; + skl_check_wm_level(&wm->sagv.trans_wm, ddb); } @@ -1745,6 +1798,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, int color_plane, unsigned int pan_x) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_display *display = 
to_intel_display(crtc_state); struct drm_i915_private *i915 = to_i915(crtc->base.dev); u32 interm_pbpl; @@ -1803,7 +1857,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->y_min_scanlines, wp->dbuf_block_size); - if (DISPLAY_VER(i915) >= 30) + if (DISPLAY_VER(display) >= 30) interm_pbpl += (pan_x != 0); else if (DISPLAY_VER(i915) >= 10) interm_pbpl++; @@ -1868,6 +1922,13 @@ static int skl_wm_max_lines(struct drm_i915_private *i915) return 31; } +static bool xe3_auto_min_alloc_capable(struct intel_plane *plane, int level) +{ + struct intel_display *display = to_intel_display(plane); + + return DISPLAY_VER(display) >= 30 && level == 0 && plane->id != PLANE_CURSOR; +} + static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, struct intel_plane *plane, int level, @@ -2000,6 +2061,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */ result->min_ddb_alloc = max(min_ddb_alloc, blocks) + 1; result->enable = true; + result->auto_min_alloc_wm_enable = xe3_auto_min_alloc_capable(plane, level); if (DISPLAY_VER(i915) < 12 && i915->display.sagv.block_time_us) result->can_sagv = latency >= i915->display.sagv.block_time_us; @@ -2379,16 +2441,18 @@ static bool skl_wm_level_equals(const struct skl_wm_level *l1, return l1->enable == l2->enable && l1->ignore_lines == l2->ignore_lines && l1->lines == l2->lines && - l1->blocks == l2->blocks; + l1->blocks == l2->blocks && + l1->auto_min_alloc_wm_enable == l2->auto_min_alloc_wm_enable; } static bool skl_plane_wm_equals(struct drm_i915_private *i915, const struct skl_plane_wm *wm1, const struct skl_plane_wm *wm2) { + struct intel_display *display = &i915->display; int level; - for (level = 0; level < i915->display.wm.num_levels; level++) { + for (level = 0; level < display->wm.num_levels; level++) { /* * We don't check uv_wm as the hardware doesn't actually * use it. 
It only gets used for calculating the required @@ -2496,6 +2560,7 @@ static u8 intel_dbuf_enabled_slices(const struct intel_dbuf_state *dbuf_state) static int skl_compute_ddb(struct intel_atomic_state *state) { + struct intel_display *display = to_intel_display(state); struct drm_i915_private *i915 = to_i915(state->base.dev); const struct intel_dbuf_state *old_dbuf_state; struct intel_dbuf_state *new_dbuf_state = NULL; @@ -2524,7 +2589,7 @@ skl_compute_ddb(struct intel_atomic_state *state) return ret; } - if (HAS_MBUS_JOINING(i915)) { + if (HAS_MBUS_JOINING(display)) { new_dbuf_state->joined_mbus = adlp_check_mbus_joined(new_dbuf_state->active_pipes); @@ -2742,10 +2807,10 @@ static bool skl_plane_selected_wm_equals(struct intel_plane *plane, const struct skl_pipe_wm *old_pipe_wm, const struct skl_pipe_wm *new_pipe_wm) { - struct drm_i915_private *i915 = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane); int level; - for (level = 0; level < i915->display.wm.num_levels; level++) { + for (level = 0; level < display->wm.num_levels; level++) { /* * We don't check uv_wm as the hardware doesn't actually * use it. It only gets used for calculating the required @@ -2756,7 +2821,7 @@ static bool skl_plane_selected_wm_equals(struct intel_plane *plane, return false; } - if (HAS_HW_SAGV_WM(i915)) { + if (HAS_HW_SAGV_WM(display)) { const struct skl_plane_wm *old_wm = &old_pipe_wm->planes[plane->id]; const struct skl_plane_wm *new_wm = &new_pipe_wm->planes[plane->id]; @@ -2847,32 +2912,58 @@ static int skl_wm_add_affected_planes(struct intel_atomic_state *state, * Program DEEP PKG_C_LATENCY Pkg C with all 1's. 
* Program PKG_C_LATENCY Added Wake Time = 0 */ -static void -skl_program_dpkgc_latency(struct drm_i915_private *i915, bool enable_dpkgc) +void +intel_program_dpkgc_latency(struct intel_atomic_state *state) { - u32 max_latency = 0; - u32 clear = 0, val = 0; + struct intel_display *display = to_intel_display(state); + struct drm_i915_private *i915 = to_i915(display->drm); + struct intel_crtc *crtc; + struct intel_crtc_state *new_crtc_state; + u32 latency = LNL_PKG_C_LATENCY_MASK; u32 added_wake_time = 0; + u32 max_linetime = 0; + u32 clear, val; + bool fixed_refresh_rate = false; + int i; - if (DISPLAY_VER(i915) < 20) + if (DISPLAY_VER(display) < 20) return; - if (enable_dpkgc) { - max_latency = skl_watermark_max_latency(i915, 1); - if (max_latency == 0) - max_latency = LNL_PKG_C_LATENCY_MASK; + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + if (!new_crtc_state->vrr.enable || + (new_crtc_state->vrr.vmin == new_crtc_state->vrr.vmax && + new_crtc_state->vrr.vmin == new_crtc_state->vrr.flipline)) + fixed_refresh_rate = true; + + max_linetime = max(new_crtc_state->linetime, max_linetime); + } + + if (fixed_refresh_rate) { added_wake_time = DSB_EXE_TIME + - i915->display.sagv.block_time_us; - } else { - max_latency = LNL_PKG_C_LATENCY_MASK; - added_wake_time = 0; + display->sagv.block_time_us; + + latency = skl_watermark_max_latency(i915, 1); + + /* Wa_22020432604 */ + if ((DISPLAY_VER(display) == 20 || DISPLAY_VER(display) == 30) && !latency) { + latency += added_wake_time; + added_wake_time = 0; + } + + /* Wa_22020299601 */ + if ((latency && max_linetime) && + (DISPLAY_VER(display) == 20 || DISPLAY_VER(display) == 30)) { + latency = max_linetime * DIV_ROUND_UP(latency, max_linetime); + } else if (!latency) { + latency = LNL_PKG_C_LATENCY_MASK; + } } - clear |= LNL_ADDED_WAKE_TIME_MASK | LNL_PKG_C_LATENCY_MASK; - val |= REG_FIELD_PREP(LNL_PKG_C_LATENCY_MASK, max_latency); - val |= REG_FIELD_PREP(LNL_ADDED_WAKE_TIME_MASK, added_wake_time); + clear = 
LNL_ADDED_WAKE_TIME_MASK | LNL_PKG_C_LATENCY_MASK; + val = REG_FIELD_PREP(LNL_PKG_C_LATENCY_MASK, latency) | + REG_FIELD_PREP(LNL_ADDED_WAKE_TIME_MASK, added_wake_time); - intel_uncore_rmw(&i915->uncore, LNL_PKG_C_LATENCY, clear, val); + intel_de_rmw(display, LNL_PKG_C_LATENCY, clear, val); } static int @@ -2881,7 +2972,6 @@ skl_compute_wm(struct intel_atomic_state *state) struct intel_crtc *crtc; struct intel_crtc_state __maybe_unused *new_crtc_state; int ret, i; - bool enable_dpkgc = false; for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { ret = skl_build_pipe_wm(state, crtc); @@ -2906,32 +2996,28 @@ skl_compute_wm(struct intel_atomic_state *state) ret = skl_wm_add_affected_planes(state, crtc); if (ret) return ret; - - if ((new_crtc_state->vrr.vmin == new_crtc_state->vrr.vmax && - new_crtc_state->vrr.vmin == new_crtc_state->vrr.flipline) || - !new_crtc_state->vrr.enable) - enable_dpkgc = true; } - skl_program_dpkgc_latency(to_i915(state->base.dev), enable_dpkgc); - skl_print_wm_changes(state); return 0; } -static void skl_wm_level_from_reg_val(u32 val, struct skl_wm_level *level) +static void skl_wm_level_from_reg_val(struct intel_display *display, + u32 val, struct skl_wm_level *level) { level->enable = val & PLANE_WM_EN; level->ignore_lines = val & PLANE_WM_IGNORE_LINES; level->blocks = REG_FIELD_GET(PLANE_WM_BLOCKS_MASK, val); level->lines = REG_FIELD_GET(PLANE_WM_LINES_MASK, val); + level->auto_min_alloc_wm_enable = DISPLAY_VER(display) >= 30 ? 
+ val & PLANE_WM_AUTO_MIN_ALLOC_EN : 0; } static void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, struct skl_pipe_wm *out) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); enum pipe pipe = crtc->pipe; enum plane_id plane_id; int level; @@ -2940,37 +3026,37 @@ static void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, for_each_plane_id_on_crtc(crtc, plane_id) { struct skl_plane_wm *wm = &out->planes[plane_id]; - for (level = 0; level < i915->display.wm.num_levels; level++) { + for (level = 0; level < display->wm.num_levels; level++) { if (plane_id != PLANE_CURSOR) - val = intel_de_read(i915, PLANE_WM(pipe, plane_id, level)); + val = intel_de_read(display, PLANE_WM(pipe, plane_id, level)); else - val = intel_de_read(i915, CUR_WM(pipe, level)); + val = intel_de_read(display, CUR_WM(pipe, level)); - skl_wm_level_from_reg_val(val, &wm->wm[level]); + skl_wm_level_from_reg_val(display, val, &wm->wm[level]); } if (plane_id != PLANE_CURSOR) - val = intel_de_read(i915, PLANE_WM_TRANS(pipe, plane_id)); + val = intel_de_read(display, PLANE_WM_TRANS(pipe, plane_id)); else - val = intel_de_read(i915, CUR_WM_TRANS(pipe)); + val = intel_de_read(display, CUR_WM_TRANS(pipe)); - skl_wm_level_from_reg_val(val, &wm->trans_wm); + skl_wm_level_from_reg_val(display, val, &wm->trans_wm); - if (HAS_HW_SAGV_WM(i915)) { + if (HAS_HW_SAGV_WM(display)) { if (plane_id != PLANE_CURSOR) - val = intel_de_read(i915, PLANE_WM_SAGV(pipe, plane_id)); + val = intel_de_read(display, PLANE_WM_SAGV(pipe, plane_id)); else - val = intel_de_read(i915, CUR_WM_SAGV(pipe)); + val = intel_de_read(display, CUR_WM_SAGV(pipe)); - skl_wm_level_from_reg_val(val, &wm->sagv.wm0); + skl_wm_level_from_reg_val(display, val, &wm->sagv.wm0); if (plane_id != PLANE_CURSOR) - val = intel_de_read(i915, PLANE_WM_SAGV_TRANS(pipe, plane_id)); + val = intel_de_read(display, PLANE_WM_SAGV_TRANS(pipe, plane_id)); else - val = intel_de_read(i915, 
CUR_WM_SAGV_TRANS(pipe)); + val = intel_de_read(display, CUR_WM_SAGV_TRANS(pipe)); - skl_wm_level_from_reg_val(val, &wm->sagv.trans_wm); - } else if (DISPLAY_VER(i915) >= 12) { + skl_wm_level_from_reg_val(display, val, &wm->sagv.trans_wm); + } else if (DISPLAY_VER(display) >= 12) { wm->sagv.wm0 = wm->wm[0]; wm->sagv.trans_wm = wm->trans_wm; } @@ -2984,12 +3070,12 @@ static void skl_wm_get_hw_state(struct drm_i915_private *i915) to_intel_dbuf_state(i915->display.dbuf.obj.state); struct intel_crtc *crtc; - if (HAS_MBUS_JOINING(i915)) - dbuf_state->joined_mbus = intel_de_read(i915, MBUS_CTL) & MBUS_JOIN; + if (HAS_MBUS_JOINING(display)) + dbuf_state->joined_mbus = intel_de_read(display, MBUS_CTL) & MBUS_JOIN; dbuf_state->mdclk_cdclk_ratio = intel_mdclk_cdclk_ratio(display, &display->cdclk.hw); - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); enum pipe pipe = crtc->pipe; @@ -3010,12 +3096,17 @@ static void skl_wm_get_hw_state(struct drm_i915_private *i915) &crtc_state->wm.skl.plane_ddb[plane_id]; struct skl_ddb_entry *ddb_y = &crtc_state->wm.skl.plane_ddb_y[plane_id]; + u16 *min_ddb = + &crtc_state->wm.skl.plane_min_ddb[plane_id]; + u16 *interim_ddb = + &crtc_state->wm.skl.plane_interim_ddb[plane_id]; if (!crtc_state->hw.active) continue; skl_ddb_get_hw_plane_state(i915, crtc->pipe, - plane_id, ddb, ddb_y); + plane_id, ddb, ddb_y, + min_ddb, interim_ddb); skl_ddb_entry_union(&dbuf_state->ddb[pipe], ddb); skl_ddb_entry_union(&dbuf_state->ddb[pipe], ddb_y); @@ -3037,7 +3128,7 @@ static void skl_wm_get_hw_state(struct drm_i915_private *i915) dbuf_state->slices[pipe] = skl_ddb_dbuf_slice_mask(i915, &crtc_state->wm.skl.ddb); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] dbuf slices 0x%x, ddb (%d - %d), active pipes 0x%x, mbus joined: %s\n", crtc->base.base.id, crtc->base.name, dbuf_state->slices[pipe], dbuf_state->ddb[pipe].start, @@ 
-3045,203 +3136,7 @@ static void skl_wm_get_hw_state(struct drm_i915_private *i915) str_yes_no(dbuf_state->joined_mbus)); } - dbuf_state->enabled_slices = i915->display.dbuf.enabled_slices; -} - -static bool skl_dbuf_is_misconfigured(struct drm_i915_private *i915) -{ - const struct intel_dbuf_state *dbuf_state = - to_intel_dbuf_state(i915->display.dbuf.obj.state); - struct skl_ddb_entry entries[I915_MAX_PIPES] = {}; - struct intel_crtc *crtc; - - for_each_intel_crtc(&i915->drm, crtc) { - const struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); - - entries[crtc->pipe] = crtc_state->wm.skl.ddb; - } - - for_each_intel_crtc(&i915->drm, crtc) { - const struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); - u8 slices; - - slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes, - dbuf_state->joined_mbus); - if (dbuf_state->slices[crtc->pipe] & ~slices) - return true; - - if (skl_ddb_allocation_overlaps(&crtc_state->wm.skl.ddb, entries, - I915_MAX_PIPES, crtc->pipe)) - return true; - } - - return false; -} - -static void skl_wm_sanitize(struct drm_i915_private *i915) -{ - struct intel_crtc *crtc; - - /* - * On TGL/RKL (at least) the BIOS likes to assign the planes - * to the wrong DBUF slices. This will cause an infinite loop - * in skl_commit_modeset_enables() as it can't find a way to - * transition between the old bogus DBUF layout to the new - * proper DBUF layout without DBUF allocation overlaps between - * the planes (which cannot be allowed or else the hardware - * may hang). If we detect a bogus DBUF layout just turn off - * all the planes so that skl_commit_modeset_enables() can - * simply ignore them. 
- */ - if (!skl_dbuf_is_misconfigured(i915)) - return; - - drm_dbg_kms(&i915->drm, "BIOS has misprogrammed the DBUF, disabling all planes\n"); - - for_each_intel_crtc(&i915->drm, crtc) { - struct intel_plane *plane = to_intel_plane(crtc->base.primary); - const struct intel_plane_state *plane_state = - to_intel_plane_state(plane->base.state); - struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); - - if (plane_state->uapi.visible) - intel_plane_disable_noatomic(crtc, plane); - - drm_WARN_ON(&i915->drm, crtc_state->active_planes != 0); - - memset(&crtc_state->wm.skl.ddb, 0, sizeof(crtc_state->wm.skl.ddb)); - } -} - -static void skl_wm_get_hw_state_and_sanitize(struct drm_i915_private *i915) -{ - skl_wm_get_hw_state(i915); - skl_wm_sanitize(i915); -} - -void intel_wm_state_verify(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - struct drm_i915_private *i915 = to_i915(state->base.dev); - const struct intel_crtc_state *new_crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - struct skl_hw_state { - struct skl_ddb_entry ddb[I915_MAX_PLANES]; - struct skl_ddb_entry ddb_y[I915_MAX_PLANES]; - struct skl_pipe_wm wm; - } *hw; - const struct skl_pipe_wm *sw_wm = &new_crtc_state->wm.skl.optimal; - struct intel_plane *plane; - u8 hw_enabled_slices; - int level; - - if (DISPLAY_VER(i915) < 9 || !new_crtc_state->hw.active) - return; - - hw = kzalloc(sizeof(*hw), GFP_KERNEL); - if (!hw) - return; - - skl_pipe_wm_get_hw_state(crtc, &hw->wm); - - skl_pipe_ddb_get_hw_state(crtc, hw->ddb, hw->ddb_y); - - hw_enabled_slices = intel_enabled_dbuf_slices_mask(i915); - - if (DISPLAY_VER(i915) >= 11 && - hw_enabled_slices != i915->display.dbuf.enabled_slices) - drm_err(&i915->drm, - "mismatch in DBUF Slices (expected 0x%x, got 0x%x)\n", - i915->display.dbuf.enabled_slices, - hw_enabled_slices); - - for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) { - const struct skl_ddb_entry *hw_ddb_entry, *sw_ddb_entry; - const struct skl_wm_level 
*hw_wm_level, *sw_wm_level; - - /* Watermarks */ - for (level = 0; level < i915->display.wm.num_levels; level++) { - hw_wm_level = &hw->wm.planes[plane->id].wm[level]; - sw_wm_level = skl_plane_wm_level(sw_wm, plane->id, level); - - if (skl_wm_level_equals(hw_wm_level, sw_wm_level)) - continue; - - drm_err(&i915->drm, - "[PLANE:%d:%s] mismatch in WM%d (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", - plane->base.base.id, plane->base.name, level, - sw_wm_level->enable, - sw_wm_level->blocks, - sw_wm_level->lines, - hw_wm_level->enable, - hw_wm_level->blocks, - hw_wm_level->lines); - } - - hw_wm_level = &hw->wm.planes[plane->id].trans_wm; - sw_wm_level = skl_plane_trans_wm(sw_wm, plane->id); - - if (!skl_wm_level_equals(hw_wm_level, sw_wm_level)) { - drm_err(&i915->drm, - "[PLANE:%d:%s] mismatch in trans WM (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", - plane->base.base.id, plane->base.name, - sw_wm_level->enable, - sw_wm_level->blocks, - sw_wm_level->lines, - hw_wm_level->enable, - hw_wm_level->blocks, - hw_wm_level->lines); - } - - hw_wm_level = &hw->wm.planes[plane->id].sagv.wm0; - sw_wm_level = &sw_wm->planes[plane->id].sagv.wm0; - - if (HAS_HW_SAGV_WM(i915) && - !skl_wm_level_equals(hw_wm_level, sw_wm_level)) { - drm_err(&i915->drm, - "[PLANE:%d:%s] mismatch in SAGV WM (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", - plane->base.base.id, plane->base.name, - sw_wm_level->enable, - sw_wm_level->blocks, - sw_wm_level->lines, - hw_wm_level->enable, - hw_wm_level->blocks, - hw_wm_level->lines); - } - - hw_wm_level = &hw->wm.planes[plane->id].sagv.trans_wm; - sw_wm_level = &sw_wm->planes[plane->id].sagv.trans_wm; - - if (HAS_HW_SAGV_WM(i915) && - !skl_wm_level_equals(hw_wm_level, sw_wm_level)) { - drm_err(&i915->drm, - "[PLANE:%d:%s] mismatch in SAGV trans WM (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", - plane->base.base.id, plane->base.name, - sw_wm_level->enable, - sw_wm_level->blocks, - sw_wm_level->lines, - hw_wm_level->enable, - 
hw_wm_level->blocks, - hw_wm_level->lines); - } - - /* DDB */ - hw_ddb_entry = &hw->ddb[PLANE_CURSOR]; - sw_ddb_entry = &new_crtc_state->wm.skl.plane_ddb[PLANE_CURSOR]; - - if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) { - drm_err(&i915->drm, - "[PLANE:%d:%s] mismatch in DDB (expected (%u,%u), found (%u,%u))\n", - plane->base.base.id, plane->base.name, - sw_ddb_entry->start, sw_ddb_entry->end, - hw_ddb_entry->start, hw_ddb_entry->end); - } - } - - kfree(hw); + dbuf_state->enabled_slices = display->dbuf.enabled_slices; } bool skl_watermark_ipc_enabled(struct drm_i915_private *i915) @@ -3386,31 +3281,19 @@ static void skl_read_wm_latency(struct drm_i915_private *i915, u16 wm[]) static void skl_setup_wm_latency(struct drm_i915_private *i915) { - if (HAS_HW_SAGV_WM(i915)) - i915->display.wm.num_levels = 6; - else - i915->display.wm.num_levels = 8; + struct intel_display *display = &i915->display; - if (DISPLAY_VER(i915) >= 14) - mtl_read_wm_latency(i915, i915->display.wm.skl_latency); + if (HAS_HW_SAGV_WM(display)) + display->wm.num_levels = 6; else - skl_read_wm_latency(i915, i915->display.wm.skl_latency); - - intel_print_wm_latency(i915, "Gen9 Plane", i915->display.wm.skl_latency); -} - -static const struct intel_wm_funcs skl_wm_funcs = { - .compute_global_watermarks = skl_compute_wm, - .get_hw_state = skl_wm_get_hw_state_and_sanitize, -}; - -void skl_wm_init(struct drm_i915_private *i915) -{ - intel_sagv_init(i915); + display->wm.num_levels = 8; - skl_setup_wm_latency(i915); + if (DISPLAY_VER(display) >= 14) + mtl_read_wm_latency(i915, display->wm.skl_latency); + else + skl_read_wm_latency(i915, display->wm.skl_latency); - i915->display.funcs.wm = &skl_wm_funcs; + intel_print_wm_latency(i915, "Gen9 Plane", display->wm.skl_latency); } static struct intel_global_state *intel_dbuf_duplicate_state(struct intel_global_obj *obj) @@ -3450,13 +3333,14 @@ intel_atomic_get_dbuf_state(struct intel_atomic_state *state) int intel_dbuf_init(struct drm_i915_private *i915) 
{ + struct intel_display *display = &i915->display; struct intel_dbuf_state *dbuf_state; dbuf_state = kzalloc(sizeof(*dbuf_state), GFP_KERNEL); if (!dbuf_state) return -ENOMEM; - intel_atomic_global_obj_init(i915, &i915->display.dbuf.obj, + intel_atomic_global_obj_init(display, &display->dbuf.obj, &dbuf_state->base, &intel_dbuf_funcs); return 0; @@ -3466,38 +3350,27 @@ static bool xelpdp_is_only_pipe_per_dbuf_bank(enum pipe pipe, u8 active_pipes) { switch (pipe) { case PIPE_A: - return !(active_pipes & BIT(PIPE_D)); case PIPE_D: - return !(active_pipes & BIT(PIPE_A)); + active_pipes &= BIT(PIPE_A) | BIT(PIPE_D); + break; case PIPE_B: - return !(active_pipes & BIT(PIPE_C)); case PIPE_C: - return !(active_pipes & BIT(PIPE_B)); + active_pipes &= BIT(PIPE_B) | BIT(PIPE_C); + break; default: /* to suppress compiler warning */ MISSING_CASE(pipe); - break; + return false; } - return false; + return is_power_of_2(active_pipes); } -static void intel_mbus_dbox_update(struct intel_atomic_state *state) +static u32 pipe_mbus_dbox_ctl(const struct intel_crtc *crtc, + const struct intel_dbuf_state *dbuf_state) { - struct drm_i915_private *i915 = to_i915(state->base.dev); - const struct intel_dbuf_state *new_dbuf_state, *old_dbuf_state; - const struct intel_crtc *crtc; + struct drm_i915_private *i915 = to_i915(crtc->base.dev); u32 val = 0; - if (DISPLAY_VER(i915) < 11) - return; - - new_dbuf_state = intel_atomic_get_new_dbuf_state(state); - old_dbuf_state = intel_atomic_get_old_dbuf_state(state); - if (!new_dbuf_state || - (new_dbuf_state->joined_mbus == old_dbuf_state->joined_mbus && - new_dbuf_state->active_pipes == old_dbuf_state->active_pipes)) - return; - if (DISPLAY_VER(i915) >= 14) val |= MBUS_DBOX_I_CREDIT(2); @@ -3508,12 +3381,12 @@ static void intel_mbus_dbox_update(struct intel_atomic_state *state) } if (DISPLAY_VER(i915) >= 14) - val |= new_dbuf_state->joined_mbus ? MBUS_DBOX_A_CREDIT(12) : - MBUS_DBOX_A_CREDIT(8); + val |= dbuf_state->joined_mbus ? 
+ MBUS_DBOX_A_CREDIT(12) : MBUS_DBOX_A_CREDIT(8); else if (IS_ALDERLAKE_P(i915)) /* Wa_22010947358:adl-p */ - val |= new_dbuf_state->joined_mbus ? MBUS_DBOX_A_CREDIT(6) : - MBUS_DBOX_A_CREDIT(4); + val |= dbuf_state->joined_mbus ? + MBUS_DBOX_A_CREDIT(6) : MBUS_DBOX_A_CREDIT(4); else val |= MBUS_DBOX_A_CREDIT(2); @@ -3530,19 +3403,42 @@ static void intel_mbus_dbox_update(struct intel_atomic_state *state) val |= MBUS_DBOX_B_CREDIT(8); } - for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, new_dbuf_state->active_pipes) { - u32 pipe_val = val; + if (DISPLAY_VERx100(i915) == 1400) { + if (xelpdp_is_only_pipe_per_dbuf_bank(crtc->pipe, dbuf_state->active_pipes)) + val |= MBUS_DBOX_BW_8CREDITS_MTL; + else + val |= MBUS_DBOX_BW_4CREDITS_MTL; + } - if (DISPLAY_VERx100(i915) == 1400) { - if (xelpdp_is_only_pipe_per_dbuf_bank(crtc->pipe, - new_dbuf_state->active_pipes)) - pipe_val |= MBUS_DBOX_BW_8CREDITS_MTL; - else - pipe_val |= MBUS_DBOX_BW_4CREDITS_MTL; - } + return val; +} - intel_de_write(i915, PIPE_MBUS_DBOX_CTL(crtc->pipe), pipe_val); - } +static void pipe_mbus_dbox_ctl_update(struct drm_i915_private *i915, + const struct intel_dbuf_state *dbuf_state) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, dbuf_state->active_pipes) + intel_de_write(i915, PIPE_MBUS_DBOX_CTL(crtc->pipe), + pipe_mbus_dbox_ctl(crtc, dbuf_state)); +} + +static void intel_mbus_dbox_update(struct intel_atomic_state *state) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + const struct intel_dbuf_state *new_dbuf_state, *old_dbuf_state; + + if (DISPLAY_VER(i915) < 11) + return; + + new_dbuf_state = intel_atomic_get_new_dbuf_state(state); + old_dbuf_state = intel_atomic_get_old_dbuf_state(state); + if (!new_dbuf_state || + (new_dbuf_state->joined_mbus == old_dbuf_state->joined_mbus && + new_dbuf_state->active_pipes == old_dbuf_state->active_pipes)) + return; + + pipe_mbus_dbox_ctl_update(i915, new_dbuf_state); } int 
intel_dbuf_state_set_mdclk_cdclk_ratio(struct intel_atomic_state *state, @@ -3562,23 +3458,24 @@ int intel_dbuf_state_set_mdclk_cdclk_ratio(struct intel_atomic_state *state, void intel_dbuf_mdclk_cdclk_ratio_update(struct drm_i915_private *i915, int ratio, bool joined_mbus) { + struct intel_display *display = &i915->display; enum dbuf_slice slice; - if (!HAS_MBUS_JOINING(i915)) + if (!HAS_MBUS_JOINING(display)) return; - if (DISPLAY_VER(i915) >= 20) - intel_de_rmw(i915, MBUS_CTL, MBUS_TRANSLATION_THROTTLE_MIN_MASK, + if (DISPLAY_VER(display) >= 20) + intel_de_rmw(display, MBUS_CTL, MBUS_TRANSLATION_THROTTLE_MIN_MASK, MBUS_TRANSLATION_THROTTLE_MIN(ratio - 1)); if (joined_mbus) ratio *= 2; - drm_dbg_kms(&i915->drm, "Updating dbuf ratio to %d (mbus joined: %s)\n", + drm_dbg_kms(display->drm, "Updating dbuf ratio to %d (mbus joined: %s)\n", ratio, str_yes_no(joined_mbus)); - for_each_dbuf_slice(i915, slice) - intel_de_rmw(i915, DBUF_CTL_S(slice), + for_each_dbuf_slice(display, slice) + intel_de_rmw(display, DBUF_CTL_S(slice), DBUF_MIN_TRACKER_STATE_SERVICE_MASK, DBUF_MIN_TRACKER_STATE_SERVICE(ratio - 1)); } @@ -3625,22 +3522,13 @@ static enum pipe intel_mbus_joined_pipe(struct intel_atomic_state *state, return INVALID_PIPE; } -static void intel_dbuf_mbus_join_update(struct intel_atomic_state *state, - enum pipe pipe) +static void mbus_ctl_join_update(struct drm_i915_private *i915, + const struct intel_dbuf_state *dbuf_state, + enum pipe pipe) { - struct drm_i915_private *i915 = to_i915(state->base.dev); - const struct intel_dbuf_state *old_dbuf_state = - intel_atomic_get_old_dbuf_state(state); - const struct intel_dbuf_state *new_dbuf_state = - intel_atomic_get_new_dbuf_state(state); u32 mbus_ctl; - drm_dbg_kms(&i915->drm, "Changing mbus joined: %s -> %s (pipe: %c)\n", - str_yes_no(old_dbuf_state->joined_mbus), - str_yes_no(new_dbuf_state->joined_mbus), - pipe != INVALID_PIPE ? 
pipe_name(pipe) : '*'); - - if (new_dbuf_state->joined_mbus) + if (dbuf_state->joined_mbus) mbus_ctl = MBUS_HASHING_MODE_1x4 | MBUS_JOIN; else mbus_ctl = MBUS_HASHING_MODE_2x2; @@ -3655,6 +3543,23 @@ static void intel_dbuf_mbus_join_update(struct intel_atomic_state *state, MBUS_JOIN_PIPE_SELECT_MASK, mbus_ctl); } +static void intel_dbuf_mbus_join_update(struct intel_atomic_state *state, + enum pipe pipe) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + const struct intel_dbuf_state *old_dbuf_state = + intel_atomic_get_old_dbuf_state(state); + const struct intel_dbuf_state *new_dbuf_state = + intel_atomic_get_new_dbuf_state(state); + + drm_dbg_kms(&i915->drm, "Changing mbus joined: %s -> %s (pipe: %c)\n", + str_yes_no(old_dbuf_state->joined_mbus), + str_yes_no(new_dbuf_state->joined_mbus), + pipe != INVALID_PIPE ? pipe_name(pipe) : '*'); + + mbus_ctl_join_update(i915, new_dbuf_state, pipe); +} + void intel_dbuf_mbus_pre_ddb_update(struct intel_atomic_state *state) { const struct intel_dbuf_state *new_dbuf_state = @@ -3757,6 +3662,245 @@ void intel_dbuf_post_plane_update(struct intel_atomic_state *state) gen9_dbuf_slices_update(i915, new_slices); } +static void skl_mbus_sanitize(struct drm_i915_private *i915) +{ + struct intel_display *display = &i915->display; + struct intel_dbuf_state *dbuf_state = + to_intel_dbuf_state(display->dbuf.obj.state); + + if (!HAS_MBUS_JOINING(display)) + return; + + if (!dbuf_state->joined_mbus || + adlp_check_mbus_joined(dbuf_state->active_pipes)) + return; + + drm_dbg_kms(display->drm, "Disabling redundant MBUS joining (active pipes 0x%x)\n", + dbuf_state->active_pipes); + + dbuf_state->joined_mbus = false; + intel_dbuf_mdclk_cdclk_ratio_update(i915, + dbuf_state->mdclk_cdclk_ratio, + dbuf_state->joined_mbus); + pipe_mbus_dbox_ctl_update(i915, dbuf_state); + mbus_ctl_join_update(i915, dbuf_state, INVALID_PIPE); +} + +static bool skl_dbuf_is_misconfigured(struct drm_i915_private *i915) +{ + const struct intel_dbuf_state 
*dbuf_state = + to_intel_dbuf_state(i915->display.dbuf.obj.state); + struct skl_ddb_entry entries[I915_MAX_PIPES] = {}; + struct intel_crtc *crtc; + + for_each_intel_crtc(&i915->drm, crtc) { + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + entries[crtc->pipe] = crtc_state->wm.skl.ddb; + } + + for_each_intel_crtc(&i915->drm, crtc) { + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + u8 slices; + + slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes, + dbuf_state->joined_mbus); + if (dbuf_state->slices[crtc->pipe] & ~slices) + return true; + + if (skl_ddb_allocation_overlaps(&crtc_state->wm.skl.ddb, entries, + I915_MAX_PIPES, crtc->pipe)) + return true; + } + + return false; +} + +static void skl_dbuf_sanitize(struct drm_i915_private *i915) +{ + struct intel_crtc *crtc; + + /* + * On TGL/RKL (at least) the BIOS likes to assign the planes + * to the wrong DBUF slices. This will cause an infinite loop + * in skl_commit_modeset_enables() as it can't find a way to + * transition between the old bogus DBUF layout to the new + * proper DBUF layout without DBUF allocation overlaps between + * the planes (which cannot be allowed or else the hardware + * may hang). If we detect a bogus DBUF layout just turn off + * all the planes so that skl_commit_modeset_enables() can + * simply ignore them. 
+ */ + if (!skl_dbuf_is_misconfigured(i915)) + return; + + drm_dbg_kms(&i915->drm, "BIOS has misprogrammed the DBUF, disabling all planes\n"); + + for_each_intel_crtc(&i915->drm, crtc) { + struct intel_plane *plane = to_intel_plane(crtc->base.primary); + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + if (plane_state->uapi.visible) + intel_plane_disable_noatomic(crtc, plane); + + drm_WARN_ON(&i915->drm, crtc_state->active_planes != 0); + + memset(&crtc_state->wm.skl.ddb, 0, sizeof(crtc_state->wm.skl.ddb)); + } +} + +static void skl_wm_get_hw_state_and_sanitize(struct drm_i915_private *i915) +{ + skl_wm_get_hw_state(i915); + + skl_mbus_sanitize(i915); + skl_dbuf_sanitize(i915); +} + +void intel_wm_state_verify(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(state); + struct drm_i915_private *i915 = to_i915(state->base.dev); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct skl_hw_state { + struct skl_ddb_entry ddb[I915_MAX_PLANES]; + struct skl_ddb_entry ddb_y[I915_MAX_PLANES]; + u16 min_ddb[I915_MAX_PLANES]; + u16 interim_ddb[I915_MAX_PLANES]; + struct skl_pipe_wm wm; + } *hw; + const struct skl_pipe_wm *sw_wm = &new_crtc_state->wm.skl.optimal; + struct intel_plane *plane; + u8 hw_enabled_slices; + int level; + + if (DISPLAY_VER(i915) < 9 || !new_crtc_state->hw.active) + return; + + hw = kzalloc(sizeof(*hw), GFP_KERNEL); + if (!hw) + return; + + skl_pipe_wm_get_hw_state(crtc, &hw->wm); + + skl_pipe_ddb_get_hw_state(crtc, hw->ddb, hw->ddb_y, hw->min_ddb, hw->interim_ddb); + + hw_enabled_slices = intel_enabled_dbuf_slices_mask(i915); + + if (DISPLAY_VER(i915) >= 11 && + hw_enabled_slices != i915->display.dbuf.enabled_slices) + drm_err(&i915->drm, + "mismatch in DBUF Slices (expected 0x%x, got 0x%x)\n", + 
i915->display.dbuf.enabled_slices, + hw_enabled_slices); + + for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) { + const struct skl_ddb_entry *hw_ddb_entry, *sw_ddb_entry; + const struct skl_wm_level *hw_wm_level, *sw_wm_level; + + /* Watermarks */ + for (level = 0; level < i915->display.wm.num_levels; level++) { + hw_wm_level = &hw->wm.planes[plane->id].wm[level]; + sw_wm_level = skl_plane_wm_level(sw_wm, plane->id, level); + + if (skl_wm_level_equals(hw_wm_level, sw_wm_level)) + continue; + + drm_err(&i915->drm, + "[PLANE:%d:%s] mismatch in WM%d (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", + plane->base.base.id, plane->base.name, level, + sw_wm_level->enable, + sw_wm_level->blocks, + sw_wm_level->lines, + hw_wm_level->enable, + hw_wm_level->blocks, + hw_wm_level->lines); + } + + hw_wm_level = &hw->wm.planes[plane->id].trans_wm; + sw_wm_level = skl_plane_trans_wm(sw_wm, plane->id); + + if (!skl_wm_level_equals(hw_wm_level, sw_wm_level)) { + drm_err(&i915->drm, + "[PLANE:%d:%s] mismatch in trans WM (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", + plane->base.base.id, plane->base.name, + sw_wm_level->enable, + sw_wm_level->blocks, + sw_wm_level->lines, + hw_wm_level->enable, + hw_wm_level->blocks, + hw_wm_level->lines); + } + + hw_wm_level = &hw->wm.planes[plane->id].sagv.wm0; + sw_wm_level = &sw_wm->planes[plane->id].sagv.wm0; + + if (HAS_HW_SAGV_WM(display) && + !skl_wm_level_equals(hw_wm_level, sw_wm_level)) { + drm_err(&i915->drm, + "[PLANE:%d:%s] mismatch in SAGV WM (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", + plane->base.base.id, plane->base.name, + sw_wm_level->enable, + sw_wm_level->blocks, + sw_wm_level->lines, + hw_wm_level->enable, + hw_wm_level->blocks, + hw_wm_level->lines); + } + + hw_wm_level = &hw->wm.planes[plane->id].sagv.trans_wm; + sw_wm_level = &sw_wm->planes[plane->id].sagv.trans_wm; + + if (HAS_HW_SAGV_WM(display) && + !skl_wm_level_equals(hw_wm_level, sw_wm_level)) { + drm_err(&i915->drm, + "[PLANE:%d:%s] mismatch in SAGV 
trans WM (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", + plane->base.base.id, plane->base.name, + sw_wm_level->enable, + sw_wm_level->blocks, + sw_wm_level->lines, + hw_wm_level->enable, + hw_wm_level->blocks, + hw_wm_level->lines); + } + + /* DDB */ + hw_ddb_entry = &hw->ddb[PLANE_CURSOR]; + sw_ddb_entry = &new_crtc_state->wm.skl.plane_ddb[PLANE_CURSOR]; + + if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) { + drm_err(&i915->drm, + "[PLANE:%d:%s] mismatch in DDB (expected (%u,%u), found (%u,%u))\n", + plane->base.base.id, plane->base.name, + sw_ddb_entry->start, sw_ddb_entry->end, + hw_ddb_entry->start, hw_ddb_entry->end); + } + } + + kfree(hw); +} + +static const struct intel_wm_funcs skl_wm_funcs = { + .compute_global_watermarks = skl_compute_wm, + .get_hw_state = skl_wm_get_hw_state_and_sanitize, +}; + +void skl_wm_init(struct drm_i915_private *i915) +{ + intel_sagv_init(i915); + + skl_setup_wm_latency(i915); + + i915->display.funcs.wm = &skl_wm_funcs; +} + static int skl_watermark_ipc_status_show(struct seq_file *m, void *data) { struct drm_i915_private *i915 = m->private; @@ -3830,13 +3974,14 @@ DEFINE_SHOW_ATTRIBUTE(intel_sagv_status); void skl_watermark_debugfs_register(struct drm_i915_private *i915) { - struct drm_minor *minor = i915->drm.primary; + struct intel_display *display = &i915->display; + struct drm_minor *minor = display->drm->primary; - if (HAS_IPC(i915)) + if (HAS_IPC(display)) debugfs_create_file("i915_ipc_status", 0644, minor->debugfs_root, i915, &skl_watermark_ipc_status_fops); - if (HAS_SAGV(i915)) + if (HAS_SAGV(display)) debugfs_create_file("i915_sagv_status", 0444, minor->debugfs_root, i915, &intel_sagv_status_fops); } diff --git a/drivers/gpu/drm/i915/display/skl_watermark.h b/drivers/gpu/drm/i915/display/skl_watermark.h index e73baec94873..8659f89427f2 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.h +++ b/drivers/gpu/drm/i915/display/skl_watermark.h @@ -18,6 +18,7 @@ struct intel_bw_state; struct intel_crtc; struct 
intel_crtc_state; struct intel_plane; +struct intel_plane_state; struct skl_pipe_wm; struct skl_wm_level; @@ -53,6 +54,9 @@ const struct skl_wm_level *skl_plane_wm_level(const struct skl_pipe_wm *pipe_wm, int level); const struct skl_wm_level *skl_plane_trans_wm(const struct skl_pipe_wm *pipe_wm, enum plane_id plane_id); +unsigned int skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, + struct intel_plane *plane, int width, + int height, int cpp); struct intel_dbuf_state { struct intel_global_state base; @@ -87,6 +91,7 @@ void intel_dbuf_mdclk_cdclk_ratio_update(struct drm_i915_private *i915, int ratio, bool joined_mbus); void intel_dbuf_mbus_pre_ddb_update(struct intel_atomic_state *state); void intel_dbuf_mbus_post_ddb_update(struct intel_atomic_state *state); +void intel_program_dpkgc_latency(struct intel_atomic_state *state); #endif /* __SKL_WATERMARK_H__ */ diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index e7d9f5897d87..c4d731ab28eb 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -67,9 +67,8 @@ static u16 pixels_from_txbyteclkhs(u16 clk_hs, int bpp, int lane_count, (bpp * burst_mode_ratio)); } -enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt) +static enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt) { - /* It just so happens the VBT matches register contents. */ switch (fmt) { case VID_MODE_FORMAT_RGB888: return MIPI_DSI_FMT_RGB888; @@ -1760,6 +1759,31 @@ static void vlv_dphy_param_init(struct intel_dsi *intel_dsi) intel_dsi_log_params(intel_dsi); } +int vlv_dsi_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) + return 0; + + /* + * On Valleyview some DSI panels lose (v|h)sync when the clock is lower + * than 320000KHz. 
+ */ + if (IS_VALLEYVIEW(dev_priv)) + return 320000; + + /* + * On Geminilake once the CDCLK gets as low as 79200 + * picture gets unstable, despite that values are + * correct for DSI PLL and DE PLL. + */ + if (IS_GEMINILAKE(dev_priv)) + return 158400; + + return 0; +} + typedef void (*vlv_dsi_dmi_quirk_func)(struct intel_dsi *intel_dsi); /* diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.h b/drivers/gpu/drm/i915/display/vlv_dsi.h index cf9d7b82f288..277bacfbc551 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.h +++ b/drivers/gpu/drm/i915/display/vlv_dsi.h @@ -6,21 +6,20 @@ #ifndef __VLV_DSI_H__ #define __VLV_DSI_H__ -#include <linux/types.h> - enum port; struct drm_i915_private; +struct intel_crtc_state; struct intel_dsi; #ifdef I915 void vlv_dsi_wait_for_fifo_empty(struct intel_dsi *intel_dsi, enum port port); -enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt); +int vlv_dsi_min_cdclk(const struct intel_crtc_state *crtc_state); void vlv_dsi_init(struct drm_i915_private *dev_priv); #else static inline void vlv_dsi_wait_for_fifo_empty(struct intel_dsi *intel_dsi, enum port port) { } -static inline enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt) +static inline int vlv_dsi_min_cdclk(const struct intel_crtc_state *crtc_state) { return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 3198b64ad7db..388f90784d8a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -53,29 +53,6 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) } /** - * __i915_gem_object_is_lmem - Whether the object is resident in - * lmem while in the fence signaling critical path. - * @obj: The object to check. - * - * This function is intended to be called from within the fence signaling - * path where the fence, or a pin, keeps the object from being migrated. For - * example during gpu reset or similar. 
- * - * Return: Whether the object is resident in lmem. - */ -bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) -{ - struct intel_memory_region *mr = READ_ONCE(obj->mm.region); - -#ifdef CONFIG_LOCKDEP - GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP) && - i915_gem_object_evictable(obj)); -#endif - return mr && (mr->type == INTEL_MEMORY_LOCAL || - mr->type == INTEL_MEMORY_STOLEN_LOCAL); -} - -/** * __i915_gem_object_create_lmem_with_ps - Create lmem object and force the * minimum page size for the backing pages. * @i915: The i915 instance. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 5a7a14e85c3f..ecd8f1a633a1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -19,8 +19,6 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); -bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); - struct drm_i915_gem_object * i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915, const void *data, size_t size); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 3dc61cbd2e11..bb713e096db2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -283,9 +283,7 @@ bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj); static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) { - /* TODO: make DPT shrinkable when it has no bound vmas */ - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE) && - !obj->is_dpt; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE); } static inline bool diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 89d4dc8b60c6..eb0158e43417 100644 --- 
a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -369,7 +369,7 @@ static int live_parallel_switch(void *arg) if (!data[n].ce[0]) continue; - worker = kthread_create_worker(0, "igt/parallel:%s", + worker = kthread_run_worker(0, "igt/parallel:%s", data[n].ce[0]->engine->name); if (IS_ERR(worker)) { err = PTR_ERR(worker); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 40269e4c1e31..325da0414d94 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -126,9 +126,6 @@ execlists_active(const struct intel_engine_execlists *execlists) return active; } -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); - static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index ba55c059063d..fe1f85e5dda3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -343,6 +343,11 @@ struct intel_engine_guc_stats { * @start_gt_clk: GT clock time of last idle to active transition. 
*/ u64 start_gt_clk; + + /** + * @total: The last value of total returned + */ + u64 total; }; union intel_engine_tlb_inv_reg { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 72090f52fb85..4a80ffa1b962 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -405,15 +405,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) return active; } -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) -{ - struct intel_engine_cs *engine = - container_of(execlists, typeof(*engine), execlists); - - return __unwind_incomplete_requests(engine); -} - static void execlists_context_status_change(struct i915_request *rq, unsigned long status) { diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index d60a6ca0cae5..f6c59f20832f 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -107,11 +107,12 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) /** * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM * @vm: The VM to suspend the mappings for + * @evict_all: Evict all VMAs * * Suspend the memory mappings for all objects mapped to HW via the GGTT or a * DPT page table. 
*/ -void i915_ggtt_suspend_vm(struct i915_address_space *vm) +void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all) { struct i915_vma *vma, *vn; int save_skip_rewrite; @@ -157,7 +158,7 @@ retry: goto retry; } - if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { + if (evict_all || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { i915_vma_wait_for_bind(vma); __i915_vma_evict(vma, false); @@ -172,13 +173,15 @@ retry: vm->skip_pte_rewrite = save_skip_rewrite; mutex_unlock(&vm->mutex); + + drm_WARN_ON(&vm->i915->drm, evict_all && !list_empty(&vm->bound_list)); } void i915_ggtt_suspend(struct i915_ggtt *ggtt) { struct intel_gt *gt; - i915_ggtt_suspend_vm(&ggtt->vm); + i915_ggtt_suspend_vm(&ggtt->vm, false); ggtt->invalidate(ggtt); list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) @@ -1545,6 +1548,7 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915) /** * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM * @vm: The VM to restore the mappings for + * @all_evicted: Were all VMAs expected to be evicted on suspend? * * Restore the memory mappings for all objects mapped to HW via the GGTT or a * DPT page table. @@ -1552,13 +1556,18 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915) * Returns %true if restoring the mapping for any object that was in a write * domain before suspend. 
*/ -bool i915_ggtt_resume_vm(struct i915_address_space *vm) +bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted) { struct i915_vma *vma; bool write_domain_objs = false; drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); + if (all_evicted) { + drm_WARN_ON(&vm->i915->drm, !list_empty(&vm->bound_list)); + return false; + } + /* First fill our portion of the GTT with scratch pages */ vm->clear_range(vm, 0, vm->total); @@ -1598,7 +1607,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) intel_gt_check_and_clear_faults(gt); - flush = i915_ggtt_resume_vm(&ggtt->vm); + flush = i915_ggtt_resume_vm(&ggtt->vm, false); if (drm_mm_node_allocated(&ggtt->error_capture)) ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start, diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 6b85222ee3ea..0a36ea751b63 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -608,8 +608,8 @@ int i915_ppgtt_init_hw(struct intel_gt *gt); struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags); -void i915_ggtt_suspend_vm(struct i915_address_space *vm); -bool i915_ggtt_resume_vm(struct i915_address_space *vm); +void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all); +bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index c864d101faf9..9378d5901c49 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -133,7 +133,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; - if (GRAPHICS_VER(gt->i915) >= 12) { + if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { for (i = 0; i < I915_MAX_VCS; 
i++) if (HAS_ENGINE(gt, _VCS(i))) pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) | diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f42f21632306..aae5a081cb53 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1113,6 +1113,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * Warn CI about the unrecoverable wedged condition. * Time for a reboot. */ + gt_err(gt, "Unrecoverable wedged condition\n"); add_taint_for_CI(gt->i915, TAINT_WARN); return false; } @@ -1198,6 +1199,7 @@ void intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask, const char *reason) { + struct intel_display *display = >->i915->display; intel_engine_mask_t awake; int ret; @@ -1243,7 +1245,7 @@ void intel_gt_reset(struct intel_gt *gt, if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) intel_irq_resume(gt->i915); - intel_overlay_reset(gt->i915); + intel_overlay_reset(display); /* sanitize uC after engine reset */ if (!intel_uc_uses_guc_submission(>->uc)) @@ -1263,8 +1265,10 @@ void intel_gt_reset(struct intel_gt *gt, } ret = resume(gt); - if (ret) + if (ret) { + gt_err(gt, "Failed to resume (%d)\n", ret); goto taint; + } finish: reset_finish(gt, awake); @@ -1607,6 +1611,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt) set_bit(I915_WEDGED_ON_INIT, >->reset.flags); /* Wedged on init is non-recoverable */ + gt_err(gt, "Non-recoverable wedged on init\n"); add_taint_for_CI(gt->i915, TAINT_WARN); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 59da4b7bd262..b74d9205c0f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -308,30 +308,6 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) return cs; } -/* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct i915_request *rq) -{ - int num_dwords; - void *cs; - - num_dwords = 
(rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - if (num_dwords == 0) - return 0; - - num_dwords = CACHELINE_DWORDS - num_dwords; - GEM_BUG_ON(num_dwords & 1); - - cs = intel_ring_begin(rq, num_dwords); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); - intel_ring_advance(rq, cs + num_dwords); - - GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); - return 0; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_ring.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index 1b32dadfb8c3..64b322e25f36 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -16,7 +16,6 @@ struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords); -int intel_ring_cacheline_align(struct i915_request *rq); unsigned int intel_ring_update_space(struct intel_ring *ring); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 32f3b52a183a..458e29d89978 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -26,6 +26,7 @@ #include "shmem_utils.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" +#include "intel_gt_print.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -230,8 +231,13 @@ static int xcs_resume(struct intel_engine_cs *engine) set_pp_dir(engine); - /* First wake the ring up to an empty/idle ring */ - for ((kt) = ktime_get() + (2 * NSEC_PER_MSEC); + /* + * First wake the ring up to an empty/idle ring. + * Use 50ms of delay to let the engine write successfully + * for all platforms. Experimented with different values and + * determined that 50ms works best based on testing. 
+ */ + for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC); ktime_before(ktime_get(), (kt)); cpu_relax()) { /* * In case of resets fails because engine resumes from @@ -282,16 +288,16 @@ static int xcs_resume(struct intel_engine_cs *engine) return 0; err: - drm_err(&engine->i915->drm, - "%s initialization failed; " - "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_CTL) & RING_VALID, - ENGINE_READ(engine, RING_HEAD), ring->head, - ENGINE_READ(engine, RING_TAIL), ring->tail, - ENGINE_READ(engine, RING_START), - i915_ggtt_offset(ring->vma)); + gt_err(engine->gt, "%s initialization failed\n", engine->name); + ENGINE_TRACE(engine, + "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_CTL) & RING_VALID, + ENGINE_READ(engine, RING_HEAD), ring->head, + ENGINE_READ(engine, RING_TAIL), ring->tail, + ENGINE_READ(engine, RING_START), + i915_ggtt_offset(ring->vma)); + GEM_TRACE_DUMP(); return -EIO; } diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 222ca7c44951..81c31396eceb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3574,7 +3574,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) arg[id].batch = NULL; arg[id].count = 0; - worker[id] = kthread_create_worker(0, "igt/smoke:%d", id); + worker[id] = kthread_run_worker(0, "igt/smoke:%d", id); if (IS_ERR(worker[id])) { err = PTR_ERR(worker[id]); break; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 9ce8ff1c04fe..9d3aeb237295 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1025,7 +1025,7 @@ static int __igt_reset_engines(struct intel_gt *gt, 
threads[tmp].engine = other; threads[tmp].flags = flags; - worker = kthread_create_worker(0, "igt/%s", + worker = kthread_run_worker(0, "igt/%s", other->name); if (IS_ERR(worker)) { err = PTR_ERR(worker); diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index ca460cee4f8b..1bf7b88d9a9d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -262,7 +262,7 @@ static int clear(struct intel_migrate *migrate, { struct drm_i915_private *i915 = migrate->context->engine->i915; struct drm_i915_gem_object *obj; - struct i915_request *rq; + struct i915_request *rq = NULL; struct i915_gem_ww_ctx ww; u32 *vaddr, val = 0; bool ccs_cap = false; diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 1aa1446c8fb0..27b6d51ef145 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -8,6 +8,7 @@ #include "intel_gpu_commands.h" #include "intel_gt_requests.h" #include "intel_ring.h" +#include "intel_rps.h" #include "selftest_rc6.h" #include "selftests/i915_random.h" @@ -38,6 +39,9 @@ int live_rc6_manual(void *arg) ktime_t dt; u64 res[2]; int err = 0; + u32 rc0_freq = 0; + u32 rc6_freq = 0; + struct intel_rps *rps = >->rps; /* * Our claim is that we can "encourage" the GPU to enter rc6 at will. @@ -66,6 +70,7 @@ int live_rc6_manual(void *arg) rc0_power = librapl_energy_uJ() - rc0_power; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); + rc0_freq = intel_rps_read_actual_frequency_fw(rps); if ((res[1] - res[0]) >> 10) { pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n", (res[1] - res[0]) >> 10); @@ -77,7 +82,11 @@ int live_rc6_manual(void *arg) rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, ktime_to_ns(dt)); if (!rc0_power) { - pr_err("No power measured while in RC0\n"); + if (rc0_freq) + pr_debug("No power measured while in RC0! 
GPU Freq: %u in RC0\n", + rc0_freq); + else + pr_err("No power and freq measured while in RC0\n"); err = -EINVAL; goto out_unlock; } @@ -90,7 +99,8 @@ int live_rc6_manual(void *arg) intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL); dt = ktime_get(); rc6_power = librapl_energy_uJ(); - msleep(100); + msleep(1000); + rc6_freq = intel_rps_read_actual_frequency_fw(rps); rc6_power = librapl_energy_uJ() - rc6_power; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); @@ -108,7 +118,8 @@ int live_rc6_manual(void *arg) pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n", rc0_power, rc6_power); if (2 * rc6_power > rc0_power) { - pr_err("GPU leaked energy while in RC6!\n"); + pr_err("GPU leaked energy while in RC6! GPU Freq: %u in RC6 and %u in RC0\n", + rc6_freq, rc0_freq); err = -EINVAL; goto out_unlock; } diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index dcef8d498919..c207a4fb03bf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -1125,6 +1125,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq) static u64 measure_power_at(struct intel_rps *rps, int *freq) { *freq = rps_set_check(rps, *freq); + msleep(100); return measure_power(rps, freq); } diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 4ecc4ae74a54..e218b229681f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -489,7 +489,7 @@ static int live_slpc_tile_interaction(void *arg) return -ENOMEM; for_each_gt(gt, i915, i) { - threads[i].worker = kthread_create_worker(0, "igt/slpc_parallel:%d", gt->info.id); + threads[i].worker = kthread_run_worker(0, "igt/slpc_parallel:%d", gt->info.id); if (IS_ERR(threads[i].worker)) { ret = PTR_ERR(threads[i].worker); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 222c95f62156..e8a04e476c57 
100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -18,7 +18,7 @@ #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M #define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M -#elif defined(CONFIG_DRM_I915_DEBUG_GEM) +#elif IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M #define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9ede6f240d79..12f1ba7ca9c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1243,6 +1243,21 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine, } while (++i < 6); } +static void __set_engine_usage_record(struct intel_engine_cs *engine, + u32 last_in, u32 id, u32 total) +{ + struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); + +#define record_write(map_, field_, val_) \ + iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_) + + record_write(&rec_map, last_switch_in_stamp, last_in); + record_write(&rec_map, current_context_index, id); + record_write(&rec_map, total_runtime, total); + +#undef record_write +} + static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) { struct intel_engine_guc_stats *stats = &engine->stats.guc; @@ -1363,9 +1378,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) total += intel_gt_clock_interval_to_ns(gt, clk); } + if (total > stats->total) + stats->total = total; + spin_unlock_irqrestore(&guc->timestamp.lock, flags); - return ns_to_ktime(total); + return ns_to_ktime(stats->total); } static void guc_enable_busyness_worker(struct intel_guc *guc) @@ -1431,8 +1449,21 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) 
guc_update_pm_timestamp(guc, &unused); for_each_engine(engine, gt, id) { + struct intel_engine_guc_stats *stats = &engine->stats.guc; + guc_update_engine_gt_clks(engine); - engine->stats.guc.prev_total = 0; + + /* + * If resetting a running context, accumulate the active + * time as well since there will be no context switch. + */ + if (stats->running) { + u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; + + stats->total_gt_clks += clk; + } + stats->prev_total = 0; + stats->running = 0; } spin_unlock_irqrestore(&guc->timestamp.lock, flags); @@ -1543,6 +1574,9 @@ err_trylock: static int guc_action_enable_usage_stats(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; u32 offset = intel_guc_engine_usage_offset(guc); u32 action[] = { INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, @@ -1550,6 +1584,9 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc) 0, }; + for_each_engine(engine, gt, id) + __set_engine_usage_record(engine, 0, 0xffffffff, 0); + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } @@ -1688,6 +1725,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) spin_lock_irq(guc_to_gt(guc)->irq_lock); spin_unlock_irq(guc_to_gt(guc)->irq_lock); + /* Flush tasklet */ + tasklet_disable(&guc->ct.receive_tasklet); + tasklet_enable(&guc->ct.receive_tasklet); + guc_flush_submissions(guc); guc_flush_destroyed_contexts(guc); flush_work(&guc->ct.requests.worker); @@ -2005,6 +2046,8 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) void intel_guc_submission_reset_finish(struct intel_guc *guc) { + int outstanding; + /* Reset called during driver load or during wedge? */ if (unlikely(!guc_submission_initialized(guc) || !intel_guc_is_fw_running(guc) || @@ -2018,8 +2061,10 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) * see in CI if this happens frequently / a precursor to taking down the * machine. 
*/ - if (atomic_read(&guc->outstanding_submission_g2h)) - guc_err(guc, "Unexpected outstanding GuC to Host in reset finish\n"); + outstanding = atomic_read(&guc->outstanding_submission_g2h); + if (outstanding) + guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n", + outstanding); atomic_set(&guc->outstanding_submission_g2h, 0); intel_guc_global_policies_update(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d7ac31c3254c..b3cbf85c00cb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -427,19 +427,6 @@ void intel_huc_fini(struct intel_huc *huc) intel_uc_fw_fini(&huc->fw); } -void intel_huc_suspend(struct intel_huc *huc) -{ - if (!intel_uc_fw_is_loadable(&huc->fw)) - return; - - /* - * in the unlikely case that we're suspending before the GSC has - * completed its loading sequence, just stop waiting. We'll restart - * on resume. - */ - delayed_huc_load_complete(huc); -} - static const char *auth_mode_string(struct intel_huc *huc, enum intel_huc_authentication_type type) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index ba5cb08e9e7b..d5e441b9e08d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -57,7 +57,6 @@ int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); -void intel_huc_suspend(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type); int intel_huc_wait_for_auth_complete(struct intel_huc *huc, enum intel_huc_authentication_type type); diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 81d67a46cd9e..6439c8e91a8d 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ 
b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1286,6 +1286,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { struct drm_i915_private *dev_priv = s->engine->i915; + struct intel_display *display = &dev_priv->display; struct plane_code_mapping gen8_plane_code[] = { [0] = {PIPE_A, PLANE_A, PRIMARY_A_FLIP_DONE}, [1] = {PIPE_B, PLANE_A, PRIMARY_B_FLIP_DONE}, @@ -1314,9 +1315,9 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s, info->async_flip = ((dword2 & GENMASK(1, 0)) == 0x1); if (info->plane == PLANE_A) { - info->ctrl_reg = DSPCNTR(dev_priv, info->pipe); - info->stride_reg = DSPSTRIDE(dev_priv, info->pipe); - info->surf_reg = DSPSURF(dev_priv, info->pipe); + info->ctrl_reg = DSPCNTR(display, info->pipe); + info->stride_reg = DSPSTRIDE(display, info->pipe); + info->surf_reg = DSPSURF(display, info->pipe); } else if (info->plane == PLANE_B) { info->ctrl_reg = SPRCTL(info->pipe); info->stride_reg = SPRSTRIDE(info->pipe); @@ -1332,6 +1333,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { struct drm_i915_private *dev_priv = s->engine->i915; + struct intel_display *display = &dev_priv->display; struct intel_vgpu *vgpu = s->vgpu; u32 dword0 = cmd_val(s, 0); u32 dword1 = cmd_val(s, 1); @@ -1380,9 +1382,9 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, info->surf_val = (dword2 & GENMASK(31, 12)) >> 12; info->async_flip = ((dword2 & GENMASK(1, 0)) == 0x1); - info->ctrl_reg = DSPCNTR(dev_priv, info->pipe); - info->stride_reg = DSPSTRIDE(dev_priv, info->pipe); - info->surf_reg = DSPSURF(dev_priv, info->pipe); + info->ctrl_reg = DSPCNTR(display, info->pipe); + info->stride_reg = DSPSTRIDE(display, info->pipe); + info->surf_reg = DSPSURF(display, info->pipe); return 0; } @@ -1419,6 +1421,7 @@ static int gen8_update_plane_mmio_from_mi_display_flip( struct mi_display_flip_command_info *info) { struct 
drm_i915_private *dev_priv = s->engine->i915; + struct intel_display *display = &dev_priv->display; struct intel_vgpu *vgpu = s->vgpu; set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12), @@ -1436,7 +1439,7 @@ static int gen8_update_plane_mmio_from_mi_display_flip( } if (info->plane == PLANE_PRIMARY) - vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(dev_priv, info->pipe))++; + vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(display, info->pipe))++; if (info->async_flip) intel_vgpu_trigger_virtual_event(vgpu, info->event); diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 17f74cb244bb..95570cabdf27 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -40,6 +40,7 @@ #include "display/bxt_dpio_phy_regs.h" #include "display/i9xx_plane_regs.h" +#include "display/intel_crt_regs.h" #include "display/intel_cursor_regs.h" #include "display/intel_display.h" #include "display/intel_dpio_phy.h" @@ -68,8 +69,9 @@ static int get_edp_pipe(struct intel_vgpu *vgpu) static int edp_pipe_is_enabled(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; + struct intel_display *display = &dev_priv->display; - if (!(vgpu_vreg_t(vgpu, TRANSCONF(dev_priv, TRANSCODER_EDP)) & TRANSCONF_ENABLE)) + if (!(vgpu_vreg_t(vgpu, TRANSCONF(display, TRANSCODER_EDP)) & TRANSCONF_ENABLE)) return 0; if (!(vgpu_vreg(vgpu, _TRANS_DDI_FUNC_CTL_EDP) & TRANS_DDI_FUNC_ENABLE)) @@ -80,12 +82,13 @@ static int edp_pipe_is_enabled(struct intel_vgpu *vgpu) int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; + struct intel_display *display = &dev_priv->display; if (drm_WARN_ON(&dev_priv->drm, pipe < PIPE_A || pipe >= I915_MAX_PIPES)) return -EINVAL; - if (vgpu_vreg_t(vgpu, TRANSCONF(dev_priv, pipe)) & TRANSCONF_ENABLE) + if (vgpu_vreg_t(vgpu, TRANSCONF(display, pipe)) & TRANSCONF_ENABLE) return 1; if (edp_pipe_is_enabled(vgpu) && @@ -180,6 +183,7 @@ static 
u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { static void emulate_monitor_status_change(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; + struct intel_display *display = &dev_priv->display; int pipe; if (IS_BROXTON(dev_priv)) { @@ -192,21 +196,21 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) GEN8_DE_PORT_HOTPLUG(HPD_PORT_B) | GEN8_DE_PORT_HOTPLUG(HPD_PORT_C)); - for_each_pipe(dev_priv, pipe) { - vgpu_vreg_t(vgpu, TRANSCONF(dev_priv, pipe)) &= + for_each_pipe(display, pipe) { + vgpu_vreg_t(vgpu, TRANSCONF(display, pipe)) &= ~(TRANSCONF_ENABLE | TRANSCONF_STATE_ENABLE); - vgpu_vreg_t(vgpu, DSPCNTR(dev_priv, pipe)) &= ~DISP_ENABLE; + vgpu_vreg_t(vgpu, DSPCNTR(display, pipe)) &= ~DISP_ENABLE; vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE; - vgpu_vreg_t(vgpu, CURCNTR(dev_priv, pipe)) &= ~MCURSOR_MODE_MASK; - vgpu_vreg_t(vgpu, CURCNTR(dev_priv, pipe)) |= MCURSOR_MODE_DISABLE; + vgpu_vreg_t(vgpu, CURCNTR(display, pipe)) &= ~MCURSOR_MODE_MASK; + vgpu_vreg_t(vgpu, CURCNTR(display, pipe)) |= MCURSOR_MODE_DISABLE; } for (trans = TRANSCODER_A; trans <= TRANSCODER_EDP; trans++) { - vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(dev_priv, trans)) &= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(display, trans)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK | TRANS_DDI_FUNC_ENABLE); } - vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); @@ -254,8 +258,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) * TRANSCODER_A can be enabled. PORT_x depends on the input of * setup_virtual_dp_monitor. 
*/ - vgpu_vreg_t(vgpu, TRANSCONF(dev_priv, TRANSCODER_A)) |= TRANSCONF_ENABLE; - vgpu_vreg_t(vgpu, TRANSCONF(dev_priv, TRANSCODER_A)) |= TRANSCONF_STATE_ENABLE; + vgpu_vreg_t(vgpu, TRANSCONF(display, TRANSCODER_A)) |= TRANSCONF_ENABLE; + vgpu_vreg_t(vgpu, TRANSCONF(display, TRANSCODER_A)) |= TRANSCONF_STATE_ENABLE; /* * Golden M/N are calculated based on: @@ -263,11 +267,11 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) * DP link clk 1620 MHz and non-constant_n. * TODO: calculate DP link symbol clk and stream clk m/n. */ - vgpu_vreg_t(vgpu, PIPE_DATA_M1(dev_priv, TRANSCODER_A)) = TU_SIZE(64); - vgpu_vreg_t(vgpu, PIPE_DATA_M1(dev_priv, TRANSCODER_A)) |= 0x5b425e; - vgpu_vreg_t(vgpu, PIPE_DATA_N1(dev_priv, TRANSCODER_A)) = 0x800000; - vgpu_vreg_t(vgpu, PIPE_LINK_M1(dev_priv, TRANSCODER_A)) = 0x3cd6e; - vgpu_vreg_t(vgpu, PIPE_LINK_N1(dev_priv, TRANSCODER_A)) = 0x80000; + vgpu_vreg_t(vgpu, PIPE_DATA_M1(display, TRANSCODER_A)) = TU_SIZE(64); + vgpu_vreg_t(vgpu, PIPE_DATA_M1(display, TRANSCODER_A)) |= 0x5b425e; + vgpu_vreg_t(vgpu, PIPE_DATA_N1(display, TRANSCODER_A)) = 0x800000; + vgpu_vreg_t(vgpu, PIPE_LINK_M1(display, TRANSCODER_A)) = 0x3cd6e; + vgpu_vreg_t(vgpu, PIPE_LINK_N1(display, TRANSCODER_A)) = 0x80000; /* Enable per-DDI/PORT vreg */ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { @@ -290,7 +294,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) &= ~DDI_BUF_IS_IDLE; vgpu_vreg_t(vgpu, - TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_EDP)) |= + TRANS_DDI_FUNC_CTL(display, TRANSCODER_EDP)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | TRANS_DDI_FUNC_ENABLE); vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= @@ -320,7 +324,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE; vgpu_vreg_t(vgpu, - TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) |= + TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | 
TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); @@ -351,7 +355,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE; vgpu_vreg_t(vgpu, - TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) |= + TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); @@ -400,11 +404,11 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) * DP link clk 1620 MHz and non-constant_n. * TODO: calculate DP link symbol clk and stream clk m/n. */ - vgpu_vreg_t(vgpu, PIPE_DATA_M1(dev_priv, TRANSCODER_A)) = TU_SIZE(64); - vgpu_vreg_t(vgpu, PIPE_DATA_M1(dev_priv, TRANSCODER_A)) |= 0x5b425e; - vgpu_vreg_t(vgpu, PIPE_DATA_N1(dev_priv, TRANSCODER_A)) = 0x800000; - vgpu_vreg_t(vgpu, PIPE_LINK_M1(dev_priv, TRANSCODER_A)) = 0x3cd6e; - vgpu_vreg_t(vgpu, PIPE_LINK_N1(dev_priv, TRANSCODER_A)) = 0x80000; + vgpu_vreg_t(vgpu, PIPE_DATA_M1(display, TRANSCODER_A)) = TU_SIZE(64); + vgpu_vreg_t(vgpu, PIPE_DATA_M1(display, TRANSCODER_A)) |= 0x5b425e; + vgpu_vreg_t(vgpu, PIPE_DATA_N1(display, TRANSCODER_A)) = 0x800000; + vgpu_vreg_t(vgpu, PIPE_LINK_M1(display, TRANSCODER_A)) = 0x3cd6e; + vgpu_vreg_t(vgpu, PIPE_LINK_N1(display, TRANSCODER_A)) = 0x80000; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { @@ -415,10 +419,10 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, DPLL_CTRL2) |= DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_B); vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; - vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | 
TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); @@ -441,10 +445,10 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, DPLL_CTRL2) |= DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_C); vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; - vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_C << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); @@ -467,10 +471,10 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, DPLL_CTRL2) |= DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_D); vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; - vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_D << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); @@ -508,14 +512,14 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; /* Disable Primary/Sprite/Cursor plane */ - for_each_pipe(dev_priv, pipe) { - vgpu_vreg_t(vgpu, DSPCNTR(dev_priv, pipe)) &= ~DISP_ENABLE; + for_each_pipe(display, pipe) { + vgpu_vreg_t(vgpu, DSPCNTR(display, pipe)) &= ~DISP_ENABLE; vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE; - vgpu_vreg_t(vgpu, CURCNTR(dev_priv, pipe)) &= ~MCURSOR_MODE_MASK; - vgpu_vreg_t(vgpu, CURCNTR(dev_priv, pipe)) |= MCURSOR_MODE_DISABLE; + 
vgpu_vreg_t(vgpu, CURCNTR(display, pipe)) &= ~MCURSOR_MODE_MASK; + vgpu_vreg_t(vgpu, CURCNTR(display, pipe)) |= MCURSOR_MODE_DISABLE; } - vgpu_vreg_t(vgpu, TRANSCONF(dev_priv, TRANSCODER_A)) |= TRANSCONF_ENABLE; + vgpu_vreg_t(vgpu, TRANSCONF(display, TRANSCODER_A)) |= TRANSCONF_ENABLE; } static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) @@ -631,6 +635,7 @@ void vgpu_update_vblank_emulation(struct intel_vgpu *vgpu, bool turnon) static void emulate_vblank_on_pipe(struct intel_vgpu *vgpu, int pipe) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; + struct intel_display *display = &dev_priv->display; struct intel_vgpu_irq *irq = &vgpu->irq; int vblank_event[] = { [PIPE_A] = PIPE_A_VBLANK, @@ -652,17 +657,19 @@ static void emulate_vblank_on_pipe(struct intel_vgpu *vgpu, int pipe) } if (pipe_is_enabled(vgpu, pipe)) { - vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(dev_priv, pipe))++; + vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(display, pipe))++; intel_vgpu_trigger_virtual_event(vgpu, vblank_event[pipe]); } } void intel_vgpu_emulate_vblank(struct intel_vgpu *vgpu) { + struct drm_i915_private *i915 = vgpu->gvt->gt->i915; + struct intel_display *display = &i915->display; int pipe; mutex_lock(&vgpu->vgpu_lock); - for_each_pipe(vgpu->gvt->gt->i915, pipe) + for_each_pipe(display, pipe) emulate_vblank_on_pipe(vgpu, pipe); mutex_unlock(&vgpu->vgpu_lock); } diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index c454e25b2b0f..15cce973e1ae 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -154,8 +154,9 @@ static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe, u32 tiled, int stride_mask, int bpp) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; + struct intel_display *display = &dev_priv->display; - u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(dev_priv, pipe)) & stride_mask; + u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(display, pipe)) & stride_mask; 
u32 stride = stride_reg; if (GRAPHICS_VER(dev_priv) >= 9) { @@ -210,6 +211,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, struct intel_vgpu_primary_plane_format *plane) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; + struct intel_display *display = &dev_priv->display; u32 val, fmt; int pipe; @@ -217,7 +219,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, if (pipe >= I915_MAX_PIPES) return -ENODEV; - val = vgpu_vreg_t(vgpu, DSPCNTR(dev_priv, pipe)); + val = vgpu_vreg_t(vgpu, DSPCNTR(display, pipe)); plane->enabled = !!(val & DISP_ENABLE); if (!plane->enabled) return -ENODEV; @@ -251,7 +253,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, plane->hw_format = fmt; - plane->base = vgpu_vreg_t(vgpu, DSPSURF(dev_priv, pipe)) & I915_GTT_PAGE_MASK; + plane->base = vgpu_vreg_t(vgpu, DSPSURF(display, pipe)) & I915_GTT_PAGE_MASK; if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; @@ -267,14 +269,14 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, (_PRI_PLANE_STRIDE_MASK >> 6) : _PRI_PLANE_STRIDE_MASK, plane->bpp); - plane->width = (vgpu_vreg_t(vgpu, PIPESRC(dev_priv, pipe)) & _PIPE_H_SRCSZ_MASK) >> + plane->width = (vgpu_vreg_t(vgpu, PIPESRC(display, pipe)) & _PIPE_H_SRCSZ_MASK) >> _PIPE_H_SRCSZ_SHIFT; plane->width += 1; - plane->height = (vgpu_vreg_t(vgpu, PIPESRC(dev_priv, pipe)) & + plane->height = (vgpu_vreg_t(vgpu, PIPESRC(display, pipe)) & _PIPE_V_SRCSZ_MASK) >> _PIPE_V_SRCSZ_SHIFT; plane->height += 1; /* raw height is one minus the real value */ - val = vgpu_vreg_t(vgpu, DSPTILEOFF(dev_priv, pipe)); + val = vgpu_vreg_t(vgpu, DSPTILEOFF(display, pipe)); plane->x_offset = (val & _PRI_PLANE_X_OFF_MASK) >> _PRI_PLANE_X_OFF_SHIFT; plane->y_offset = (val & _PRI_PLANE_Y_OFF_MASK) >> @@ -340,6 +342,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, struct intel_vgpu_cursor_plane_format *plane) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; + struct intel_display 
*display = &dev_priv->display; u32 val, mode, index; u32 alpha_plane, alpha_force; int pipe; @@ -348,7 +351,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, if (pipe >= I915_MAX_PIPES) return -ENODEV; - val = vgpu_vreg_t(vgpu, CURCNTR(dev_priv, pipe)); + val = vgpu_vreg_t(vgpu, CURCNTR(display, pipe)); mode = val & MCURSOR_MODE_MASK; plane->enabled = (mode != MCURSOR_MODE_DISABLE); if (!plane->enabled) @@ -374,7 +377,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, gvt_dbg_core("alpha_plane=0x%x, alpha_force=0x%x\n", alpha_plane, alpha_force); - plane->base = vgpu_vreg_t(vgpu, CURBASE(dev_priv, pipe)) & I915_GTT_PAGE_MASK; + plane->base = vgpu_vreg_t(vgpu, CURBASE(display, pipe)) & I915_GTT_PAGE_MASK; if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; @@ -385,7 +388,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, return -EINVAL; } - val = vgpu_vreg_t(vgpu, CURPOS(dev_priv, pipe)); + val = vgpu_vreg_t(vgpu, CURPOS(display, pipe)); plane->x_pos = (val & _CURSOR_POS_X_MASK) >> _CURSOR_POS_X_SHIFT; plane->x_sign = (val & _CURSOR_SIGN_X_MASK) >> _CURSOR_SIGN_X_SHIFT; plane->y_pos = (val & _CURSOR_POS_Y_MASK) >> _CURSOR_POS_Y_SHIFT; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 9494d812c00a..241cff0fc683 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -45,6 +45,7 @@ #include "intel_mchbar_regs.h" #include "display/bxt_dpio_phy_regs.h" #include "display/i9xx_plane_regs.h" +#include "display/intel_crt_regs.h" #include "display/intel_cursor_regs.h" #include "display/intel_display_types.h" #include "display/intel_dmc_regs.h" @@ -655,11 +656,12 @@ static u32 skl_vgpu_get_dp_bitrate(struct intel_vgpu *vgpu, enum port port) static void vgpu_update_refresh_rate(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; + struct intel_display *display = &dev_priv->display; enum port port; u32 dp_br, 
link_m, link_n, htotal, vtotal; /* Find DDI/PORT assigned to TRANSCODER_A, expect B or D */ - port = (vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(dev_priv, TRANSCODER_A)) & + port = (vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(display, TRANSCODER_A)) & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; if (port != PORT_B && port != PORT_D) { gvt_dbg_dpy("vgpu-%d unsupported PORT_%c\n", vgpu->id, port_name(port)); @@ -675,12 +677,12 @@ static void vgpu_update_refresh_rate(struct intel_vgpu *vgpu) dp_br = skl_vgpu_get_dp_bitrate(vgpu, port); /* Get DP link symbol clock M/N */ - link_m = vgpu_vreg_t(vgpu, PIPE_LINK_M1(dev_priv, TRANSCODER_A)); - link_n = vgpu_vreg_t(vgpu, PIPE_LINK_N1(dev_priv, TRANSCODER_A)); + link_m = vgpu_vreg_t(vgpu, PIPE_LINK_M1(display, TRANSCODER_A)); + link_n = vgpu_vreg_t(vgpu, PIPE_LINK_N1(display, TRANSCODER_A)); /* Get H/V total from transcoder timing */ - htotal = (vgpu_vreg_t(vgpu, TRANS_HTOTAL(dev_priv, TRANSCODER_A)) >> TRANS_HTOTAL_SHIFT); - vtotal = (vgpu_vreg_t(vgpu, TRANS_VTOTAL(dev_priv, TRANSCODER_A)) >> TRANS_VTOTAL_SHIFT); + htotal = (vgpu_vreg_t(vgpu, TRANS_HTOTAL(display, TRANSCODER_A)) >> TRANS_HTOTAL_SHIFT); + vtotal = (vgpu_vreg_t(vgpu, TRANS_VTOTAL(display, TRANSCODER_A)) >> TRANS_VTOTAL_SHIFT); if (dp_br && link_n && htotal && vtotal) { u64 pixel_clk = 0; @@ -1011,22 +1013,23 @@ static int south_chicken2_mmio_write(struct intel_vgpu *vgpu, return 0; } -#define DSPSURF_TO_PIPE(dev_priv, offset) \ - calc_index(offset, DSPSURF(dev_priv, PIPE_A), DSPSURF(dev_priv, PIPE_B), DSPSURF(dev_priv, PIPE_C)) +#define DSPSURF_TO_PIPE(display, offset) \ + calc_index(offset, DSPSURF(display, PIPE_A), DSPSURF(display, PIPE_B), DSPSURF(display, PIPE_C)) static int pri_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; - u32 pipe = DSPSURF_TO_PIPE(dev_priv, offset); + struct intel_display *display = &dev_priv->display; + u32 pipe = 
DSPSURF_TO_PIPE(display, offset); int event = SKL_FLIP_EVENT(pipe, PLANE_PRIMARY); write_vreg(vgpu, offset, p_data, bytes); - vgpu_vreg_t(vgpu, DSPSURFLIVE(dev_priv, pipe)) = vgpu_vreg(vgpu, offset); + vgpu_vreg_t(vgpu, DSPSURFLIVE(display, pipe)) = vgpu_vreg(vgpu, offset); - vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(dev_priv, pipe))++; + vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(display, pipe))++; - if (vgpu_vreg_t(vgpu, DSPCNTR(dev_priv, pipe)) & PLANE_CTL_ASYNC_FLIP) + if (vgpu_vreg_t(vgpu, DSPCNTR(display, pipe)) & PLANE_CTL_ASYNC_FLIP) intel_vgpu_trigger_virtual_event(vgpu, event); else set_bit(event, vgpu->irq.flip_done_event[pipe]); @@ -1059,14 +1062,15 @@ static int reg50080_mmio_write(struct intel_vgpu *vgpu, unsigned int bytes) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; + struct intel_display *display = &dev_priv->display; enum pipe pipe = REG_50080_TO_PIPE(offset); enum plane_id plane = REG_50080_TO_PLANE(offset); int event = SKL_FLIP_EVENT(pipe, plane); write_vreg(vgpu, offset, p_data, bytes); if (plane == PLANE_PRIMARY) { - vgpu_vreg_t(vgpu, DSPSURFLIVE(dev_priv, pipe)) = vgpu_vreg(vgpu, offset); - vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(dev_priv, pipe))++; + vgpu_vreg_t(vgpu, DSPSURFLIVE(display, pipe)) = vgpu_vreg(vgpu, offset); + vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(display, pipe))++; } else { vgpu_vreg_t(vgpu, SPRSURFLIVE(pipe)) = vgpu_vreg(vgpu, offset); } @@ -2192,6 +2196,7 @@ static int csfe_chicken1_mmio_write(struct intel_vgpu *vgpu, static int init_generic_mmio_info(struct intel_gvt *gvt) { struct drm_i915_private *dev_priv = gvt->gt->i915; + struct intel_display *display = &dev_priv->display; int ret; MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL, @@ -2280,21 +2285,21 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ - MMIO_DH(TRANSCONF(dev_priv, TRANSCODER_A), D_ALL, NULL, + MMIO_DH(TRANSCONF(display, TRANSCODER_A), D_ALL, NULL, 
pipeconf_mmio_write); - MMIO_DH(TRANSCONF(dev_priv, TRANSCODER_B), D_ALL, NULL, + MMIO_DH(TRANSCONF(display, TRANSCODER_B), D_ALL, NULL, pipeconf_mmio_write); - MMIO_DH(TRANSCONF(dev_priv, TRANSCODER_C), D_ALL, NULL, + MMIO_DH(TRANSCONF(display, TRANSCODER_C), D_ALL, NULL, pipeconf_mmio_write); - MMIO_DH(TRANSCONF(dev_priv, TRANSCODER_EDP), D_ALL, NULL, + MMIO_DH(TRANSCONF(display, TRANSCODER_EDP), D_ALL, NULL, pipeconf_mmio_write); - MMIO_DH(DSPSURF(dev_priv, PIPE_A), D_ALL, NULL, pri_surf_mmio_write); + MMIO_DH(DSPSURF(display, PIPE_A), D_ALL, NULL, pri_surf_mmio_write); MMIO_DH(REG_50080(PIPE_A, PLANE_PRIMARY), D_ALL, NULL, reg50080_mmio_write); - MMIO_DH(DSPSURF(dev_priv, PIPE_B), D_ALL, NULL, pri_surf_mmio_write); + MMIO_DH(DSPSURF(display, PIPE_B), D_ALL, NULL, pri_surf_mmio_write); MMIO_DH(REG_50080(PIPE_B, PLANE_PRIMARY), D_ALL, NULL, reg50080_mmio_write); - MMIO_DH(DSPSURF(dev_priv, PIPE_C), D_ALL, NULL, pri_surf_mmio_write); + MMIO_DH(DSPSURF(display, PIPE_C), D_ALL, NULL, pri_surf_mmio_write); MMIO_DH(REG_50080(PIPE_C, PLANE_PRIMARY), D_ALL, NULL, reg50080_mmio_write); MMIO_DH(SPRSURF(PIPE_A), D_ALL, NULL, spr_surf_mmio_write); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 35319228bc51..0dbc4e289300 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -527,24 +527,6 @@ int i915_active_acquire(struct i915_active *ref) return err; } -int i915_active_acquire_for_context(struct i915_active *ref, u64 idx) -{ - struct i915_active_fence *active; - int err; - - err = i915_active_acquire(ref); - if (err) - return err; - - active = active_instance(ref, idx); - if (!active) { - i915_active_release(ref); - return -ENOMEM; - } - - return 0; /* return with active ref */ -} - void i915_active_release(struct i915_active *ref) { debug_active_assert(ref); diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 77c676ecc263..821f7c21ea9b 100644 --- 
a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -186,7 +186,6 @@ int i915_request_await_active(struct i915_request *rq, #define I915_ACTIVE_AWAIT_BARRIER BIT(2) int i915_active_acquire(struct i915_active *ref); -int i915_active_acquire_for_context(struct i915_active *ref, u64 idx); bool i915_active_acquire_if_busy(struct i915_active *ref); void i915_active_release(struct i915_active *ref); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 1bafefb726f5..c2ae37d6b94d 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -45,6 +45,7 @@ #include <drm/drm_managed.h> #include <drm/drm_probe_helper.h> +#include "display/i9xx_display_sr.h" #include "display/intel_acpi.h" #include "display/intel_bw.h" #include "display/intel_cdclk.h" @@ -60,6 +61,7 @@ #include "display/intel_pch_refclk.h" #include "display/intel_pps.h" #include "display/intel_sprite_uapi.h" +#include "display/intel_vga.h" #include "display/skl_watermark.h" #include "gem/i915_gem_context.h" @@ -93,17 +95,20 @@ #include "i915_memcpy.h" #include "i915_perf.h" #include "i915_query.h" -#include "i915_suspend.h" +#include "i915_reg.h" #include "i915_switcheroo.h" #include "i915_sysfs.h" #include "i915_utils.h" #include "i915_vgpu.h" #include "intel_clock_gating.h" +#include "intel_cpu_info.h" #include "intel_gvt.h" #include "intel_memory_region.h" #include "intel_pci_config.h" #include "intel_pcode.h" #include "intel_region_ttm.h" +#include "intel_sbi.h" +#include "vlv_sideband.h" #include "vlv_suspend.h" static const struct drm_driver i915_drm_driver; @@ -217,6 +222,7 @@ static void sanitize_gpu(struct drm_i915_private *i915) */ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; int ret = 0; if (i915_inject_probe_failure(dev_priv)) @@ -231,8 +237,9 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) 
spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); + intel_sbi_init(dev_priv); + vlv_iosf_sb_init(dev_priv); mutex_init(&dev_priv->sb_lock); - cpu_latency_qos_add_request(&dev_priv->sb_qos, PM_QOS_DEFAULT_VALUE); i915_memcpy_init_early(dev_priv); intel_runtime_pm_init_early(&dev_priv->runtime_pm); @@ -259,7 +266,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_detect_pch(dev_priv); intel_irq_init(dev_priv); - intel_display_driver_early_probe(dev_priv); + intel_display_driver_early_probe(display); intel_clock_gating_hooks_init(dev_priv); intel_detect_preproduction_hw(dev_priv); @@ -282,16 +289,19 @@ err_workqueues: */ static void i915_driver_late_release(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; + intel_irq_fini(dev_priv); - intel_power_domains_cleanup(dev_priv); + intel_power_domains_cleanup(display); i915_gem_cleanup_early(dev_priv); intel_gt_driver_late_release_all(dev_priv); intel_region_ttm_device_fini(dev_priv); vlv_suspend_cleanup(dev_priv); i915_workqueues_cleanup(dev_priv); - cpu_latency_qos_remove_request(&dev_priv->sb_qos); mutex_destroy(&dev_priv->sb_lock); + vlv_iosf_sb_fini(dev_priv); + intel_sbi_fini(dev_priv); i915_params_free(&dev_priv->params); } @@ -307,6 +317,7 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv) */ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_gt *gt; int ret, i; @@ -332,7 +343,7 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) /* Try to make sure MCHBAR is enabled before poking at it */ intel_gmch_bar_setup(dev_priv); intel_device_info_runtime_init(dev_priv); - intel_display_device_info_runtime_init(dev_priv); + intel_display_device_info_runtime_init(display); for_each_gt(gt, dev_priv, i) { ret = intel_gt_init_mmio(gt); @@ -415,6 +426,18 @@ mask_err: return ret; } +/* 
Wa_14022698537:dg2 */ +static void i915_enable_g8(struct drm_i915_private *i915) +{ + if (IS_DG2(i915)) { + if (IS_DG2_D(i915) && !intel_match_g8_cpu()) + return; + + snb_pcode_write_p(&i915->uncore, PCODE_POWER_SETUP, + POWER_SETUP_SUBCOMMAND_G8_ENABLE, 0, 0); + } +} + static int i915_pcode_init(struct drm_i915_private *i915) { struct intel_gt *gt; @@ -428,6 +451,7 @@ static int i915_pcode_init(struct drm_i915_private *i915) } } + i915_enable_g8(i915); return 0; } @@ -599,6 +623,7 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) */ static void i915_driver_register(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_gt *gt; unsigned int i; @@ -627,9 +652,9 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) i915_hwmon_register(dev_priv); - intel_display_driver_register(dev_priv); + intel_display_driver_register(display); - intel_power_domains_enable(dev_priv); + intel_power_domains_enable(display); intel_runtime_pm_enable(&dev_priv->runtime_pm); intel_register_dsm_handler(); @@ -644,6 +669,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) */ static void i915_driver_unregister(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_gt *gt; unsigned int i; @@ -652,9 +678,9 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_unregister_dsm_handler(); intel_runtime_pm_disable(&dev_priv->runtime_pm); - intel_power_domains_disable(dev_priv); + intel_power_domains_disable(display); - intel_display_driver_unregister(dev_priv); + intel_display_driver_unregister(display); intel_pxp_fini(dev_priv); @@ -731,7 +757,7 @@ i915_driver_create(struct pci_dev *pdev, const struct pci_device_id *ent) /* Set up device info and initial runtime info. 
*/ intel_device_info_driver_create(i915, pdev->device, match_info); - intel_display_device_probe(i915); + intel_display_device_probe(pdev); return i915; } @@ -750,6 +776,7 @@ i915_driver_create(struct pci_dev *pdev, const struct pci_device_id *ent) int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct drm_i915_private *i915; + struct intel_display *display; int ret; ret = pci_enable_device(pdev); @@ -764,6 +791,8 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return PTR_ERR(i915); } + display = &i915->display; + ret = i915_driver_early_probe(i915); if (ret < 0) goto out_pci_disable; @@ -784,7 +813,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret < 0) goto out_cleanup_mmio; - ret = intel_display_driver_probe_noirq(i915); + ret = intel_display_driver_probe_noirq(display); if (ret < 0) goto out_cleanup_hw; @@ -792,7 +821,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_cleanup_modeset; - ret = intel_display_driver_probe_nogem(i915); + ret = intel_display_driver_probe_nogem(display); if (ret) goto out_cleanup_irq; @@ -804,7 +833,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret && ret != -ENODEV) drm_dbg(&i915->drm, "pxp init failed with %d\n", ret); - ret = intel_display_driver_probe(i915); + ret = intel_display_driver_probe(display); if (ret) goto out_cleanup_gem; @@ -824,14 +853,14 @@ out_cleanup_gem: i915_gem_driver_release(i915); out_cleanup_modeset2: /* FIXME clean up the error path */ - intel_display_driver_remove(i915); + intel_display_driver_remove(display); intel_irq_uninstall(i915); - intel_display_driver_remove_noirq(i915); + intel_display_driver_remove_noirq(display); goto out_cleanup_modeset; out_cleanup_irq: intel_irq_uninstall(i915); out_cleanup_modeset: - intel_display_driver_remove_nogem(i915); + intel_display_driver_remove_nogem(display); out_cleanup_hw: 
i915_driver_hw_remove(i915); intel_memory_regions_driver_release(i915); @@ -851,6 +880,7 @@ out_pci_disable: void i915_driver_remove(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; intel_wakeref_t wakeref; wakeref = intel_runtime_pm_get(&i915->runtime_pm); @@ -864,16 +894,16 @@ void i915_driver_remove(struct drm_i915_private *i915) intel_gvt_driver_remove(i915); - intel_display_driver_remove(i915); + intel_display_driver_remove(display); intel_irq_uninstall(i915); - intel_display_driver_remove_noirq(i915); + intel_display_driver_remove_noirq(display); i915_reset_error_state(i915); i915_gem_driver_remove(i915); - intel_display_driver_remove_nogem(i915); + intel_display_driver_remove_nogem(display); i915_driver_hw_remove(i915); @@ -883,6 +913,7 @@ void i915_driver_remove(struct drm_i915_private *i915) static void i915_driver_release(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = &dev_priv->display; struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; intel_wakeref_t wakeref; @@ -906,7 +937,7 @@ static void i915_driver_release(struct drm_device *dev) i915_driver_late_release(dev_priv); - intel_display_device_remove(dev_priv); + intel_display_device_remove(display); } static int i915_driver_open(struct drm_device *dev, struct drm_file *file) @@ -936,25 +967,27 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) void i915_driver_shutdown(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; + disable_rpm_wakeref_asserts(&i915->runtime_pm); intel_runtime_pm_disable(&i915->runtime_pm); - intel_power_domains_disable(i915); + intel_power_domains_disable(display); intel_fbdev_set_suspend(&i915->drm, FBINFO_STATE_SUSPENDED, true); if (HAS_DISPLAY(i915)) { drm_kms_helper_poll_disable(&i915->drm); - intel_display_driver_disable_user_access(i915); + intel_display_driver_disable_user_access(display); 
drm_atomic_helper_shutdown(&i915->drm); } - intel_dp_mst_suspend(i915); + intel_dp_mst_suspend(display); intel_irq_suspend(i915); intel_hpd_cancel_work(i915); if (HAS_DISPLAY(i915)) - intel_display_driver_suspend_access(i915); + intel_display_driver_suspend_access(display); intel_encoder_suspend_all(&i915->display); intel_encoder_shutdown_all(&i915->display); @@ -974,7 +1007,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) * - unify the driver remove and system/runtime suspend sequences with * the above unified shutdown/poweroff sequence. */ - intel_power_domains_driver_remove(i915); + intel_power_domains_driver_remove(display); enable_rpm_wakeref_asserts(&i915->runtime_pm); intel_runtime_pm_driver_last_release(&i915->runtime_pm); @@ -1022,24 +1055,22 @@ static int i915_drm_suspend(struct drm_device *dev) /* We do a lot of poking in a lot of registers, make sure they work * properly. */ - intel_power_domains_disable(dev_priv); + intel_power_domains_disable(display); intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true); if (HAS_DISPLAY(dev_priv)) { drm_kms_helper_poll_disable(dev); - intel_display_driver_disable_user_access(dev_priv); + intel_display_driver_disable_user_access(display); } pci_save_state(pdev); - intel_display_driver_suspend(dev_priv); - - intel_dp_mst_suspend(dev_priv); + intel_display_driver_suspend(display); intel_irq_suspend(dev_priv); intel_hpd_cancel_work(dev_priv); if (HAS_DISPLAY(dev_priv)) - intel_display_driver_suspend_access(dev_priv); + intel_display_driver_suspend_access(display); intel_encoder_suspend_all(&dev_priv->display); @@ -1047,7 +1078,7 @@ static int i915_drm_suspend(struct drm_device *dev) intel_dpt_suspend(dev_priv); i915_ggtt_suspend(to_gt(dev_priv)->ggtt); - i915_save_display(dev_priv); + i9xx_display_sr_save(display); opregion_target_state = suspend_to_idle(dev_priv) ? 
PCI_D1 : PCI_D3cold; intel_opregion_suspend(display, opregion_target_state); @@ -1066,6 +1097,7 @@ static int i915_drm_suspend(struct drm_device *dev) static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) { struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = &dev_priv->display; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; struct intel_gt *gt; @@ -1081,14 +1113,12 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) for_each_gt(gt, dev_priv, i) intel_uncore_suspend(gt->uncore); - intel_power_domains_suspend(dev_priv, s2idle); - - intel_display_power_suspend_late(dev_priv); + intel_display_power_suspend_late(display, s2idle); ret = vlv_suspend_complete(dev_priv); if (ret) { drm_err(&dev_priv->drm, "Suspend complete failed: %d\n", ret); - intel_power_domains_resume(dev_priv); + intel_display_power_resume_early(display); goto out; } @@ -1166,7 +1196,12 @@ static int i915_drm_resume(struct drm_device *dev) intel_dmc_resume(display); - i915_restore_display(dev_priv); + i9xx_display_sr_restore(display); + + intel_vga_redisable(display); + + intel_gmbus_reset(display); + intel_pps_unlock_regs_wa(display); intel_init_pch_refclk(dev_priv); @@ -1188,21 +1223,19 @@ static int i915_drm_resume(struct drm_device *dev) i915_gem_resume(dev_priv); - intel_display_driver_init_hw(dev_priv); + intel_display_driver_init_hw(display); intel_clock_gating_init(dev_priv); if (HAS_DISPLAY(dev_priv)) - intel_display_driver_resume_access(dev_priv); + intel_display_driver_resume_access(display); intel_hpd_init(dev_priv); - /* MST sideband requires HPD interrupts enabled */ - intel_dp_mst_resume(dev_priv); - intel_display_driver_resume(dev_priv); + intel_display_driver_resume(display); if (HAS_DISPLAY(dev_priv)) { - intel_display_driver_enable_user_access(dev_priv); + intel_display_driver_enable_user_access(display); drm_kms_helper_poll_enable(dev); } 
intel_hpd_poll_disable(dev_priv); @@ -1211,7 +1244,7 @@ static int i915_drm_resume(struct drm_device *dev) intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING, false); - intel_power_domains_enable(dev_priv); + intel_power_domains_enable(display); intel_gvt_resume(dev_priv); @@ -1223,6 +1256,7 @@ static int i915_drm_resume(struct drm_device *dev) static int i915_drm_resume_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = &dev_priv->display; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct intel_gt *gt; int ret, i; @@ -1282,9 +1316,7 @@ static int i915_drm_resume_early(struct drm_device *dev) for_each_gt(gt, dev_priv, i) intel_gt_resume_early(gt); - intel_display_power_resume_early(dev_priv); - - intel_power_domains_resume(dev_priv); + intel_display_power_resume_early(display); enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); @@ -1486,7 +1518,7 @@ static int intel_runtime_suspend(struct device *kdev) for_each_gt(gt, dev_priv, i) intel_uncore_suspend(gt->uncore); - intel_display_power_suspend(dev_priv); + intel_display_power_suspend(display); ret = vlv_suspend_complete(dev_priv); if (ret) { @@ -1580,7 +1612,7 @@ static int intel_runtime_resume(struct device *kdev) drm_dbg(&dev_priv->drm, "Unclaimed access during suspend, bios?\n"); - intel_display_power_resume(dev_priv); + intel_display_power_resume(display); ret = vlv_resume_prepare(dev_priv, true); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7b1a061d92fb..b96b8de12756 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -101,14 +101,6 @@ struct i915_dsm { resource_size_t usable_size; }; -struct i915_suspend_saved_registers { - u32 saveDSPARB; - u32 saveSWF0[16]; - u32 saveSWF1[16]; - u32 saveSWF3[3]; - u16 saveGCDGMBUS; -}; - #define MAX_L3_SLICES 2 struct intel_l3_parity { u32 *remap_info[MAX_L3_SLICES]; @@ -236,9 +228,17 @@ struct drm_i915_private { spinlock_t 
irq_lock; bool irqs_enabled; + /* LPT/WPT IOSF sideband protection */ + struct mutex sbi_lock; + + /* VLV/CHV IOSF sideband */ + struct { + struct mutex lock; /* protect sideband access */ + struct pm_qos_request qos; + } vlv_iosf_sb; + /* Sideband mailbox protection */ struct mutex sb_lock; - struct pm_qos_request sb_qos; /** Cached value of IMR to avoid reads in updating the bitfield */ u32 irq_mask; @@ -291,7 +291,6 @@ struct drm_i915_private { struct i915_gpu_error gpu_error; u32 suspend_count; - struct i915_suspend_saved_registers regfile; struct vlv_s0ix_state *vlv_s0ix_state; struct dram_info { @@ -550,6 +549,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G11) #define IS_DG2_G12(i915) \ IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G12) +#define IS_DG2_D(i915) \ + IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_D) #define IS_RAPTORLAKE_S(i915) \ IS_SUBPLATFORM(i915, INTEL_ALDERLAKE_S, INTEL_SUBPLATFORM_RPL) #define IS_ALDERLAKE_P_N(i915) \ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a9662cc6ed1e..25295eb626dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -71,7 +71,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, * i915_gem_gtt_reserve - reserve a node in an address_space (GTT) * @vm: the &struct i915_address_space * @ww: An optional struct i915_gem_ww_ctx. 
- * @node: the &struct drm_mm_node (typically i915_vma.mode) + * @node: the &struct drm_mm_node (typically i915_vma.node) * @size: how much space to allocate inside the GTT, * must be #I915_GTT_PAGE_SIZE aligned * @offset: where to insert inside the GTT, diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index a62405787e77..be8149e46281 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -2,9 +2,9 @@ * SPDX-License-Identifier: MIT */ +#include "display/intel_overlay.h" #include "gem/i915_gem_mman.h" #include "gt/intel_engine_user.h" - #include "pxp/intel_pxp.h" #include "i915_cmd_parser.h" @@ -16,6 +16,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_i915_private *i915 = to_i915(dev); + struct intel_display *display = &i915->display; struct pci_dev *pdev = to_pci_dev(dev->dev); const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu; drm_i915_getparam_t *param = data; @@ -38,7 +39,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = to_gt(i915)->ggtt->num_fences; break; case I915_PARAM_HAS_OVERLAY: - value = !!i915->display.overlay; + value = intel_overlay_available(display); break; case I915_PARAM_HAS_BSD: value = !!intel_engine_lookup_user(i915, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index eb975b3815df..819ab933bb10 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1642,9 +1642,21 @@ capture_engine(struct intel_engine_cs *engine, return NULL; intel_engine_get_hung_entity(engine, &ce, &rq); - if (rq && !i915_request_started(rq)) - drm_info(&engine->gt->i915->drm, "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n", - engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id); + if (rq && !i915_request_started(rq)) { + /* + * We want to know also what is the guc_id of the 
context, + * but if we don't have the context reference, then skip + * printing it. + */ + if (ce) + drm_info(&engine->gt->i915->drm, + "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n", + engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id); + else + drm_info(&engine->gt->i915->drm, + "Got hung context on %s with active request %lld:%lld not yet started\n", + engine->name, rq->fence.context, rq->fence.seqno); + } if (rq) { capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f75cbf5b8a1c..7920ad9585ae 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -658,8 +658,7 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv) gen5_gt_irq_reset(to_gt(dev_priv)); spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.irq.display_irqs_enabled) - vlv_display_irq_reset(dev_priv); + vlv_display_irq_reset(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); } @@ -723,8 +722,7 @@ static void cherryview_irq_reset(struct drm_i915_private *dev_priv) gen2_irq_reset(uncore, GEN8_PCU_IRQ_REGS); spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.irq.display_irqs_enabled) - vlv_display_irq_reset(dev_priv); + vlv_display_irq_reset(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); } @@ -740,8 +738,7 @@ static void valleyview_irq_postinstall(struct drm_i915_private *dev_priv) gen5_gt_irq_postinstall(to_gt(dev_priv)); spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.irq.display_irqs_enabled) - vlv_display_irq_postinstall(dev_priv); + vlv_display_irq_postinstall(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); intel_uncore_write(&dev_priv->uncore, VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE); @@ -794,8 +791,7 @@ static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) gen8_gt_irq_postinstall(to_gt(dev_priv)); spin_lock_irq(&dev_priv->irq_lock); - if 
(dev_priv->display.irq.display_irqs_enabled) - vlv_display_irq_postinstall(dev_priv); + vlv_display_irq_postinstall(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); intel_uncore_write(&dev_priv->uncore, GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index f5c97a620962..76e2801619f0 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -143,8 +143,8 @@ int remap_io_sg(struct vm_area_struct *vma, /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); - while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) { - offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT; + while (offset >= r.sgt.max >> PAGE_SHIFT) { + offset -= r.sgt.max >> PAGE_SHIFT; r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase)); if (!r.sgt.sgp) return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2406cda75b7b..5384d1bb4923 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4802,7 +4802,7 @@ err_unlock: return ret; } -static struct ctl_table oa_table[] = { +static const struct ctl_table oa_table[] = { { .procname = "perf_stream_paranoid", .data = &i915_perf_stream_paranoid, diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 93fbf53578da..e55db036be1b 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -302,7 +302,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt) { struct i915_pmu *pmu = &gt->i915->pmu; - if (!pmu->base.event_init) + if (!pmu->registered) return; spin_lock_irq(&pmu->lock); @@ -324,7 +324,7 @@ void i915_pmu_gt_unparked(struct intel_gt *gt) { struct i915_pmu *pmu = &gt->i915->pmu; - if (!pmu->base.event_init) + if (!pmu->registered) return; spin_lock_irq(&pmu->lock); @@ -626,7 +626,7 @@ static int i915_pmu_event_init(struct perf_event *event) struct
drm_i915_private *i915 = pmu_to_i915(pmu); int ret; - if (pmu->closed) + if (!pmu->registered) return -ENODEV; if (event->attr.type != event->pmu->type) @@ -724,7 +724,7 @@ static void i915_pmu_event_read(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 prev, new; - if (pmu->closed) { + if (!pmu->registered) { event->hw.state = PERF_HES_STOPPED; return; } @@ -850,7 +850,7 @@ static void i915_pmu_event_start(struct perf_event *event, int flags) { struct i915_pmu *pmu = event_to_pmu(event); - if (pmu->closed) + if (!pmu->registered) return; i915_pmu_enable(event); @@ -861,7 +861,7 @@ static void i915_pmu_event_stop(struct perf_event *event, int flags) { struct i915_pmu *pmu = event_to_pmu(event); - if (pmu->closed) + if (!pmu->registered) goto out; if (flags & PERF_EF_UPDATE) @@ -877,7 +877,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags) { struct i915_pmu *pmu = event_to_pmu(event); - if (pmu->closed) + if (!pmu->registered) return -ENODEV; if (flags & PERF_EF_START) @@ -1177,8 +1177,6 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); - GEM_BUG_ON(!pmu->base.event_init); - /* Select the first online CPU as a designated reader. */ if (cpumask_empty(&i915_pmu_cpumask)) cpumask_set_cpu(cpu, &i915_pmu_cpumask); @@ -1191,13 +1189,11 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); unsigned int target = i915_pmu_target_cpu; - GEM_BUG_ON(!pmu->base.event_init); - /* * Unregistering an instance generates a CPU offline event which we must * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask. 
*/ - if (pmu->closed) + if (!pmu->registered) return 0; if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) { @@ -1218,7 +1214,7 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; } -static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; +static enum cpuhp_state cpuhp_state = CPUHP_INVALID; int i915_pmu_init(void) { @@ -1232,28 +1228,28 @@ int i915_pmu_init(void) pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n", ret); else - cpuhp_slot = ret; + cpuhp_state = ret; return 0; } void i915_pmu_exit(void) { - if (cpuhp_slot != CPUHP_INVALID) - cpuhp_remove_multi_state(cpuhp_slot); + if (cpuhp_state != CPUHP_INVALID) + cpuhp_remove_multi_state(cpuhp_state); } static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) { - if (cpuhp_slot == CPUHP_INVALID) + if (cpuhp_state == CPUHP_INVALID) return -EINVAL; - return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node); + return cpuhp_state_add_instance(cpuhp_state, &pmu->cpuhp.node); } static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) { - cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node); + cpuhp_state_remove_instance(cpuhp_state, &pmu->cpuhp.node); } void i915_pmu_register(struct drm_i915_private *i915) @@ -1265,7 +1261,6 @@ void i915_pmu_register(struct drm_i915_private *i915) &i915_pmu_cpumask_attr_group, NULL }; - int ret = -ENOMEM; spin_lock_init(&pmu->lock); @@ -1316,6 +1311,8 @@ void i915_pmu_register(struct drm_i915_private *i915) if (ret) goto err_unreg; + pmu->registered = true; + return; err_unreg: @@ -1323,7 +1320,6 @@ err_unreg: err_groups: kfree(pmu->base.attr_groups); err_attr: - pmu->base.event_init = NULL; free_event_attributes(pmu); err_name: if (IS_DGFX(i915)) @@ -1336,23 +1332,17 @@ void i915_pmu_unregister(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; - if (!pmu->base.event_init) + if (!pmu->registered) return; - /* - * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu - * ensures all 
currently executing ones will have exited before we - * proceed with unregistration. - */ - pmu->closed = true; - synchronize_rcu(); + /* Disconnect the PMU callbacks */ + pmu->registered = false; hrtimer_cancel(&pmu->timer); i915_pmu_unregister_cpuhp_state(pmu); perf_pmu_unregister(&pmu->base); - pmu->base.event_init = NULL; kfree(pmu->base.attr_groups); if (IS_DGFX(i915)) kfree(pmu->name); diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 41af038c3738..8e66d63d0c9f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -68,9 +68,9 @@ struct i915_pmu { */ struct pmu base; /** - * @closed: i915 is unregistering. + * @registered: PMU is registered and not in the unregistering process. */ - bool closed; + bool registered; /** * @name: Name as registered with perf core. */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 22be4a731d27..765e6c0528fb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -144,8 +144,6 @@ #define GEN6_STOLEN_RESERVED_ENABLE (1 << 0) #define GEN11_STOLEN_RESERVED_ADDR_MASK (0xFFFFFFFFFFFULL << 20) -#define _VGA_MSR_WRITE _MMIO(0x3c2) - #define _GEN7_PIPEA_DE_LOAD_SL 0x70068 #define _GEN7_PIPEB_DE_LOAD_SL 0x71068 #define GEN7_PIPE_DE_LOAD_SL(pipe) _MMIO_PIPE(pipe, _GEN7_PIPEA_DE_LOAD_SL, _GEN7_PIPEB_DE_LOAD_SL) @@ -1069,11 +1067,6 @@ #define CLKGATE_DIS_PSL_EXT(pipe) \ _MMIO_PIPE(pipe, _CLKGATE_DIS_PSL_EXT_A, _CLKGATE_DIS_PSL_EXT_B) -/* DDI Buffer Control */ -#define _DDI_CLK_VALFREQ_A 0x64030 -#define _DDI_CLK_VALFREQ_B 0x64130 -#define DDI_CLK_VALFREQ(port) _MMIO_PORT(port, _DDI_CLK_VALFREQ_A, _DDI_CLK_VALFREQ_B) - /* * Display engine regs */ @@ -1147,53 +1140,6 @@ #define _TRANS_MULT_B 0x6102c #define TRANS_MULT(dev_priv, trans) _MMIO_TRANS2(dev_priv, (trans), _TRANS_MULT_A) -/* VGA port control */ -#define ADPA _MMIO(0x61100) -#define PCH_ADPA _MMIO(0xe1100) -#define VLV_ADPA _MMIO(VLV_DISPLAY_BASE + 
0x61100) -#define ADPA_DAC_ENABLE (1 << 31) -#define ADPA_DAC_DISABLE 0 -#define ADPA_PIPE_SEL_SHIFT 30 -#define ADPA_PIPE_SEL_MASK (1 << 30) -#define ADPA_PIPE_SEL(pipe) ((pipe) << 30) -#define ADPA_PIPE_SEL_SHIFT_CPT 29 -#define ADPA_PIPE_SEL_MASK_CPT (3 << 29) -#define ADPA_PIPE_SEL_CPT(pipe) ((pipe) << 29) -#define ADPA_CRT_HOTPLUG_MASK 0x03ff0000 /* bit 25-16 */ -#define ADPA_CRT_HOTPLUG_MONITOR_NONE (0 << 24) -#define ADPA_CRT_HOTPLUG_MONITOR_MASK (3 << 24) -#define ADPA_CRT_HOTPLUG_MONITOR_COLOR (3 << 24) -#define ADPA_CRT_HOTPLUG_MONITOR_MONO (2 << 24) -#define ADPA_CRT_HOTPLUG_ENABLE (1 << 23) -#define ADPA_CRT_HOTPLUG_PERIOD_64 (0 << 22) -#define ADPA_CRT_HOTPLUG_PERIOD_128 (1 << 22) -#define ADPA_CRT_HOTPLUG_WARMUP_5MS (0 << 21) -#define ADPA_CRT_HOTPLUG_WARMUP_10MS (1 << 21) -#define ADPA_CRT_HOTPLUG_SAMPLE_2S (0 << 20) -#define ADPA_CRT_HOTPLUG_SAMPLE_4S (1 << 20) -#define ADPA_CRT_HOTPLUG_VOLTAGE_40 (0 << 18) -#define ADPA_CRT_HOTPLUG_VOLTAGE_50 (1 << 18) -#define ADPA_CRT_HOTPLUG_VOLTAGE_60 (2 << 18) -#define ADPA_CRT_HOTPLUG_VOLTAGE_70 (3 << 18) -#define ADPA_CRT_HOTPLUG_VOLREF_325MV (0 << 17) -#define ADPA_CRT_HOTPLUG_VOLREF_475MV (1 << 17) -#define ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1 << 16) -#define ADPA_USE_VGA_HVPOLARITY (1 << 15) -#define ADPA_SETS_HVPOLARITY 0 -#define ADPA_VSYNC_CNTL_DISABLE (1 << 10) -#define ADPA_VSYNC_CNTL_ENABLE 0 -#define ADPA_HSYNC_CNTL_DISABLE (1 << 11) -#define ADPA_HSYNC_CNTL_ENABLE 0 -#define ADPA_VSYNC_ACTIVE_HIGH (1 << 4) -#define ADPA_VSYNC_ACTIVE_LOW 0 -#define ADPA_HSYNC_ACTIVE_HIGH (1 << 3) -#define ADPA_HSYNC_ACTIVE_LOW 0 -#define ADPA_DPMS_MASK (~(3 << 10)) -#define ADPA_DPMS_ON (0 << 10) -#define ADPA_DPMS_SUSPEND (1 << 10) -#define ADPA_DPMS_STANDBY (2 << 10) -#define ADPA_DPMS_OFF (3 << 10) - /* Hotplug control (945+ only) */ #define PORT_HOTPLUG_EN(dev_priv) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x61110) #define PORTB_HOTPLUG_INT_EN (1 << 29) @@ -1786,180 +1732,6 @@ #define SPRITEA_INVALID_GTT_STATUS 
REG_BIT(1) #define PLANEA_INVALID_GTT_STATUS REG_BIT(0) -#define DSPARB(dev_priv) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x70030) -#define DSPARB_CSTART_MASK (0x7f << 7) -#define DSPARB_CSTART_SHIFT 7 -#define DSPARB_BSTART_MASK (0x7f) -#define DSPARB_BSTART_SHIFT 0 -#define DSPARB_BEND_SHIFT 9 /* on 855 */ -#define DSPARB_AEND_SHIFT 0 -#define DSPARB_SPRITEA_SHIFT_VLV 0 -#define DSPARB_SPRITEA_MASK_VLV (0xff << 0) -#define DSPARB_SPRITEB_SHIFT_VLV 8 -#define DSPARB_SPRITEB_MASK_VLV (0xff << 8) -#define DSPARB_SPRITEC_SHIFT_VLV 16 -#define DSPARB_SPRITEC_MASK_VLV (0xff << 16) -#define DSPARB_SPRITED_SHIFT_VLV 24 -#define DSPARB_SPRITED_MASK_VLV (0xff << 24) -#define DSPARB2 _MMIO(VLV_DISPLAY_BASE + 0x70060) /* vlv/chv */ -#define DSPARB_SPRITEA_HI_SHIFT_VLV 0 -#define DSPARB_SPRITEA_HI_MASK_VLV (0x1 << 0) -#define DSPARB_SPRITEB_HI_SHIFT_VLV 4 -#define DSPARB_SPRITEB_HI_MASK_VLV (0x1 << 4) -#define DSPARB_SPRITEC_HI_SHIFT_VLV 8 -#define DSPARB_SPRITEC_HI_MASK_VLV (0x1 << 8) -#define DSPARB_SPRITED_HI_SHIFT_VLV 12 -#define DSPARB_SPRITED_HI_MASK_VLV (0x1 << 12) -#define DSPARB_SPRITEE_HI_SHIFT_VLV 16 -#define DSPARB_SPRITEE_HI_MASK_VLV (0x1 << 16) -#define DSPARB_SPRITEF_HI_SHIFT_VLV 20 -#define DSPARB_SPRITEF_HI_MASK_VLV (0x1 << 20) -#define DSPARB3 _MMIO(VLV_DISPLAY_BASE + 0x7006c) /* chv */ -#define DSPARB_SPRITEE_SHIFT_VLV 0 -#define DSPARB_SPRITEE_MASK_VLV (0xff << 0) -#define DSPARB_SPRITEF_SHIFT_VLV 8 -#define DSPARB_SPRITEF_MASK_VLV (0xff << 8) - -/* pnv/gen4/g4x/vlv/chv */ -#define DSPFW1(dev_priv) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x70034) -#define DSPFW_SR_SHIFT 23 -#define DSPFW_SR_MASK (0x1ff << 23) -#define DSPFW_CURSORB_SHIFT 16 -#define DSPFW_CURSORB_MASK (0x3f << 16) -#define DSPFW_PLANEB_SHIFT 8 -#define DSPFW_PLANEB_MASK (0x7f << 8) -#define DSPFW_PLANEB_MASK_VLV (0xff << 8) /* vlv/chv */ -#define DSPFW_PLANEA_SHIFT 0 -#define DSPFW_PLANEA_MASK (0x7f << 0) -#define DSPFW_PLANEA_MASK_VLV (0xff << 0) /* vlv/chv */ -#define DSPFW2(dev_priv) 
_MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x70038) -#define DSPFW_FBC_SR_EN (1 << 31) /* g4x */ -#define DSPFW_FBC_SR_SHIFT 28 -#define DSPFW_FBC_SR_MASK (0x7 << 28) /* g4x */ -#define DSPFW_FBC_HPLL_SR_SHIFT 24 -#define DSPFW_FBC_HPLL_SR_MASK (0xf << 24) /* g4x */ -#define DSPFW_SPRITEB_SHIFT (16) -#define DSPFW_SPRITEB_MASK (0x7f << 16) /* g4x */ -#define DSPFW_SPRITEB_MASK_VLV (0xff << 16) /* vlv/chv */ -#define DSPFW_CURSORA_SHIFT 8 -#define DSPFW_CURSORA_MASK (0x3f << 8) -#define DSPFW_PLANEC_OLD_SHIFT 0 -#define DSPFW_PLANEC_OLD_MASK (0x7f << 0) /* pre-gen4 sprite C */ -#define DSPFW_SPRITEA_SHIFT 0 -#define DSPFW_SPRITEA_MASK (0x7f << 0) /* g4x */ -#define DSPFW_SPRITEA_MASK_VLV (0xff << 0) /* vlv/chv */ -#define DSPFW3(dev_priv) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x7003c) -#define DSPFW_HPLL_SR_EN (1 << 31) -#define PINEVIEW_SELF_REFRESH_EN (1 << 30) -#define DSPFW_CURSOR_SR_SHIFT 24 -#define DSPFW_CURSOR_SR_MASK (0x3f << 24) -#define DSPFW_HPLL_CURSOR_SHIFT 16 -#define DSPFW_HPLL_CURSOR_MASK (0x3f << 16) -#define DSPFW_HPLL_SR_SHIFT 0 -#define DSPFW_HPLL_SR_MASK (0x1ff << 0) - -/* vlv/chv */ -#define DSPFW4 _MMIO(VLV_DISPLAY_BASE + 0x70070) -#define DSPFW_SPRITEB_WM1_SHIFT 16 -#define DSPFW_SPRITEB_WM1_MASK (0xff << 16) -#define DSPFW_CURSORA_WM1_SHIFT 8 -#define DSPFW_CURSORA_WM1_MASK (0x3f << 8) -#define DSPFW_SPRITEA_WM1_SHIFT 0 -#define DSPFW_SPRITEA_WM1_MASK (0xff << 0) -#define DSPFW5 _MMIO(VLV_DISPLAY_BASE + 0x70074) -#define DSPFW_PLANEB_WM1_SHIFT 24 -#define DSPFW_PLANEB_WM1_MASK (0xff << 24) -#define DSPFW_PLANEA_WM1_SHIFT 16 -#define DSPFW_PLANEA_WM1_MASK (0xff << 16) -#define DSPFW_CURSORB_WM1_SHIFT 8 -#define DSPFW_CURSORB_WM1_MASK (0x3f << 8) -#define DSPFW_CURSOR_SR_WM1_SHIFT 0 -#define DSPFW_CURSOR_SR_WM1_MASK (0x3f << 0) -#define DSPFW6 _MMIO(VLV_DISPLAY_BASE + 0x70078) -#define DSPFW_SR_WM1_SHIFT 0 -#define DSPFW_SR_WM1_MASK (0x1ff << 0) -#define DSPFW7 _MMIO(VLV_DISPLAY_BASE + 0x7007c) -#define DSPFW7_CHV _MMIO(VLV_DISPLAY_BASE + 0x700b4) /* 
wtf #1? */ -#define DSPFW_SPRITED_WM1_SHIFT 24 -#define DSPFW_SPRITED_WM1_MASK (0xff << 24) -#define DSPFW_SPRITED_SHIFT 16 -#define DSPFW_SPRITED_MASK_VLV (0xff << 16) -#define DSPFW_SPRITEC_WM1_SHIFT 8 -#define DSPFW_SPRITEC_WM1_MASK (0xff << 8) -#define DSPFW_SPRITEC_SHIFT 0 -#define DSPFW_SPRITEC_MASK_VLV (0xff << 0) -#define DSPFW8_CHV _MMIO(VLV_DISPLAY_BASE + 0x700b8) -#define DSPFW_SPRITEF_WM1_SHIFT 24 -#define DSPFW_SPRITEF_WM1_MASK (0xff << 24) -#define DSPFW_SPRITEF_SHIFT 16 -#define DSPFW_SPRITEF_MASK_VLV (0xff << 16) -#define DSPFW_SPRITEE_WM1_SHIFT 8 -#define DSPFW_SPRITEE_WM1_MASK (0xff << 8) -#define DSPFW_SPRITEE_SHIFT 0 -#define DSPFW_SPRITEE_MASK_VLV (0xff << 0) -#define DSPFW9_CHV _MMIO(VLV_DISPLAY_BASE + 0x7007c) /* wtf #2? */ -#define DSPFW_PLANEC_WM1_SHIFT 24 -#define DSPFW_PLANEC_WM1_MASK (0xff << 24) -#define DSPFW_PLANEC_SHIFT 16 -#define DSPFW_PLANEC_MASK_VLV (0xff << 16) -#define DSPFW_CURSORC_WM1_SHIFT 8 -#define DSPFW_CURSORC_WM1_MASK (0x3f << 16) -#define DSPFW_CURSORC_SHIFT 0 -#define DSPFW_CURSORC_MASK (0x3f << 0) - -/* vlv/chv high order bits */ -#define DSPHOWM _MMIO(VLV_DISPLAY_BASE + 0x70064) -#define DSPFW_SR_HI_SHIFT 24 -#define DSPFW_SR_HI_MASK (3 << 24) /* 2 bits for chv, 1 for vlv */ -#define DSPFW_SPRITEF_HI_SHIFT 23 -#define DSPFW_SPRITEF_HI_MASK (1 << 23) -#define DSPFW_SPRITEE_HI_SHIFT 22 -#define DSPFW_SPRITEE_HI_MASK (1 << 22) -#define DSPFW_PLANEC_HI_SHIFT 21 -#define DSPFW_PLANEC_HI_MASK (1 << 21) -#define DSPFW_SPRITED_HI_SHIFT 20 -#define DSPFW_SPRITED_HI_MASK (1 << 20) -#define DSPFW_SPRITEC_HI_SHIFT 16 -#define DSPFW_SPRITEC_HI_MASK (1 << 16) -#define DSPFW_PLANEB_HI_SHIFT 12 -#define DSPFW_PLANEB_HI_MASK (1 << 12) -#define DSPFW_SPRITEB_HI_SHIFT 8 -#define DSPFW_SPRITEB_HI_MASK (1 << 8) -#define DSPFW_SPRITEA_HI_SHIFT 4 -#define DSPFW_SPRITEA_HI_MASK (1 << 4) -#define DSPFW_PLANEA_HI_SHIFT 0 -#define DSPFW_PLANEA_HI_MASK (1 << 0) -#define DSPHOWM1 _MMIO(VLV_DISPLAY_BASE + 0x70068) -#define DSPFW_SR_WM1_HI_SHIFT 
24 -#define DSPFW_SR_WM1_HI_MASK (3 << 24) /* 2 bits for chv, 1 for vlv */ -#define DSPFW_SPRITEF_WM1_HI_SHIFT 23 -#define DSPFW_SPRITEF_WM1_HI_MASK (1 << 23) -#define DSPFW_SPRITEE_WM1_HI_SHIFT 22 -#define DSPFW_SPRITEE_WM1_HI_MASK (1 << 22) -#define DSPFW_PLANEC_WM1_HI_SHIFT 21 -#define DSPFW_PLANEC_WM1_HI_MASK (1 << 21) -#define DSPFW_SPRITED_WM1_HI_SHIFT 20 -#define DSPFW_SPRITED_WM1_HI_MASK (1 << 20) -#define DSPFW_SPRITEC_WM1_HI_SHIFT 16 -#define DSPFW_SPRITEC_WM1_HI_MASK (1 << 16) -#define DSPFW_PLANEB_WM1_HI_SHIFT 12 -#define DSPFW_PLANEB_WM1_HI_MASK (1 << 12) -#define DSPFW_SPRITEB_WM1_HI_SHIFT 8 -#define DSPFW_SPRITEB_WM1_HI_MASK (1 << 8) -#define DSPFW_SPRITEA_WM1_HI_SHIFT 4 -#define DSPFW_SPRITEA_WM1_HI_MASK (1 << 4) -#define DSPFW_PLANEA_WM1_HI_SHIFT 0 -#define DSPFW_PLANEA_WM1_HI_MASK (1 << 0) - -/* drain latency register values*/ -#define VLV_DDL(pipe) _MMIO(VLV_DISPLAY_BASE + 0x70050 + 4 * (pipe)) -#define DDL_CURSOR_SHIFT 24 -#define DDL_SPRITE_SHIFT(sprite) (8 + 8 * (sprite)) -#define DDL_PLANE_SHIFT 0 -#define DDL_PRECISION_HIGH (1 << 7) -#define DDL_PRECISION_LOW (0 << 7) -#define DRAIN_LATENCY_MASK 0x7f - #define CBR1_VLV _MMIO(VLV_DISPLAY_BASE + 0x70400) #define CBR_PND_DEADLINE_DISABLE (1 << 31) #define CBR_PWM_CLOCK_MUX_SELECT (1 << 30) @@ -1967,72 +1739,6 @@ #define CBR4_VLV _MMIO(VLV_DISPLAY_BASE + 0x70450) #define CBR_DPLLBMD_PIPE(pipe) (1 << (7 + (pipe) * 11)) /* pipes B and C */ -/* FIFO watermark sizes etc */ -#define G4X_FIFO_LINE_SIZE 64 -#define I915_FIFO_LINE_SIZE 64 -#define I830_FIFO_LINE_SIZE 32 - -#define VALLEYVIEW_FIFO_SIZE 255 -#define G4X_FIFO_SIZE 127 -#define I965_FIFO_SIZE 512 -#define I945_FIFO_SIZE 127 -#define I915_FIFO_SIZE 95 -#define I855GM_FIFO_SIZE 127 /* In cachelines */ -#define I830_FIFO_SIZE 95 - -#define VALLEYVIEW_MAX_WM 0xff -#define G4X_MAX_WM 0x3f -#define I915_MAX_WM 0x3f - -#define PINEVIEW_DISPLAY_FIFO 512 /* in 64byte unit */ -#define PINEVIEW_FIFO_LINE_SIZE 64 -#define PINEVIEW_MAX_WM 0x1ff -#define 
PINEVIEW_DFT_WM 0x3f -#define PINEVIEW_DFT_HPLLOFF_WM 0 -#define PINEVIEW_GUARD_WM 10 -#define PINEVIEW_CURSOR_FIFO 64 -#define PINEVIEW_CURSOR_MAX_WM 0x3f -#define PINEVIEW_CURSOR_DFT_WM 0 -#define PINEVIEW_CURSOR_GUARD_WM 5 - -#define VALLEYVIEW_CURSOR_MAX_WM 64 -#define I965_CURSOR_FIFO 64 -#define I965_CURSOR_MAX_WM 32 -#define I965_CURSOR_DFT_WM 8 - -/* define the Watermark register on Ironlake */ -#define _WM0_PIPEA_ILK 0x45100 -#define _WM0_PIPEB_ILK 0x45104 -#define _WM0_PIPEC_IVB 0x45200 -#define WM0_PIPE_ILK(pipe) _MMIO_BASE_PIPE3(0, (pipe), _WM0_PIPEA_ILK, \ - _WM0_PIPEB_ILK, _WM0_PIPEC_IVB) -#define WM0_PIPE_PRIMARY_MASK REG_GENMASK(31, 16) -#define WM0_PIPE_SPRITE_MASK REG_GENMASK(15, 8) -#define WM0_PIPE_CURSOR_MASK REG_GENMASK(7, 0) -#define WM0_PIPE_PRIMARY(x) REG_FIELD_PREP(WM0_PIPE_PRIMARY_MASK, (x)) -#define WM0_PIPE_SPRITE(x) REG_FIELD_PREP(WM0_PIPE_SPRITE_MASK, (x)) -#define WM0_PIPE_CURSOR(x) REG_FIELD_PREP(WM0_PIPE_CURSOR_MASK, (x)) -#define WM1_LP_ILK _MMIO(0x45108) -#define WM2_LP_ILK _MMIO(0x4510c) -#define WM3_LP_ILK _MMIO(0x45110) -#define WM_LP_ENABLE REG_BIT(31) -#define WM_LP_LATENCY_MASK REG_GENMASK(30, 24) -#define WM_LP_FBC_MASK_BDW REG_GENMASK(23, 19) -#define WM_LP_FBC_MASK_ILK REG_GENMASK(23, 20) -#define WM_LP_PRIMARY_MASK REG_GENMASK(18, 8) -#define WM_LP_CURSOR_MASK REG_GENMASK(7, 0) -#define WM_LP_LATENCY(x) REG_FIELD_PREP(WM_LP_LATENCY_MASK, (x)) -#define WM_LP_FBC_BDW(x) REG_FIELD_PREP(WM_LP_FBC_MASK_BDW, (x)) -#define WM_LP_FBC_ILK(x) REG_FIELD_PREP(WM_LP_FBC_MASK_ILK, (x)) -#define WM_LP_PRIMARY(x) REG_FIELD_PREP(WM_LP_PRIMARY_MASK, (x)) -#define WM_LP_CURSOR(x) REG_FIELD_PREP(WM_LP_CURSOR_MASK, (x)) -#define WM1S_LP_ILK _MMIO(0x45120) -#define WM2S_LP_IVB _MMIO(0x45124) -#define WM3S_LP_IVB _MMIO(0x45128) -#define WM_LP_SPRITE_ENABLE REG_BIT(31) /* ilk/snb WM1S only */ -#define WM_LP_SPRITE_MASK REG_GENMASK(10, 0) -#define WM_LP_SPRITE(x) REG_FIELD_PREP(WM_LP_SPRITE_MASK, (x)) - /* * The two pipe frame counter registers 
are not synchronized, so * reading a stable value is somewhat tricky. The following code @@ -2802,7 +2508,7 @@ #define _CHICKEN_TRANS_C 0x420c8 #define _CHICKEN_TRANS_EDP 0x420cc #define _CHICKEN_TRANS_D 0x420d8 -#define CHICKEN_TRANS(trans) _MMIO(_PICK((trans), \ +#define _CHICKEN_TRANS(trans) _MMIO(_PICK((trans), \ [TRANSCODER_EDP] = _CHICKEN_TRANS_EDP, \ [TRANSCODER_A] = _CHICKEN_TRANS_A, \ [TRANSCODER_B] = _CHICKEN_TRANS_B, \ @@ -2810,9 +2516,10 @@ [TRANSCODER_D] = _CHICKEN_TRANS_D)) #define _MTL_CHICKEN_TRANS_A 0x604e0 #define _MTL_CHICKEN_TRANS_B 0x614e0 -#define MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \ +#define _MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \ _MTL_CHICKEN_TRANS_A, \ _MTL_CHICKEN_TRANS_B) +#define CHICKEN_TRANS(display, trans) (DISPLAY_VER(display) >= 14 ? _MTL_CHICKEN_TRANS(trans) : _CHICKEN_TRANS(trans)) #define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* tgl+ */ #define SKL_UNMASK_VBL_TO_PIPE_IN_SRD REG_BIT(30) /* skl+ */ #define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27) @@ -2863,11 +2570,16 @@ #define RESET_PCH_HANDSHAKE_ENABLE REG_BIT(4) #define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) -#define LATENCY_REPORTING_REMOVED_PIPE_D REG_BIT(31) +#define _LATENCY_REPORTING_REMOVED_PIPE_D REG_BIT(31) #define SKL_SELECT_ALTERNATE_DC_EXIT REG_BIT(30) -#define LATENCY_REPORTING_REMOVED_PIPE_C REG_BIT(25) -#define LATENCY_REPORTING_REMOVED_PIPE_B REG_BIT(24) -#define LATENCY_REPORTING_REMOVED_PIPE_A REG_BIT(23) +#define _LATENCY_REPORTING_REMOVED_PIPE_C REG_BIT(25) +#define _LATENCY_REPORTING_REMOVED_PIPE_B REG_BIT(24) +#define _LATENCY_REPORTING_REMOVED_PIPE_A REG_BIT(23) +#define LATENCY_REPORTING_REMOVED(pipe) _PICK((pipe), \ + _LATENCY_REPORTING_REMOVED_PIPE_A, \ + _LATENCY_REPORTING_REMOVED_PIPE_B, \ + _LATENCY_REPORTING_REMOVED_PIPE_C, \ + _LATENCY_REPORTING_REMOVED_PIPE_D) #define ICL_DELAY_PMRSP REG_BIT(22) #define DISABLE_FLR_SRC REG_BIT(15) #define MASK_WAKEMEM REG_BIT(13) @@ -3619,6 +3331,7 @@ #define POWER_SETUP_I1_WATTS REG_BIT(31) 
#define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */ #define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0) +#define POWER_SETUP_SUBCOMMAND_G8_ENABLE 0x6 #define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23 #define XEHP_PCODE_FREQUENCY_CONFIG 0x6e /* pvc */ /* XEHP_PCODE_FREQUENCY_CONFIG sub-commands (param1) */ @@ -3819,6 +3532,7 @@ enum skl_power_gate { #define TRANS_DDI_PVSYNC (1 << 17) #define TRANS_DDI_PHSYNC (1 << 16) #define TRANS_DDI_PORT_SYNC_ENABLE REG_BIT(15) +#define XE3_TRANS_DDI_HDCP_LINE_REKEY_DISABLE REG_BIT(15) #define TRANS_DDI_EDP_INPUT_MASK (7 << 12) #define TRANS_DDI_EDP_INPUT_A_ON (0 << 12) #define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12) @@ -3863,25 +3577,26 @@ enum skl_power_gate { #define _TGL_DP_TP_CTL_A 0x60540 #define DP_TP_CTL(port) _MMIO_PORT(port, _DP_TP_CTL_A, _DP_TP_CTL_B) #define TGL_DP_TP_CTL(dev_priv, tran) _MMIO_TRANS2(dev_priv, (tran), _TGL_DP_TP_CTL_A) -#define DP_TP_CTL_ENABLE (1 << 31) -#define DP_TP_CTL_FEC_ENABLE (1 << 30) -#define DP_TP_CTL_MODE_SST (0 << 27) -#define DP_TP_CTL_MODE_MST (1 << 27) -#define DP_TP_CTL_FORCE_ACT (1 << 25) -#define DP_TP_CTL_TRAIN_PAT4_SEL_MASK (3 << 19) -#define DP_TP_CTL_TRAIN_PAT4_SEL_TP4A (0 << 19) -#define DP_TP_CTL_TRAIN_PAT4_SEL_TP4B (1 << 19) -#define DP_TP_CTL_TRAIN_PAT4_SEL_TP4C (2 << 19) -#define DP_TP_CTL_ENHANCED_FRAME_ENABLE (1 << 18) -#define DP_TP_CTL_FDI_AUTOTRAIN (1 << 15) -#define DP_TP_CTL_LINK_TRAIN_MASK (7 << 8) -#define DP_TP_CTL_LINK_TRAIN_PAT1 (0 << 8) -#define DP_TP_CTL_LINK_TRAIN_PAT2 (1 << 8) -#define DP_TP_CTL_LINK_TRAIN_PAT3 (4 << 8) -#define DP_TP_CTL_LINK_TRAIN_PAT4 (5 << 8) -#define DP_TP_CTL_LINK_TRAIN_IDLE (2 << 8) -#define DP_TP_CTL_LINK_TRAIN_NORMAL (3 << 8) -#define DP_TP_CTL_SCRAMBLE_DISABLE (1 << 7) +#define DP_TP_CTL_ENABLE REG_BIT(31) +#define DP_TP_CTL_FEC_ENABLE REG_BIT(30) +#define DP_TP_CTL_MODE_MASK REG_BIT(27) +#define DP_TP_CTL_MODE_SST REG_FIELD_PREP(DP_TP_CTL_MODE_MASK, 0) +#define DP_TP_CTL_MODE_MST REG_FIELD_PREP(DP_TP_CTL_MODE_MASK, 1) 
+#define DP_TP_CTL_FORCE_ACT REG_BIT(25) +#define DP_TP_CTL_TRAIN_PAT4_SEL_MASK REG_GENMASK(20, 19) +#define DP_TP_CTL_TRAIN_PAT4_SEL_TP4A REG_FIELD_PREP(DP_TP_CTL_TRAIN_PAT4_SEL_MASK, 0) +#define DP_TP_CTL_TRAIN_PAT4_SEL_TP4B REG_FIELD_PREP(DP_TP_CTL_TRAIN_PAT4_SEL_MASK, 1) +#define DP_TP_CTL_TRAIN_PAT4_SEL_TP4C REG_FIELD_PREP(DP_TP_CTL_TRAIN_PAT4_SEL_MASK, 2) +#define DP_TP_CTL_ENHANCED_FRAME_ENABLE REG_BIT(18) +#define DP_TP_CTL_FDI_AUTOTRAIN REG_BIT(15) +#define DP_TP_CTL_LINK_TRAIN_MASK REG_GENMASK(10, 8) +#define DP_TP_CTL_LINK_TRAIN_PAT1 REG_FIELD_PREP(DP_TP_CTL_LINK_TRAIN_MASK, 0) +#define DP_TP_CTL_LINK_TRAIN_PAT2 REG_FIELD_PREP(DP_TP_CTL_LINK_TRAIN_MASK, 1) +#define DP_TP_CTL_LINK_TRAIN_PAT3 REG_FIELD_PREP(DP_TP_CTL_LINK_TRAIN_MASK, 4) +#define DP_TP_CTL_LINK_TRAIN_PAT4 REG_FIELD_PREP(DP_TP_CTL_LINK_TRAIN_MASK, 5) +#define DP_TP_CTL_LINK_TRAIN_IDLE REG_FIELD_PREP(DP_TP_CTL_LINK_TRAIN_MASK, 2) +#define DP_TP_CTL_LINK_TRAIN_NORMAL REG_FIELD_PREP(DP_TP_CTL_LINK_TRAIN_MASK, 3) +#define DP_TP_CTL_SCRAMBLE_DISABLE REG_BIT(7) /* DisplayPort Transport Status */ #define _DP_TP_STATUS_A 0x64044 @@ -3889,14 +3604,15 @@ enum skl_power_gate { #define _TGL_DP_TP_STATUS_A 0x60544 #define DP_TP_STATUS(port) _MMIO_PORT(port, _DP_TP_STATUS_A, _DP_TP_STATUS_B) #define TGL_DP_TP_STATUS(dev_priv, tran) _MMIO_TRANS2(dev_priv, (tran), _TGL_DP_TP_STATUS_A) -#define DP_TP_STATUS_FEC_ENABLE_LIVE (1 << 28) -#define DP_TP_STATUS_IDLE_DONE (1 << 25) -#define DP_TP_STATUS_ACT_SENT (1 << 24) -#define DP_TP_STATUS_MODE_STATUS_MST (1 << 23) -#define DP_TP_STATUS_AUTOTRAIN_DONE (1 << 12) -#define DP_TP_STATUS_PAYLOAD_MAPPING_VC2 (3 << 8) -#define DP_TP_STATUS_PAYLOAD_MAPPING_VC1 (3 << 4) -#define DP_TP_STATUS_PAYLOAD_MAPPING_VC0 (3 << 0) +#define DP_TP_STATUS_FEC_ENABLE_LIVE REG_BIT(28) +#define DP_TP_STATUS_IDLE_DONE REG_BIT(25) +#define DP_TP_STATUS_ACT_SENT REG_BIT(24) +#define DP_TP_STATUS_MODE_STATUS_MST REG_BIT(23) +#define DP_TP_STATUS_STREAMS_ENABLED_MASK REG_GENMASK(18, 16) /* 
17:16 on hsw but bit 18 mbz */ +#define DP_TP_STATUS_AUTOTRAIN_DONE REG_BIT(12) +#define DP_TP_STATUS_PAYLOAD_MAPPING_VC2_MASK REG_GENMASK(9, 8) +#define DP_TP_STATUS_PAYLOAD_MAPPING_VC1_MASK REG_GENMASK(5, 4) +#define DP_TP_STATUS_PAYLOAD_MAPPING_VC0_MASK REG_GENMASK(1, 0) /* DDI Buffer Control */ #define _DDI_BUF_CTL_A 0x64000 @@ -4444,14 +4160,6 @@ enum skl_power_gate { #define SFUSE_STRAP_DDIC_DETECTED (1 << 1) #define SFUSE_STRAP_DDID_DETECTED (1 << 0) -#define WM_MISC _MMIO(0x45260) -#define WM_MISC_DATA_PARTITION_5_6 (1 << 0) - -#define WM_DBG _MMIO(0x45280) -#define WM_DBG_DISALLOW_MULTIPLE_LP (1 << 0) -#define WM_DBG_DISALLOW_MAXFIFO (1 << 1) -#define WM_DBG_DISALLOW_SPRITE (1 << 2) - /* Gen4+ Timestamp and Pipe Frame time stamp registers */ #define GEN4_TIMESTAMP _MMIO(0x2358) #define ILK_TIMESTAMP_HI _MMIO(0x70070) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 762127dd56c5..70a854557e6e 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -506,6 +506,6 @@ int __init i915_scheduler_module_init(void) return 0; err_priorities: - kmem_cache_destroy(slab_priorities); + kmem_cache_destroy(slab_dependencies); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c deleted file mode 100644 index f18f1acf2158..000000000000 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * - * Copyright 2008 (c) Intel Corporation - * Jesse Barnes <jbarnes@virtuousgeek.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * 
the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "display/intel_de.h" -#include "display/intel_gmbus.h" -#include "display/intel_vga.h" - -#include "i915_drv.h" -#include "i915_reg.h" -#include "i915_suspend.h" -#include "intel_pci_config.h" - -static void intel_save_swf(struct drm_i915_private *dev_priv) -{ - int i; - - /* Scratch space */ - if (GRAPHICS_VER(dev_priv) == 2 && IS_MOBILE(dev_priv)) { - for (i = 0; i < 7; i++) { - dev_priv->regfile.saveSWF0[i] = intel_de_read(dev_priv, - SWF0(dev_priv, i)); - dev_priv->regfile.saveSWF1[i] = intel_de_read(dev_priv, - SWF1(dev_priv, i)); - } - for (i = 0; i < 3; i++) - dev_priv->regfile.saveSWF3[i] = intel_de_read(dev_priv, - SWF3(dev_priv, i)); - } else if (GRAPHICS_VER(dev_priv) == 2) { - for (i = 0; i < 7; i++) - dev_priv->regfile.saveSWF1[i] = intel_de_read(dev_priv, - SWF1(dev_priv, i)); - } else if (HAS_GMCH(dev_priv)) { - for (i = 0; i < 16; i++) { - dev_priv->regfile.saveSWF0[i] = intel_de_read(dev_priv, - SWF0(dev_priv, i)); - dev_priv->regfile.saveSWF1[i] = intel_de_read(dev_priv, - SWF1(dev_priv, i)); - } - for (i = 0; i < 3; i++) - dev_priv->regfile.saveSWF3[i] = intel_de_read(dev_priv, - SWF3(dev_priv, i)); - } -} - -static void intel_restore_swf(struct drm_i915_private *dev_priv) -{ - int i; - - /* Scratch space */ - if (GRAPHICS_VER(dev_priv) == 2 
&& IS_MOBILE(dev_priv)) { - for (i = 0; i < 7; i++) { - intel_de_write(dev_priv, SWF0(dev_priv, i), - dev_priv->regfile.saveSWF0[i]); - intel_de_write(dev_priv, SWF1(dev_priv, i), - dev_priv->regfile.saveSWF1[i]); - } - for (i = 0; i < 3; i++) - intel_de_write(dev_priv, SWF3(dev_priv, i), - dev_priv->regfile.saveSWF3[i]); - } else if (GRAPHICS_VER(dev_priv) == 2) { - for (i = 0; i < 7; i++) - intel_de_write(dev_priv, SWF1(dev_priv, i), - dev_priv->regfile.saveSWF1[i]); - } else if (HAS_GMCH(dev_priv)) { - for (i = 0; i < 16; i++) { - intel_de_write(dev_priv, SWF0(dev_priv, i), - dev_priv->regfile.saveSWF0[i]); - intel_de_write(dev_priv, SWF1(dev_priv, i), - dev_priv->regfile.saveSWF1[i]); - } - for (i = 0; i < 3; i++) - intel_de_write(dev_priv, SWF3(dev_priv, i), - dev_priv->regfile.saveSWF3[i]); - } -} - -void i915_save_display(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); - - if (!HAS_DISPLAY(dev_priv)) - return; - - /* Display arbitration control */ - if (GRAPHICS_VER(dev_priv) <= 4) - dev_priv->regfile.saveDSPARB = intel_de_read(dev_priv, - DSPARB(dev_priv)); - - if (GRAPHICS_VER(dev_priv) == 4) - pci_read_config_word(pdev, GCDGMBUS, - &dev_priv->regfile.saveGCDGMBUS); - - intel_save_swf(dev_priv); -} - -void i915_restore_display(struct drm_i915_private *dev_priv) -{ - struct intel_display *display = &dev_priv->display; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); - - if (!HAS_DISPLAY(dev_priv)) - return; - - intel_restore_swf(dev_priv); - - if (GRAPHICS_VER(dev_priv) == 4) - pci_write_config_word(pdev, GCDGMBUS, - dev_priv->regfile.saveGCDGMBUS); - - /* Display arbitration */ - if (GRAPHICS_VER(dev_priv) <= 4) - intel_de_write(dev_priv, DSPARB(dev_priv), - dev_priv->regfile.saveDSPARB); - - intel_vga_redisable(display); - - intel_gmbus_reset(display); -} diff --git a/drivers/gpu/drm/i915/i915_suspend.h b/drivers/gpu/drm/i915/i915_suspend.h deleted file mode 100644 index e5a611ee3d15..000000000000 --- 
a/drivers/gpu/drm/i915/i915_suspend.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef __I915_SUSPEND_H__ -#define __I915_SUSPEND_H__ - -struct drm_i915_private; - -void i915_save_display(struct drm_i915_private *i915); -void i915_restore_display(struct drm_i915_private *i915); - -#endif /* __I915_SUSPEND_H__ */ diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 09d89bdf82f4..7ed41ce9b708 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -642,34 +642,6 @@ DEFINE_EVENT(i915_request, i915_request_wait_end, TP_ARGS(rq) ); -TRACE_EVENT_CONDITION(i915_reg_rw, - TP_PROTO(bool write, i915_reg_t reg, u64 val, int len, bool trace), - - TP_ARGS(write, reg, val, len, trace), - - TP_CONDITION(trace), - - TP_STRUCT__entry( - __field(u64, val) - __field(u32, reg) - __field(u16, write) - __field(u16, len) - ), - - TP_fast_assign( - __entry->val = (u64)val; - __entry->reg = i915_mmio_reg_offset(reg); - __entry->write = write; - __entry->len = len; - ), - - TP_printk("%s reg=0x%x, len=%d, val=(0x%x, 0x%x)", - __entry->write ? "write" : "read", - __entry->reg, __entry->len, - (u32)(__entry->val & 0xffffffff), - (u32)(__entry->val >> 32)) -); - /** * DOC: i915_ppgtt_create and i915_ppgtt_release tracepoints * diff --git a/drivers/gpu/drm/i915/intel_cpu_info.c b/drivers/gpu/drm/i915/intel_cpu_info.c new file mode 100644 index 000000000000..e52d0ac713a9 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_cpu_info.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + * + * Avoid INTEL_<PLATFORM> name collisions between asm/intel-family.h and + * intel_device_info.h by having a separate file. 
+ */ + +#include "intel_cpu_info.h" + +#ifdef CONFIG_X86 +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> + +static const struct x86_cpu_id g8_cpu_ids[] = { + X86_MATCH_VFM(INTEL_ALDERLAKE, NULL), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, NULL), + X86_MATCH_VFM(INTEL_COMETLAKE, NULL), + X86_MATCH_VFM(INTEL_KABYLAKE, NULL), + X86_MATCH_VFM(INTEL_KABYLAKE_L, NULL), + X86_MATCH_VFM(INTEL_RAPTORLAKE, NULL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, NULL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_S, NULL), + X86_MATCH_VFM(INTEL_ROCKETLAKE, NULL), + {} +}; + +/** + * intel_match_g8_cpu - match current CPU against g8_cpu_ids + * + * This matches current CPU against g8_cpu_ids, which are applicable + * for G8 workaround. + * + * Returns: %true if matches, %false otherwise. + */ +bool intel_match_g8_cpu(void) +{ + return x86_match_cpu(g8_cpu_ids); +} +#else /* CONFIG_X86 */ + +bool intel_match_g8_cpu(void) { return false; } + +#endif /* CONFIG_X86 */ diff --git a/drivers/gpu/drm/i915/intel_cpu_info.h b/drivers/gpu/drm/i915/intel_cpu_info.h new file mode 100644 index 000000000000..d898fb463d31 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_cpu_info.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _INTEL_CPU_INFO_H_ +#define _INTEL_CPU_INFO_H_ + +#include <linux/types.h> + +bool intel_match_g8_cpu(void); + +#endif /* _INTEL_CPU_INFO_H_ */ diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 856b30fa37dc..bbe3a24fe3d9 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -200,6 +200,10 @@ static const u16 subplatform_g12_ids[] = { INTEL_DG2_G12_IDS(ID), }; +static const u16 subplatform_dg2_d_ids[] = { + INTEL_DG2_D_IDS(ID), +}; + static const u16 subplatform_arl_h_ids[] = { INTEL_ARL_H_IDS(ID), }; @@ -280,6 +284,11 @@ static void intel_device_info_subplatform_init(struct drm_i915_private *i915) mask = 
BIT(INTEL_SUBPLATFORM_ARL_S); } + /* DG2_D ids span across multiple DG2 subplatforms */ + if (find_devid(devid, subplatform_dg2_d_ids, + ARRAY_SIZE(subplatform_dg2_d_ids))) + mask |= BIT(INTEL_SUBPLATFORM_D); + GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); RUNTIME_INFO(i915)->platform_mask[pi] |= mask; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index ef84eea9ba0b..9387385cb418 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -95,9 +95,11 @@ enum intel_platform { /* * Subplatform bits share the same namespace per parent platform. In other words * it is fine for the same bit to be used on multiple parent platforms. + * Devices can belong to multiple subplatforms if needed, so it's possible to set + * multiple bits for same device. */ -#define INTEL_SUBPLATFORM_BITS (3) +#define INTEL_SUBPLATFORM_BITS (4) #define INTEL_SUBPLATFORM_MASK (BIT(INTEL_SUBPLATFORM_BITS) - 1) /* HSW/BDW/SKL/KBL/CFL */ @@ -114,6 +116,7 @@ enum intel_platform { #define INTEL_SUBPLATFORM_G10 0 #define INTEL_SUBPLATFORM_G11 1 #define INTEL_SUBPLATFORM_G12 2 +#define INTEL_SUBPLATFORM_D 3 /* ADL */ #define INTEL_SUBPLATFORM_RPL 0 diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c index aa51f366626c..ee1cd2126f97 100644 --- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c +++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c @@ -5,9 +5,11 @@ #include "display/bxt_dpio_phy_regs.h" #include "display/i9xx_plane_regs.h" +#include "display/i9xx_wm_regs.h" #include "display/intel_audio_regs.h" #include "display/intel_backlight_regs.h" #include "display/intel_color_regs.h" +#include "display/intel_crt_regs.h" #include "display/intel_cursor_regs.h" #include "display/intel_display_types.h" #include "display/intel_dmc_regs.h" diff --git a/drivers/gpu/drm/i915/intel_sbi.c b/drivers/gpu/drm/i915/intel_sbi.c index 5c6e517c73f4..41e85ac773dc 100644 --- 
a/drivers/gpu/drm/i915/intel_sbi.c +++ b/drivers/gpu/drm/i915/intel_sbi.c @@ -17,7 +17,7 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg, struct intel_uncore *uncore = &i915->uncore; u32 cmd; - lockdep_assert_held(&i915->sb_lock); + lockdep_assert_held(&i915->sbi_lock); if (intel_wait_for_register_fw(uncore, SBI_CTL_STAT, SBI_BUSY, 0, @@ -57,6 +57,16 @@ static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg, return 0; } +void intel_sbi_lock(struct drm_i915_private *i915) +{ + mutex_lock(&i915->sbi_lock); +} + +void intel_sbi_unlock(struct drm_i915_private *i915) +{ + mutex_unlock(&i915->sbi_lock); +} + u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg, enum intel_sbi_destination destination) { @@ -72,3 +82,13 @@ void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value, { intel_sbi_rw(i915, reg, destination, &value, false); } + +void intel_sbi_init(struct drm_i915_private *i915) +{ + mutex_init(&i915->sbi_lock); +} + +void intel_sbi_fini(struct drm_i915_private *i915) +{ + mutex_destroy(&i915->sbi_lock); +} diff --git a/drivers/gpu/drm/i915/intel_sbi.h b/drivers/gpu/drm/i915/intel_sbi.h index f5a862210454..85161a4f13b8 100644 --- a/drivers/gpu/drm/i915/intel_sbi.h +++ b/drivers/gpu/drm/i915/intel_sbi.h @@ -15,6 +15,10 @@ enum intel_sbi_destination { SBI_MPHY, }; +void intel_sbi_init(struct drm_i915_private *i915); +void intel_sbi_fini(struct drm_i915_private *i915); +void intel_sbi_lock(struct drm_i915_private *i915); +void intel_sbi_unlock(struct drm_i915_private *i915); u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg, enum intel_sbi_destination destination); void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6aa179a3e92a..eed4937c3ff3 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -31,12 +31,17 @@ #include "i915_drv.h" #include "i915_iosf_mbi.h" 
#include "i915_reg.h" -#include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_uncore_trace.h" #define FORCEWAKE_ACK_TIMEOUT_MS 50 #define GT_FIFO_TIMEOUT_MS 10 +struct intel_uncore *to_intel_uncore(struct drm_device *drm) +{ + return &to_i915(drm)->uncore; +} + #define __raw_posting_read(...) ((void)__raw_uncore_read32(__VA_ARGS__)) static void diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index f419c311a0de..e39582950627 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -501,6 +501,8 @@ static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore) return uncore->regs; } +struct intel_uncore *to_intel_uncore(struct drm_device *drm); + /* * The raw_reg_{read,write} macros are intended as a micro-optimization for * interrupt handlers so that the pointer indirection on uncore->regs can diff --git a/drivers/gpu/drm/i915/intel_uncore_trace.c b/drivers/gpu/drm/i915/intel_uncore_trace.c new file mode 100644 index 000000000000..86f0c3942b1d --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uncore_trace.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright © 2024 Intel Corporation */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "intel_uncore_trace.h" +#endif diff --git a/drivers/gpu/drm/i915/intel_uncore_trace.h b/drivers/gpu/drm/i915/intel_uncore_trace.h new file mode 100644 index 000000000000..f13ff71edf2d --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uncore_trace.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright © 2024 Intel Corporation */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM i915 + +#if !defined(__INTEL_UNCORE_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __INTEL_UNCORE_TRACE_H__ + +#include "i915_reg_defs.h" + +#include <linux/types.h> +#include <linux/tracepoint.h> + +TRACE_EVENT_CONDITION(i915_reg_rw, + TP_PROTO(bool write, i915_reg_t reg, u64 val, int len, bool trace), + + TP_ARGS(write, 
reg, val, len, trace), + + TP_CONDITION(trace), + + TP_STRUCT__entry( + __field(u64, val) + __field(u32, reg) + __field(u16, write) + __field(u16, len) + ), + + TP_fast_assign( + __entry->val = (u64)val; + __entry->reg = i915_mmio_reg_offset(reg); + __entry->write = write; + __entry->len = len; + ), + + TP_printk("%s reg=0x%x, len=%d, val=(0x%x, 0x%x)", + __entry->write ? "write" : "read", + __entry->reg, __entry->len, + (u32)(__entry->val & 0xffffffff), + (u32)(__entry->val >> 32)) +); +#endif /* __INTEL_UNCORE_TRACE_H__ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915 +#define TRACE_INCLUDE_FILE intel_uncore_trace +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index acae30a04a94..88870844b5bd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -492,7 +492,7 @@ static int mock_breadcrumbs_smoketest(void *arg) for (n = 0; n < ncpus; n++) { struct kthread_worker *worker; - worker = kthread_create_worker(0, "igt/%d", n); + worker = kthread_run_worker(0, "igt/%d", n); if (IS_ERR(worker)) { ret = PTR_ERR(worker); ncpus = n; @@ -1645,7 +1645,7 @@ static int live_parallel_engines(void *arg) for_each_uabi_engine(engine, i915) { struct kthread_worker *worker; - worker = kthread_create_worker(0, "igt/parallel:%s", + worker = kthread_run_worker(0, "igt/parallel:%s", engine->name); if (IS_ERR(worker)) { err = PTR_ERR(worker); @@ -1806,7 +1806,7 @@ static int live_breadcrumbs_smoketest(void *arg) unsigned int i = idx * ncpus + n; struct kthread_worker *worker; - worker = kthread_create_worker(0, "igt/%d.%d", idx, n); + worker = kthread_run_worker(0, "igt/%d.%d", idx, n); if (IS_ERR(worker)) { ret = PTR_ERR(worker); goto out_flush; @@ -3219,7 +3219,7 @@ static int perf_parallel_engines(void *arg) 
memset(&engines[idx].p, 0, sizeof(engines[idx].p)); - worker = kthread_create_worker(0, "igt:%s", + worker = kthread_run_worker(0, "igt:%s", engine->name); if (IS_ERR(worker)) { err = PTR_ERR(worker); diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 29110abb4fe0..c383d31d46b0 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -19,12 +19,22 @@ int igt_flush_test(struct drm_i915_private *i915) int ret = 0; for_each_gt(gt, i915, i) { + struct intel_engine_cs *engine; + unsigned long timeout_ms = 0; + unsigned int id; + if (intel_gt_is_wedged(gt)) ret = -EIO; + for_each_engine(engine, gt, id) { + if (engine->props.preempt_timeout_ms > timeout_ms) + timeout_ms = engine->props.preempt_timeout_ms; + } + cond_resched(); - if (intel_gt_wait_for_idle(gt, HZ * 3) == -ETIME) { + /* 2x longest preempt timeout, experimentally determined */ + if (intel_gt_wait_for_idle(gt, HZ * timeout_ms / 500) == -ETIME) { pr_err("%pS timed out, cancelling all further testing.\n", __builtin_return_address(0)); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index ae57eb03dfca..a77e5b26542c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -180,7 +180,7 @@ struct drm_i915_private *mock_gem_device(void) /* Set up device info and initial runtime info. 
*/ intel_device_info_driver_create(i915, pdev->device, &mock_info); - intel_display_device_probe(i915); + intel_display_device_probe(pdev); dev_pm_domain_set(&pdev->dev, &pm_domain); pm_runtime_enable(&pdev->dev); diff --git a/drivers/gpu/drm/i915/vlv_sideband.c b/drivers/gpu/drm/i915/vlv_sideband.c index 68291412f4cb..114ae8eb9cd5 100644 --- a/drivers/gpu/drm/i915/vlv_sideband.c +++ b/drivers/gpu/drm/i915/vlv_sideband.c @@ -43,7 +43,7 @@ static void __vlv_punit_get(struct drm_i915_private *i915) * to the Valleyview P-unit and not all sideband communications. */ if (IS_VALLEYVIEW(i915)) { - cpu_latency_qos_update_request(&i915->sb_qos, 0); + cpu_latency_qos_update_request(&i915->vlv_iosf_sb.qos, 0); on_each_cpu(ping, NULL, 1); } } @@ -51,7 +51,7 @@ static void __vlv_punit_get(struct drm_i915_private *i915) static void __vlv_punit_put(struct drm_i915_private *i915) { if (IS_VALLEYVIEW(i915)) - cpu_latency_qos_update_request(&i915->sb_qos, + cpu_latency_qos_update_request(&i915->vlv_iosf_sb.qos, PM_QOS_DEFAULT_VALUE); iosf_mbi_punit_release(); @@ -62,12 +62,12 @@ void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports) if (ports & BIT(VLV_IOSF_SB_PUNIT)) __vlv_punit_get(i915); - mutex_lock(&i915->sb_lock); + mutex_lock(&i915->vlv_iosf_sb.lock); } void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports) { - mutex_unlock(&i915->sb_lock); + mutex_unlock(&i915->vlv_iosf_sb.lock); if (ports & BIT(VLV_IOSF_SB_PUNIT)) __vlv_punit_put(i915); @@ -81,7 +81,7 @@ static int vlv_sideband_rw(struct drm_i915_private *i915, const bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP); int err; - lockdep_assert_held(&i915->sb_lock); + lockdep_assert_held(&i915->vlv_iosf_sb.lock); if (port == IOSF_PORT_PUNIT) iosf_mbi_assert_punit_acquired(); @@ -249,3 +249,21 @@ void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val) vlv_sideband_rw(i915, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP, reg, &val); } + +void vlv_iosf_sb_init(struct 
drm_i915_private *i915) +{ + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + mutex_init(&i915->vlv_iosf_sb.lock); + + if (IS_VALLEYVIEW(i915)) + cpu_latency_qos_add_request(&i915->vlv_iosf_sb.qos, PM_QOS_DEFAULT_VALUE); +} + +void vlv_iosf_sb_fini(struct drm_i915_private *i915) +{ + if (IS_VALLEYVIEW(i915)) + cpu_latency_qos_remove_request(&i915->vlv_iosf_sb.qos); + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + mutex_destroy(&i915->vlv_iosf_sb.lock); +} diff --git a/drivers/gpu/drm/i915/vlv_sideband.h b/drivers/gpu/drm/i915/vlv_sideband.h index c20cf41b2d39..31813e07c56f 100644 --- a/drivers/gpu/drm/i915/vlv_sideband.h +++ b/drivers/gpu/drm/i915/vlv_sideband.h @@ -25,6 +25,9 @@ enum { VLV_IOSF_SB_PUNIT, }; +void vlv_iosf_sb_init(struct drm_i915_private *i915); +void vlv_iosf_sb_fini(struct drm_i915_private *i915); + void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports); void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports); diff --git a/drivers/gpu/drm/i915/vlv_suspend.c b/drivers/gpu/drm/i915/vlv_suspend.c index 94595dde2b96..fc9f311ea1db 100644 --- a/drivers/gpu/drm/i915/vlv_suspend.c +++ b/drivers/gpu/drm/i915/vlv_suspend.c @@ -13,6 +13,7 @@ #include "i915_trace.h" #include "i915_utils.h" #include "intel_clock_gating.h" +#include "intel_uncore_trace.h" #include "vlv_suspend.h" #include "gt/intel_gt_regs.h" diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index f496e6cfdfe0..e47debd60619 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -14,9 +14,6 @@ config DRM_MEDIATEK select DRM_BRIDGE_CONNECTOR select DRM_MIPI_DSI select DRM_PANEL - select MEMORY - select MTK_SMI - select PHY_MTK_MIPI_DSI select VIDEOMODE_HELPERS help Choose this option if you have a Mediatek SoCs. 
@@ -27,7 +24,6 @@ config DRM_MEDIATEK config DRM_MEDIATEK_DP tristate "DRM DPTX Support for MediaTek SoCs" depends on DRM_MEDIATEK - select PHY_MTK_DP select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_DP_AUX_BUS @@ -38,6 +34,5 @@ config DRM_MEDIATEK_HDMI tristate "DRM HDMI Support for Mediatek SoCs" depends on DRM_MEDIATEK select SND_SOC_HDMI_CODEC if SND_SOC - select PHY_MTK_HDMI help DRM/KMS HDMI driver for Mediatek SoCs diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c index eb0e1233ad04..5674f5707cca 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_crtc.c @@ -112,6 +112,11 @@ static void mtk_drm_finish_page_flip(struct mtk_crtc *mtk_crtc) drm_crtc_handle_vblank(&mtk_crtc->base); +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + if (mtk_crtc->cmdq_client.chan) + return; +#endif + spin_lock_irqsave(&mtk_crtc->config_lock, flags); if (!mtk_crtc->config_updating && mtk_crtc->pending_needs_vblank) { mtk_crtc_finish_page_flip(mtk_crtc); @@ -284,10 +289,8 @@ static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) state = to_mtk_crtc_state(mtk_crtc->base.state); spin_lock_irqsave(&mtk_crtc->config_lock, flags); - if (mtk_crtc->config_updating) { - spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + if (mtk_crtc->config_updating) goto ddp_cmdq_cb_out; - } state->pending_config = false; @@ -315,10 +318,15 @@ static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) mtk_crtc->pending_async_planes = false; } - spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); - ddp_cmdq_cb_out: + if (mtk_crtc->pending_needs_vblank) { + mtk_crtc_finish_page_flip(mtk_crtc); + mtk_crtc->pending_needs_vblank = false; + } + + spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + mtk_crtc->cmdq_vblank_cnt = 0; wake_up(&mtk_crtc->cb_blocking_queue); } @@ -606,13 +614,18 @@ static void mtk_crtc_update_config(struct mtk_crtc *mtk_crtc, bool needs_vblank) */ mtk_crtc->cmdq_vblank_cnt = 3; + 
spin_lock_irqsave(&mtk_crtc->config_lock, flags); + mtk_crtc->config_updating = false; + spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); } -#endif +#else spin_lock_irqsave(&mtk_crtc->config_lock, flags); mtk_crtc->config_updating = false; spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); +#endif mutex_unlock(&mtk_crtc->hw_lock); } diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index f731d4fbe8b6..df82cea4bb79 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -460,6 +460,29 @@ static unsigned int mtk_ovl_fmt_convert(struct mtk_disp_ovl *ovl, } } +static void mtk_ovl_afbc_layer_config(struct mtk_disp_ovl *ovl, + unsigned int idx, + struct mtk_plane_pending_state *pending, + struct cmdq_pkt *cmdq_pkt) +{ + unsigned int pitch_msb = pending->pitch >> 16; + unsigned int hdr_pitch = pending->hdr_pitch; + unsigned int hdr_addr = pending->hdr_addr; + + if (pending->modifier != DRM_FORMAT_MOD_LINEAR) { + mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs, + DISP_REG_OVL_HDR_ADDR(ovl, idx)); + mtk_ddp_write_relaxed(cmdq_pkt, + OVL_PITCH_MSB_2ND_SUBBUF | pitch_msb, + &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); + mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs, + DISP_REG_OVL_HDR_PITCH(ovl, idx)); + } else { + mtk_ddp_write_relaxed(cmdq_pkt, pitch_msb, + &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); + } +} + void mtk_ovl_layer_config(struct device *dev, unsigned int idx, struct mtk_plane_state *state, struct cmdq_pkt *cmdq_pkt) @@ -467,25 +490,14 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx, struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); struct mtk_plane_pending_state *pending = &state->pending; unsigned int addr = pending->addr; - unsigned int hdr_addr = pending->hdr_addr; 
- unsigned int pitch = pending->pitch; - unsigned int hdr_pitch = pending->hdr_pitch; + unsigned int pitch_lsb = pending->pitch & GENMASK(15, 0); unsigned int fmt = pending->format; + unsigned int rotation = pending->rotation; unsigned int offset = (pending->y << 16) | pending->x; unsigned int src_size = (pending->height << 16) | pending->width; unsigned int blend_mode = state->base.pixel_blend_mode; unsigned int ignore_pixel_alpha = 0; unsigned int con; - bool is_afbc = pending->modifier != DRM_FORMAT_MOD_LINEAR; - union overlay_pitch { - struct split_pitch { - u16 lsb; - u16 msb; - } split_pitch; - u32 pitch; - } overlay_pitch; - - overlay_pitch.pitch = pitch; if (!pending->enable) { mtk_ovl_layer_off(dev, idx, cmdq_pkt); @@ -513,22 +525,30 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx, ignore_pixel_alpha = OVL_CONST_BLEND; } - if (pending->rotation & DRM_MODE_REFLECT_Y) { + /* + * Treat rotate 180 as flip x + flip y, and XOR the original rotation value + * to flip x + flip y to support both in the same time. 
+ */ + if (rotation & DRM_MODE_ROTATE_180) + rotation ^= DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y; + + if (rotation & DRM_MODE_REFLECT_Y) { con |= OVL_CON_VIRT_FLIP; addr += (pending->height - 1) * pending->pitch; } - if (pending->rotation & DRM_MODE_REFLECT_X) { + if (rotation & DRM_MODE_REFLECT_X) { con |= OVL_CON_HORZ_FLIP; addr += pending->pitch - 1; } if (ovl->data->supports_afbc) - mtk_ovl_set_afbc(ovl, cmdq_pkt, idx, is_afbc); + mtk_ovl_set_afbc(ovl, cmdq_pkt, idx, + pending->modifier != DRM_FORMAT_MOD_LINEAR); mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_CON(idx)); - mtk_ddp_write_relaxed(cmdq_pkt, overlay_pitch.split_pitch.lsb | ignore_pixel_alpha, + mtk_ddp_write_relaxed(cmdq_pkt, pitch_lsb | ignore_pixel_alpha, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH(idx)); mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_SRC_SIZE(idx)); @@ -537,19 +557,8 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx, mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_ADDR(ovl, idx)); - if (is_afbc) { - mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs, - DISP_REG_OVL_HDR_ADDR(ovl, idx)); - mtk_ddp_write_relaxed(cmdq_pkt, - OVL_PITCH_MSB_2ND_SUBBUF | overlay_pitch.split_pitch.msb, - &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); - mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs, - DISP_REG_OVL_HDR_PITCH(ovl, idx)); - } else { - mtk_ddp_write_relaxed(cmdq_pkt, - overlay_pitch.split_pitch.msb, - &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); - } + if (ovl->data->supports_afbc) + mtk_ovl_afbc_layer_config(ovl, idx, pending, cmdq_pkt); mtk_ovl_set_bit_depth(dev, idx, fmt, cmdq_pkt); mtk_ovl_layer_on(dev, idx, cmdq_pkt); diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index 5c805094daf6..cd385ba4c66a 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c 
@@ -543,18 +543,16 @@ static int mtk_dp_set_color_format(struct mtk_dp *mtk_dp, enum dp_pixelformat color_format) { u32 val; - - /* update MISC0 */ - mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_3034, - color_format << DP_TEST_COLOR_FORMAT_SHIFT, - DP_TEST_COLOR_FORMAT_MASK); + u32 misc0_color; switch (color_format) { case DP_PIXELFORMAT_YUV422: val = PIXEL_ENCODE_FORMAT_DP_ENC0_P0_YCBCR422; + misc0_color = DP_COLOR_FORMAT_YCbCr422; break; case DP_PIXELFORMAT_RGB: val = PIXEL_ENCODE_FORMAT_DP_ENC0_P0_RGB; + misc0_color = DP_COLOR_FORMAT_RGB; break; default: drm_warn(mtk_dp->drm_dev, "Unsupported color format: %d\n", @@ -562,6 +560,11 @@ static int mtk_dp_set_color_format(struct mtk_dp *mtk_dp, return -EINVAL; } + /* update MISC0 */ + mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_3034, + misc0_color, + DP_TEST_COLOR_FORMAT_MASK); + mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_303C, val, PIXEL_ENCODE_FORMAT_DP_ENC0_P0_MASK); return 0; @@ -1135,6 +1138,18 @@ static void mtk_dp_digital_sw_reset(struct mtk_dp *mtk_dp) 0, DP_TX_TRANSMITTER_4P_RESET_SW_DP_TRANS_P0); } +static void mtk_dp_sdp_path_reset(struct mtk_dp *mtk_dp) +{ + mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_3004, + SDP_RESET_SW_DP_ENC0_P0, + SDP_RESET_SW_DP_ENC0_P0); + + /* Wait for sdp path reset to complete */ + usleep_range(1000, 5000); + mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_3004, + 0, SDP_RESET_SW_DP_ENC0_P0); +} + static void mtk_dp_set_lanes(struct mtk_dp *mtk_dp, int lanes) { mtk_dp_update_bits(mtk_dp, MTK_DP_TRANS_P0_35F0, @@ -1165,17 +1180,25 @@ static void mtk_dp_get_calibration_data(struct mtk_dp *mtk_dp) buf = (u32 *)nvmem_cell_read(cell, &len); nvmem_cell_put(cell); - if (IS_ERR(buf) || ((len / sizeof(u32)) != 4)) { + if (IS_ERR(buf)) { dev_warn(dev, "Failed to read nvmem_cell_read\n"); - - if (!IS_ERR(buf)) - kfree(buf); - goto use_default_val; } + /* The cell length is in bytes. Convert it to be compatible with u32 buffer. 
*/ + len /= sizeof(u32); + for (i = 0; i < MTK_DP_CAL_MAX; i++) { fmt = &mtk_dp->data->efuse_fmt[i]; + + if (fmt->idx >= len) { + dev_warn(mtk_dp->dev, + "Out-of-bound efuse data access, fmt idx = %d, buf len = %zu\n", + fmt->idx, len); + kfree(buf); + goto use_default_val; + } + cal_data[i] = (buf[fmt->idx] >> fmt->shift) & fmt->mask; if (cal_data[i] < fmt->min_val || cal_data[i] > fmt->max_val) { @@ -2100,7 +2123,6 @@ static enum drm_connector_status mtk_dp_bdg_detect(struct drm_bridge *bridge) struct mtk_dp *mtk_dp = mtk_dp_from_bridge(bridge); enum drm_connector_status ret = connector_status_disconnected; bool enabled = mtk_dp->enabled; - u8 sink_count = 0; if (!mtk_dp->train_info.cable_plugged_in) return ret; @@ -2115,8 +2137,8 @@ static enum drm_connector_status mtk_dp_bdg_detect(struct drm_bridge *bridge) * function, we just need to check the HPD connection to check * whether we connect to a sink device. */ - drm_dp_dpcd_readb(&mtk_dp->aux, DP_SINK_COUNT, &sink_count); - if (DP_GET_SINK_COUNT(sink_count)) + + if (drm_dp_read_sink_count(&mtk_dp->aux) > 0) ret = connector_status_connected; if (!enabled) @@ -2397,6 +2419,9 @@ static void mtk_dp_bridge_atomic_disable(struct drm_bridge *bridge, DP_PWR_STATE_BANDGAP_TPLL, DP_PWR_STATE_MASK); + /* SDP path reset sw*/ + mtk_dp_sdp_path_reset(mtk_dp); + /* Ensure the sink is muted */ msleep(20); } @@ -2408,12 +2433,19 @@ mtk_dp_bridge_mode_valid(struct drm_bridge *bridge, { struct mtk_dp *mtk_dp = mtk_dp_from_bridge(bridge); u32 bpp = info->color_formats & DRM_COLOR_FORMAT_YCBCR422 ? 16 : 24; - u32 rate = min_t(u32, drm_dp_max_link_rate(mtk_dp->rx_cap) * - drm_dp_max_lane_count(mtk_dp->rx_cap), - drm_dp_bw_code_to_link_rate(mtk_dp->max_linkrate) * - mtk_dp->max_lanes); + u32 lane_count_min = mtk_dp->train_info.lane_count; + u32 rate = drm_dp_bw_code_to_link_rate(mtk_dp->train_info.link_rate) * + lane_count_min; - if (rate < mode->clock * bpp / 8) + /* + *FEC overhead is approximately 2.4% from DP 1.4a spec 2.2.1.4.2. 
+ *The down-spread amplitude shall either be disabled (0.0%) or up + *to 0.5% from 1.4a 3.5.2.6. Add up to approximately 3% total overhead. + * + *Because rate is already divided by 10, + *mode->clock does not need to be multiplied by 10 + */ + if ((rate * 97 / 100) < (mode->clock * bpp / 8)) return MODE_CLOCK_HIGH; return MODE_OK; @@ -2454,10 +2486,9 @@ static u32 *mtk_dp_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge, struct drm_display_mode *mode = &crtc_state->adjusted_mode; struct drm_display_info *display_info = &conn_state->connector->display_info; - u32 rate = min_t(u32, drm_dp_max_link_rate(mtk_dp->rx_cap) * - drm_dp_max_lane_count(mtk_dp->rx_cap), - drm_dp_bw_code_to_link_rate(mtk_dp->max_linkrate) * - mtk_dp->max_lanes); + u32 lane_count_min = mtk_dp->train_info.lane_count; + u32 rate = drm_dp_bw_code_to_link_rate(mtk_dp->train_info.link_rate) * + lane_count_min; *num_input_fmts = 0; @@ -2466,8 +2497,8 @@ static u32 *mtk_dp_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge, * datarate of YUV422 and sink device supports YUV422, we output YUV422 * format. Use this condition, we can support more resolution. 
*/ - if ((rate < (mode->clock * 24 / 8)) && - (rate > (mode->clock * 16 / 8)) && + if (((rate * 97 / 100) < (mode->clock * 24 / 8)) && + ((rate * 97 / 100) > (mode->clock * 16 / 8)) && (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR422)) { input_fmts = kcalloc(1, sizeof(*input_fmts), GFP_KERNEL); if (!input_fmts) diff --git a/drivers/gpu/drm/mediatek/mtk_dp_reg.h b/drivers/gpu/drm/mediatek/mtk_dp_reg.h index 709b79480693..8ad7a9cc259e 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp_reg.h +++ b/drivers/gpu/drm/mediatek/mtk_dp_reg.h @@ -86,6 +86,7 @@ #define MTK_DP_ENC0_P0_3004 0x3004 #define VIDEO_M_CODE_SEL_DP_ENC0_P0_MASK BIT(8) #define DP_TX_ENCODER_4P_RESET_SW_DP_ENC0_P0 BIT(9) +#define SDP_RESET_SW_DP_ENC0_P0 BIT(13) #define MTK_DP_ENC0_P0_3010 0x3010 #define HTOTAL_SW_DP_ENC0_P0_MASK GENMASK(15, 0) #define MTK_DP_ENC0_P0_3014 0x3014 diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 49af2b19a85a..f22ad2882697 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -358,7 +358,7 @@ static const struct of_device_id mtk_drm_of_ids[] = { }; MODULE_DEVICE_TABLE(of, mtk_drm_of_ids); -static int mtk_drm_match(struct device *dev, void *data) +static int mtk_drm_match(struct device *dev, const void *data) { if (!strncmp(dev_name(dev), "mediatek-drm", sizeof("mediatek-drm") - 1)) return true; @@ -372,11 +372,12 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) struct mtk_drm_private *temp_drm_priv; struct device_node *phandle = dev->parent->of_node; const struct of_device_id *of_id; + struct device_node *node; struct device *drm_dev; unsigned int cnt = 0; int i, j; - for_each_child_of_node_scoped(phandle->parent, node) { + for_each_child_of_node(phandle->parent, node) { struct platform_device *pdev; of_id = of_match_node(mtk_drm_of_ids, node); @@ -405,8 +406,10 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) if (temp_drm_priv->mtk_drm_bound) cnt++; - if 
(cnt == MAX_CRTC) + if (cnt == MAX_CRTC) { + of_node_put(node); break; + } } if (drm_priv->data->mmsys_dev_num == cnt) { @@ -671,6 +674,8 @@ err_deinit: err_free: private->drm = NULL; drm_dev_put(drm); + for (i = 0; i < private->data->mmsys_dev_num; i++) + private->all_drm_private[i]->drm = NULL; return ret; } @@ -898,7 +903,7 @@ static int mtk_drm_of_ddp_path_build_one(struct device *dev, enum mtk_crtc_path const unsigned int **out_path, unsigned int *out_path_len) { - struct device_node *next, *prev, *vdo = dev->parent->of_node; + struct device_node *next = NULL, *prev, *vdo = dev->parent->of_node; unsigned int temp_path[DDP_COMPONENT_DRM_ID_MAX] = { 0 }; unsigned int *final_ddp_path; unsigned short int idx = 0; @@ -1087,7 +1092,7 @@ static int mtk_drm_probe(struct platform_device *pdev) /* No devicetree graphs support: go with hardcoded paths if present */ dev_dbg(dev, "Using hardcoded paths for MMSYS %u\n", mtk_drm_data->mmsys_id); private->data = mtk_drm_data; - }; + } private->all_drm_private = devm_kmalloc_array(dev, private->data->mmsys_dev_num, sizeof(*private->all_drm_private), diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index e61b9bc68e9a..40752f232054 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -139,11 +139,11 @@ #define CLK_HS_POST GENMASK(15, 8) #define CLK_HS_EXIT GENMASK(23, 16) -#define DSI_VM_CMD_CON 0x130 +/* DSI_VM_CMD_CON */ #define VM_CMD_EN BIT(0) #define TS_VFP_EN BIT(5) -#define DSI_SHADOW_DEBUG 0x190U +/* DSI_SHADOW_DEBUG */ #define FORCE_COMMIT BIT(0) #define BYPASS_SHADOW BIT(1) @@ -187,6 +187,8 @@ struct phy; struct mtk_dsi_driver_data { const u32 reg_cmdq_off; + const u32 reg_vm_cmd_off; + const u32 reg_shadow_dbg_off; bool has_shadow_ctl; bool has_size_ctl; bool cmdq_long_packet_ctl; @@ -246,23 +248,22 @@ static void mtk_dsi_phy_timconfig(struct mtk_dsi *dsi) u32 data_rate_mhz = DIV_ROUND_UP(dsi->data_rate, HZ_PER_MHZ); struct mtk_phy_timing 
*timing = &dsi->phy_timing; - timing->lpx = (80 * data_rate_mhz / (8 * 1000)) + 1; - timing->da_hs_prepare = (59 * data_rate_mhz + 4 * 1000) / 8000 + 1; - timing->da_hs_zero = (163 * data_rate_mhz + 11 * 1000) / 8000 + 1 - + timing->lpx = (60 * data_rate_mhz / (8 * 1000)) + 1; + timing->da_hs_prepare = (80 * data_rate_mhz + 4 * 1000) / 8000; + timing->da_hs_zero = (170 * data_rate_mhz + 10 * 1000) / 8000 + 1 - timing->da_hs_prepare; - timing->da_hs_trail = (78 * data_rate_mhz + 7 * 1000) / 8000 + 1; + timing->da_hs_trail = timing->da_hs_prepare + 1; - timing->ta_go = 4 * timing->lpx; - timing->ta_sure = 3 * timing->lpx / 2; - timing->ta_get = 5 * timing->lpx; - timing->da_hs_exit = (118 * data_rate_mhz / (8 * 1000)) + 1; + timing->ta_go = 4 * timing->lpx - 2; + timing->ta_sure = timing->lpx + 2; + timing->ta_get = 4 * timing->lpx; + timing->da_hs_exit = 2 * timing->lpx + 1; - timing->clk_hs_prepare = (57 * data_rate_mhz / (8 * 1000)) + 1; - timing->clk_hs_post = (65 * data_rate_mhz + 53 * 1000) / 8000 + 1; - timing->clk_hs_trail = (78 * data_rate_mhz + 7 * 1000) / 8000 + 1; - timing->clk_hs_zero = (330 * data_rate_mhz / (8 * 1000)) + 1 - - timing->clk_hs_prepare; - timing->clk_hs_exit = (118 * data_rate_mhz / (8 * 1000)) + 1; + timing->clk_hs_prepare = 70 * data_rate_mhz / (8 * 1000); + timing->clk_hs_post = timing->clk_hs_prepare + 8; + timing->clk_hs_trail = timing->clk_hs_prepare; + timing->clk_hs_zero = timing->clk_hs_trail * 4; + timing->clk_hs_exit = 2 * timing->clk_hs_trail; timcon0 = FIELD_PREP(LPX, timing->lpx) | FIELD_PREP(HS_PREP, timing->da_hs_prepare) | @@ -367,8 +368,8 @@ static void mtk_dsi_set_mode(struct mtk_dsi *dsi) static void mtk_dsi_set_vm_cmd(struct mtk_dsi *dsi) { - mtk_dsi_mask(dsi, DSI_VM_CMD_CON, VM_CMD_EN, VM_CMD_EN); - mtk_dsi_mask(dsi, DSI_VM_CMD_CON, TS_VFP_EN, TS_VFP_EN); + mtk_dsi_mask(dsi, dsi->driver_data->reg_vm_cmd_off, VM_CMD_EN, VM_CMD_EN); + mtk_dsi_mask(dsi, dsi->driver_data->reg_vm_cmd_off, TS_VFP_EN, TS_VFP_EN); } static 
void mtk_dsi_rxtx_control(struct mtk_dsi *dsi) @@ -714,7 +715,7 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) if (dsi->driver_data->has_shadow_ctl) writel(FORCE_COMMIT | BYPASS_SHADOW, - dsi->regs + DSI_SHADOW_DEBUG); + dsi->regs + dsi->driver_data->reg_shadow_dbg_off); mtk_dsi_reset_engine(dsi); mtk_dsi_phy_timconfig(dsi); @@ -1263,26 +1264,36 @@ static void mtk_dsi_remove(struct platform_device *pdev) static const struct mtk_dsi_driver_data mt8173_dsi_driver_data = { .reg_cmdq_off = 0x200, + .reg_vm_cmd_off = 0x130, + .reg_shadow_dbg_off = 0x190 }; static const struct mtk_dsi_driver_data mt2701_dsi_driver_data = { .reg_cmdq_off = 0x180, + .reg_vm_cmd_off = 0x130, + .reg_shadow_dbg_off = 0x190 }; static const struct mtk_dsi_driver_data mt8183_dsi_driver_data = { .reg_cmdq_off = 0x200, + .reg_vm_cmd_off = 0x130, + .reg_shadow_dbg_off = 0x190, .has_shadow_ctl = true, .has_size_ctl = true, }; static const struct mtk_dsi_driver_data mt8186_dsi_driver_data = { .reg_cmdq_off = 0xd00, + .reg_vm_cmd_off = 0x200, + .reg_shadow_dbg_off = 0xc00, .has_shadow_ctl = true, .has_size_ctl = true, }; static const struct mtk_dsi_driver_data mt8188_dsi_driver_data = { .reg_cmdq_off = 0xd00, + .reg_vm_cmd_off = 0x200, + .reg_shadow_dbg_off = 0xc00, .has_shadow_ctl = true, .has_size_ctl = true, .cmdq_long_packet_ctl = true, diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index f274d9430cc3..5df20cbeafb8 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -78,6 +78,7 @@ msm-display-$(CONFIG_DRM_MSM_DPU) += \ disp/dpu1/dpu_hw_catalog.o \ disp/dpu1/dpu_hw_cdm.o \ disp/dpu1/dpu_hw_ctl.o \ + disp/dpu1/dpu_hw_cwb.o \ disp/dpu1/dpu_hw_dsc.o \ disp/dpu1/dpu_hw_dsc_1_2.o \ disp/dpu1/dpu_hw_interrupts.o \ diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 50c490b492f0..f1b18a6663f7 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -251,8 +251,8 
@@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07); /* Disable L2 bypass to avoid UCHE out of bounds errors */ - gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); - gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000); + gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0)); @@ -693,6 +693,8 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) if (ret) goto fail; + adreno_gpu->uche_trap_base = 0xffff0000ffff0000ull; + if (!gpu->aspace) { /* TODO we think it is possible to configure the GPU to * restrict access to VRAM carveout. But the required diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index ee89db72e36e..71dca78cd7a5 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -750,10 +750,10 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02); /* Disable L2 bypass in the UCHE */ - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000); - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF); + gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); /* Set the GMEM VA range (0 to gpu->gmem) */ gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); @@ -1760,11 +1760,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) unsigned 
int nr_rings; int ret; - if (!pdev) { - DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n"); - return ERR_PTR(-ENXIO); - } - a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL); if (!a5xx_gpu) return ERR_PTR(-ENOMEM); @@ -1805,5 +1800,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) adreno_gpu->ubwc_config.macrotile_mode = 0; adreno_gpu->ubwc_config.ubwc_swizzle = 0x7; + adreno_gpu->uche_trap_base = 0x0001ffffffff0000ull; + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 0c560e84ad5a..edffb7737a97 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -1388,6 +1388,17 @@ static const struct adreno_info a7xx_gpus[] = { .pwrup_reglist = &a7xx_pwrup_reglist, .gmu_chipid = 0x7020100, .gmu_cgc_mode = 0x00020202, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(3), + .perfmode_bw = 16500000, + }, + { /* sentinel */ }, + }, }, .address_space_size = SZ_16G, .preempt_record_size = 4192 * SZ_1K, @@ -1432,6 +1443,17 @@ static const struct adreno_info a7xx_gpus[] = { .pwrup_reglist = &a7xx_pwrup_reglist, .gmu_chipid = 0x7090100, .gmu_cgc_mode = 0x00020202, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(2), + .perfmode_bw = 10687500, + }, + { /* sentinel */ }, + }, }, .address_space_size = SZ_16G, .preempt_record_size = 3572 * SZ_1K, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 14db7376c712..65d38b25c070 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -9,6 +9,7 @@ #include <linux/pm_domain.h> #include <linux/pm_opp.h> #include <soc/qcom/cmd-db.h> +#include <soc/qcom/tcs.h> #include <drm/drm_gem.h> #include 
"a6xx_gpu.h" @@ -109,9 +110,11 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, bool suspended) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + const struct a6xx_info *info = adreno_gpu->info->a6xx; struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gmu *gmu = &a6xx_gpu->gmu; u32 perf_index; + u32 bw_index = 0; unsigned long gpu_freq; int ret = 0; @@ -124,6 +127,37 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, if (gpu_freq == gmu->gpu_freqs[perf_index]) break; + /* If enabled, find the corresponding DDR bandwidth index */ + if (info->bcms && gmu->nr_gpu_bws > 1) { + unsigned int bw = dev_pm_opp_get_bw(opp, true, 0); + + for (bw_index = 0; bw_index < gmu->nr_gpu_bws - 1; bw_index++) { + if (bw == gmu->gpu_bw_table[bw_index]) + break; + } + + /* Vote AB as a fraction of the max bandwidth, starting from A750 */ + if (bw && adreno_is_a750_family(adreno_gpu)) { + u64 tmp; + + /* For now, vote for 25% of the bandwidth */ + tmp = bw * 25; + do_div(tmp, 100); + + /* + * The AB vote consists of a 16 bit wide quantized level + * against the maximum supported bandwidth. 
+ * Quantization can be calculated as below: + * vote = (bandwidth * 2^16) / max bandwidth + */ + tmp *= MAX_AB_VOTE; + do_div(tmp, gmu->gpu_bw_table[gmu->nr_gpu_bws - 1]); + + bw_index |= AB_VOTE(clamp(tmp, 1, MAX_AB_VOTE)); + bw_index |= AB_VOTE_ENABLE; + } + } + gmu->current_perf_index = perf_index; gmu->freq = gmu->gpu_freqs[perf_index]; @@ -139,8 +173,10 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, return; if (!gmu->legacy) { - a6xx_hfi_set_freq(gmu, perf_index); - dev_pm_opp_set_opp(&gpu->pdev->dev, opp); + a6xx_hfi_set_freq(gmu, perf_index, bw_index); + /* With Bandwidth voting, we now vote for all resources, so skip OPP set */ + if (!bw_index) + dev_pm_opp_set_opp(&gpu->pdev->dev, opp); return; } @@ -729,6 +765,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) const struct firmware *fw_image = adreno_gpu->fw[ADRENO_FW_GMU]; const struct block_header *blk; u32 reg_offset; + u32 ver; u32 itcm_base = 0x00000000; u32 dtcm_base = 0x00040000; @@ -775,6 +812,12 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) } } + ver = gmu_read(gmu, REG_A6XX_GMU_CORE_FW_VERSION); + DRM_INFO("Loaded GMU firmware v%u.%u.%u\n", + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver)); + return 0; } @@ -1265,7 +1308,7 @@ static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo, bo->virt = msm_gem_get_vaddr(bo->obj); bo->size = size; - msm_gem_object_set_name(bo->obj, name); + msm_gem_object_set_name(bo->obj, "%s", name); return 0; } @@ -1287,6 +1330,104 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu) return 0; } +/** + * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM) + * @unit: divisor used to convert bytes/sec bw value to an RPMh msg + * @width: multiplier used to convert bytes/sec bw value to an RPMh msg + * @vcd: virtual clock domain that this bcm belongs to + * @reserved: 
reserved field + */ +struct bcm_db { + __le32 unit; + __le16 width; + u8 vcd; + u8 reserved; +}; + +static int a6xx_gmu_rpmh_bw_votes_init(struct adreno_gpu *adreno_gpu, + const struct a6xx_info *info, + struct a6xx_gmu *gmu) +{ + const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 }; + unsigned int bcm_index, bw_index, bcm_count = 0; + + /* Retrieve BCM data from cmd-db */ + for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) { + const struct a6xx_bcm *bcm = &info->bcms[bcm_index]; + size_t count; + + /* Stop at NULL terminated bcm entry */ + if (!bcm->name) + break; + + bcm_data[bcm_index] = cmd_db_read_aux_data(bcm->name, &count); + if (IS_ERR(bcm_data[bcm_index])) + return PTR_ERR(bcm_data[bcm_index]); + + if (!count) { + dev_err(gmu->dev, "invalid BCM '%s' aux data size\n", + bcm->name); + return -EINVAL; + } + + bcm_count++; + } + + /* Generate BCM votes values for each bandwidth & BCM */ + for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) { + u32 *data = gmu->gpu_ib_votes[bw_index]; + u32 bw = gmu->gpu_bw_table[bw_index]; + + /* Calculations loosely copied from bcm_aggregate() & tcs_cmd_gen() */ + for (bcm_index = 0; bcm_index < bcm_count; bcm_index++) { + const struct a6xx_bcm *bcm = &info->bcms[bcm_index]; + bool commit = false; + u64 peak; + u32 vote; + + if (bcm_index == bcm_count - 1 || + (bcm_data[bcm_index + 1] && + bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 1]->vcd)) + commit = true; + + if (!bw) { + data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 0); + continue; + } + + if (bcm->fixed) { + u32 perfmode = 0; + + /* GMU on A6xx votes perfmode on all valid bandwidth */ + if (!adreno_is_a7xx(adreno_gpu) || + (bcm->perfmode_bw && bw >= bcm->perfmode_bw)) + perfmode = bcm->perfmode; + + data[bcm_index] = BCM_TCS_CMD(commit, true, 0, perfmode); + continue; + } + + /* Multiply the bandwidth by the width of the connection */ + peak = (u64)bw * le16_to_cpu(bcm_data[bcm_index]->width); + do_div(peak, bcm->buswidth); + + /* Input bandwidth 
value is in KBps, scale the value to BCM unit */ + peak *= 1000; + do_div(peak, le32_to_cpu(bcm_data[bcm_index]->unit)); + + vote = clamp(peak, 1, BCM_TCS_CMD_VOTE_MASK); + + /* GMUs on A7xx votes on both x & y */ + if (adreno_is_a7xx(adreno_gpu)) + data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote); + else + data[bcm_index] = BCM_TCS_CMD(commit, true, 0, vote); + } + } + + return 0; +} + /* Return the 'arc-level' for the given frequency */ static unsigned int a6xx_gmu_get_arc_level(struct device *dev, unsigned long freq) @@ -1390,12 +1531,15 @@ static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes, * The GMU votes with the RPMh for itself and on behalf of the GPU but we need * to construct the list of votes on the CPU and send it over. Query the RPMh * voltage levels and build the votes + * The GMU can also vote for DDR interconnects, use the OPP bandwidth entries + * and BCM parameters to build the votes. */ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + const struct a6xx_info *info = adreno_gpu->info->a6xx; struct msm_gpu *gpu = &adreno_gpu->base; int ret; @@ -1407,6 +1551,10 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes, gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl"); + /* Build the interconnect votes */ + if (info->bcms && gmu->nr_gpu_bws > 1) + ret |= a6xx_gmu_rpmh_bw_votes_init(adreno_gpu, info, gmu); + return ret; } @@ -1442,10 +1590,43 @@ static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs, return index; } +static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long *bandwidths, + u32 size) +{ + int count = dev_pm_opp_get_opp_count(dev); + struct dev_pm_opp *opp; + int i, index = 0; + unsigned int bandwidth = 1; + + /* + * The OPP table doesn't contain the "off" bandwidth level so we need to + 
* add 1 to the table size to account for it + */ + + if (WARN(count + 1 > size, + "The GMU bandwidth table is being truncated\n")) + count = size - 1; + + /* Set the "off" bandwidth */ + bandwidths[index++] = 0; + + for (i = 0; i < count; i++) { + opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0); + if (IS_ERR(opp)) + break; + + dev_pm_opp_put(opp); + bandwidths[index++] = bandwidth++; + } + + return index; +} + static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + const struct a6xx_info *info = adreno_gpu->info->a6xx; struct msm_gpu *gpu = &adreno_gpu->base; int ret = 0; @@ -1472,6 +1653,14 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) gmu->current_perf_index = gmu->nr_gpu_freqs - 1; + /* + * The GMU also handles GPU Interconnect Votes so build a list + * of DDR bandwidths from the GPU OPP table + */ + if (info->bcms) + gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev, + gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table)); + /* Build the list of RPMh votes that we'll send to the GMU */ return a6xx_gmu_rpmh_votes_init(gmu); } @@ -1603,7 +1792,9 @@ int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) gmu->dev = &pdev->dev; - of_dma_configure(gmu->dev, node, true); + ret = of_dma_configure(gmu->dev, node, true); + if (ret) + return ret; pm_runtime_enable(gmu->dev); @@ -1668,7 +1859,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) gmu->dev = &pdev->dev; - of_dma_configure(gmu->dev, node, true); + ret = of_dma_configure(gmu->dev, node, true); + if (ret) + return ret; /* Fow now, don't do anything fancy until we get our feet under us */ gmu->idle_level = GMU_IDLE_STATE_ACTIVE; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index b4a79f88ccf4..0c888b326cfb 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -19,6 +19,18 @@ struct a6xx_gmu_bo { u64 iova; }; +#define GMU_MAX_GX_FREQS 16 +#define GMU_MAX_CX_FREQS 4 +#define GMU_MAX_BCMS 3 + +struct a6xx_bcm { + char *name; + unsigned int buswidth; + bool fixed; + unsigned int perfmode; + unsigned int perfmode_bw; +}; + /* * These define the different GMU wake up options - these define how both the * CPU and the GMU bring up the hardware @@ -79,12 +91,16 @@ struct a6xx_gmu { int current_perf_index; int nr_gpu_freqs; - unsigned long gpu_freqs[16]; - u32 gx_arc_votes[16]; + unsigned long gpu_freqs[GMU_MAX_GX_FREQS]; + u32 gx_arc_votes[GMU_MAX_GX_FREQS]; + + int nr_gpu_bws; + unsigned long gpu_bw_table[GMU_MAX_GX_FREQS]; + u32 gpu_ib_votes[GMU_MAX_GX_FREQS][GMU_MAX_BCMS]; int nr_gmu_freqs; - unsigned long gmu_freqs[4]; - u32 cx_arc_votes[4]; + unsigned long gmu_freqs[GMU_MAX_CX_FREQS]; + u32 cx_arc_votes[GMU_MAX_CX_FREQS]; unsigned long freq; @@ -193,7 +209,7 @@ void a6xx_hfi_init(struct a6xx_gmu *gmu); int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state); void a6xx_hfi_stop(struct a6xx_gmu *gmu); int a6xx_hfi_send_prep_slumber(struct a6xx_gmu *gmu); -int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, int index); +int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, u32 perf_index, u32 bw_index); bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu); bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 019610341df1..0ae29a7c8a4d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1123,12 +1123,12 @@ static int hw_init(struct msm_gpu *gpu) /* Disable L2 bypass in the UCHE */ if (adreno_is_a7xx(adreno_gpu)) { - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, 
REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } else { - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu); - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } if (!(adreno_is_a650_family(adreno_gpu) || @@ -2533,6 +2533,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) } } + adreno_gpu->uche_trap_base = 0x1fffffffff000ull; + if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 4aceffb6aae8..9201a53dd341 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -44,6 +44,7 @@ struct a6xx_info { u32 gmu_chipid; u32 gmu_cgc_mode; u32 prim_fifo_threshold; + const struct a6xx_bcm *bcms; }; struct a6xx_gpu { diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index cb8844ed46b2..0989aee3dd2c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -6,6 +6,7 @@ #include <linux/list.h> #include <soc/qcom/cmd-db.h> +#include <soc/qcom/tcs.h> #include "a6xx_gmu.h" #include "a6xx_gmu.xml.h" @@ -259,6 +260,48 @@ static int a6xx_hfi_send_perf_table(struct a6xx_gmu *gmu) NULL, 0); } +static void a6xx_generate_bw_table(const struct a6xx_info *info, struct a6xx_gmu *gmu, + struct a6xx_hfi_msg_bw_table *msg) +{ + unsigned int i, j; + + for (i = 0; i < GMU_MAX_BCMS; i++) { + if (!info->bcms[i].name) + break; + msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcms[i].name); + } + msg->ddr_cmds_num = i; + + for (i = 0; i < gmu->nr_gpu_bws; ++i) + for (j = 0; j < 
msg->ddr_cmds_num; j++) + msg->ddr_cmds_data[i][j] = gmu->gpu_ib_votes[i][j]; + msg->bw_level_num = gmu->nr_gpu_bws; + + /* Compute the wait bitmask with each BCM having the commit bit */ + msg->ddr_wait_bitmask = 0; + for (j = 0; j < msg->ddr_cmds_num; j++) + if (msg->ddr_cmds_data[0][j] & BCM_TCS_CMD_COMMIT_MASK) + msg->ddr_wait_bitmask |= BIT(j); + + /* + * These are the CX (CNOC) votes - these are used by the GMU + * The 'CN0' BCM is used on all targets, and votes are basically + * 'off' and 'on' states with first bit to enable the path. + */ + + msg->cnoc_cmds_addrs[0] = cmd_db_read_addr("CN0"); + msg->cnoc_cmds_num = 1; + + msg->cnoc_cmds_data[0][0] = BCM_TCS_CMD(true, false, 0, 0); + msg->cnoc_cmds_data[1][0] = BCM_TCS_CMD(true, true, 0, BIT(0)); + + /* Compute the wait bitmask with each BCM having the commit bit */ + msg->cnoc_wait_bitmask = 0; + for (j = 0; j < msg->cnoc_cmds_num; j++) + if (msg->cnoc_cmds_data[0][j] & BCM_TCS_CMD_COMMIT_MASK) + msg->cnoc_wait_bitmask |= BIT(j); +} + static void a618_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) { /* Send a single "off" entry since the 618 GMU doesn't do bus scaling */ @@ -664,6 +707,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) struct a6xx_hfi_msg_bw_table *msg; struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + const struct a6xx_info *info = adreno_gpu->info->a6xx; if (gmu->bw_table) goto send; @@ -672,7 +716,9 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) if (!msg) return -ENOMEM; - if (adreno_is_a618(adreno_gpu)) + if (info->bcms && gmu->nr_gpu_bws > 1) + a6xx_generate_bw_table(info, gmu, msg); + else if (adreno_is_a618(adreno_gpu)) a618_build_bw_table(msg); else if (adreno_is_a619(adreno_gpu)) a619_build_bw_table(msg); @@ -726,13 +772,13 @@ static int a6xx_hfi_send_core_fw_start(struct a6xx_gmu *gmu) sizeof(msg), NULL, 0); } -int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, int index) +int 
a6xx_hfi_set_freq(struct a6xx_gmu *gmu, u32 freq_index, u32 bw_index) { struct a6xx_hfi_gx_bw_perf_vote_cmd msg = { 0 }; msg.ack_type = 1; /* blocking */ - msg.freq = index; - msg.bw = 0; /* TODO: bus scaling */ + msg.freq = freq_index; + msg.bw = bw_index; return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_GX_BW_PERF_VOTE, &msg, sizeof(msg), NULL, 0); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h index 528110169398..52ba4a07d7b9 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h @@ -173,6 +173,11 @@ struct a6xx_hfi_gx_bw_perf_vote_cmd { u32 bw; }; +#define AB_VOTE_MASK GENMASK(31, 16) +#define MAX_AB_VOTE (FIELD_MAX(AB_VOTE_MASK) - 1) +#define AB_VOTE(vote) FIELD_PREP(AB_VOTE_MASK, (vote)) +#define AB_VOTE_ENABLE BIT(8) + #define HFI_H2F_MSG_PREPARE_SLUMBER 33 struct a6xx_hfi_prep_slumber_cmd { diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 75f5367e73ca..1238f3265978 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -310,10 +310,11 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, uint32_t param, uint64_t *value, uint32_t *len) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct drm_device *drm = gpu->dev; /* No pointer params yet */ if (*len != 0) - return -EINVAL; + return UERR(EINVAL, drm, "invalid len"); switch (param) { case MSM_PARAM_GPU_ID: @@ -365,12 +366,12 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, return 0; case MSM_PARAM_VA_START: if (ctx->aspace == gpu->aspace) - return -EINVAL; + return UERR(EINVAL, drm, "requires per-process pgtables"); *value = ctx->aspace->va_start; return 0; case MSM_PARAM_VA_SIZE: if (ctx->aspace == gpu->aspace) - return -EINVAL; + return UERR(EINVAL, drm, "requires per-process pgtables"); *value = ctx->aspace->va_size; return 0; case MSM_PARAM_HIGHEST_BANK_BIT: @@ -385,15 
+386,19 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, case MSM_PARAM_MACROTILE_MODE: *value = adreno_gpu->ubwc_config.macrotile_mode; return 0; + case MSM_PARAM_UCHE_TRAP_BASE: + *value = adreno_gpu->uche_trap_base; + return 0; default: - DBG("%s: invalid param: %u", gpu->name, param); - return -EINVAL; + return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param); } } int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, uint32_t param, uint64_t value, uint32_t len) { + struct drm_device *drm = gpu->dev; + switch (param) { case MSM_PARAM_COMM: case MSM_PARAM_CMDLINE: @@ -401,11 +406,11 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, * that should be a reasonable upper bound */ if (len > PAGE_SIZE) - return -EINVAL; + return UERR(EINVAL, drm, "invalid len"); break; default: if (len != 0) - return -EINVAL; + return UERR(EINVAL, drm, "invalid len"); } switch (param) { @@ -434,11 +439,10 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, } case MSM_PARAM_SYSPROF: if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + return UERR(EPERM, drm, "invalid permissions"); return msm_file_private_set_sysprof(ctx, gpu, value); default: - DBG("%s: invalid param: %u", gpu->name, param); - return -EINVAL; + return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param); } } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index e71f420f8b3a..dcf454629ce0 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -253,6 +253,8 @@ struct adreno_gpu { bool gmu_is_wrapper; bool has_ray_tracing; + + u64 uche_trap_base; }; #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) @@ -559,6 +561,11 @@ static inline int adreno_is_a740_family(struct adreno_gpu *gpu) gpu->info->family == ADRENO_7XX_GEN3; } +static inline int adreno_is_a750_family(struct adreno_gpu *gpu) +{ + return 
gpu->info->family == ADRENO_7XX_GEN3; +} + static inline int adreno_is_a7xx(struct adreno_gpu *gpu) { /* Update with non-fake (i.e. non-A702) Gen 7 GPUs */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h index eb5dfff2ec4f..bcb39807fe61 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h @@ -160,6 +160,7 @@ static const struct dpu_lm_cfg sm8650_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x400, @@ -167,6 +168,7 @@ static const struct dpu_lm_cfg sm8650_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x400, @@ -252,25 +254,25 @@ static const struct dpu_pingpong_cfg sm8650_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_cwb_0", .id = PINGPONG_CWB_0, .base = 0x66000, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_cwb_1", .id = PINGPONG_CWB_1, .base = 0x66400, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_8", .id = PINGPONG_8, + .name = "pingpong_cwb_2", .id = PINGPONG_CWB_2, .base = 0x7e000, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_4, }, { - .name = "pingpong_9", .id = PINGPONG_9, + .name = "pingpong_cwb_3", .id = PINGPONG_CWB_3, .base = 0x7e400, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, @@ -350,6 +352,25 @@ static const struct dpu_wb_cfg sm8650_wb[] = { }, }; +static const struct dpu_cwb_cfg sm8650_cwb[] = { + { + .name = "cwb_0", .id = 
CWB_0, + .base = 0x66200, .len = 0x8, + }, + { + .name = "cwb_1", .id = CWB_1, + .base = 0x66600, .len = 0x8, + }, + { + .name = "cwb_2", .id = CWB_2, + .base = 0x7E200, .len = 0x8, + }, + { + .name = "cwb_3", .id = CWB_3, + .base = 0x7E600, .len = 0x8, + }, +}; + static const struct dpu_intf_cfg sm8650_intf[] = { { .name = "intf_0", .id = INTF_0, @@ -447,6 +468,8 @@ const struct dpu_mdss_cfg dpu_sm8650_cfg = { .merge_3d = sm8650_merge_3d, .wb_count = ARRAY_SIZE(sm8650_wb), .wb = sm8650_wb, + .cwb_count = ARRAY_SIZE(sm8650_cwb), + .cwb = sm8650_cwb, .intf_count = ARRAY_SIZE(sm8650_intf), .intf = sm8650_intf, .vbif_count = ARRAY_SIZE(sm8650_vbif), diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h index cbbdaebe357e..daef07924886 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h @@ -65,6 +65,54 @@ static const struct dpu_sspp_cfg sdm670_sspp[] = { }, }; +static const struct dpu_lm_cfg sdm670_lm[] = { + { + .name = "lm_0", .id = LM_0, + .base = 0x44000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_1, + .pingpong = PINGPONG_0, + .dspp = DSPP_0, + }, { + .name = "lm_1", .id = LM_1, + .base = 0x45000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_0, + .pingpong = PINGPONG_1, + .dspp = DSPP_1, + }, { + .name = "lm_2", .id = LM_2, + .base = 0x46000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_5, + .pingpong = PINGPONG_2, + }, { + .name = "lm_5", .id = LM_5, + .base = 0x49000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_2, + .pingpong = PINGPONG_3, + }, +}; + +static const struct dpu_dspp_cfg sdm670_dspp[] = { + { + .name = "dspp_0", .id = DSPP_0, + .base = 0x54000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, 
{ + .name = "dspp_1", .id = DSPP_1, + .base = 0x56000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, +}; + static const struct dpu_dsc_cfg sdm670_dsc[] = { { .name = "dsc_0", .id = DSC_0, @@ -88,8 +136,10 @@ const struct dpu_mdss_cfg dpu_sdm670_cfg = { .ctl = sdm845_ctl, .sspp_count = ARRAY_SIZE(sdm670_sspp), .sspp = sdm670_sspp, - .mixer_count = ARRAY_SIZE(sdm845_lm), - .mixer = sdm845_lm, + .mixer_count = ARRAY_SIZE(sdm670_lm), + .mixer = sdm670_lm, + .dspp_count = ARRAY_SIZE(sdm670_dspp), + .dspp = sdm670_dspp, .pingpong_count = ARRAY_SIZE(sdm845_pp), .pingpong = sdm845_pp, .dsc_count = ARRAY_SIZE(sdm670_dsc), diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 6ccfde82fecd..421afacb7248 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -164,6 +164,7 @@ static const struct dpu_lm_cfg sm8150_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -171,6 +172,7 @@ static const struct dpu_lm_cfg sm8150_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index bab19ddd1d4f..641023b102bf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -163,6 +163,7 @@ static const struct dpu_lm_cfg sc8180x_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -170,6 +171,7 @@ static const struct dpu_lm_cfg sc8180x_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong 
= PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h new file mode 100644 index 000000000000..621a2140f675 --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DPU_5_3_SM6150_H +#define _DPU_5_3_SM6150_H + +static const struct dpu_caps sm6150_dpu_caps = { + .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, + .max_mixer_blendstages = 0x9, + .has_dim_layer = true, + .has_idle_pc = true, + .max_linewidth = 2160, + .pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE, + .max_hdeci_exp = MAX_HORZ_DECIMATION, + .max_vdeci_exp = MAX_VERT_DECIMATION, +}; + +static const struct dpu_mdp_cfg sm6150_mdp = { + .name = "top_0", + .base = 0x0, .len = 0x45c, + .features = 0, + .clk_ctrls = { + [DPU_CLK_CTRL_VIG0] = { .reg_off = 0x2ac, .bit_off = 0 }, + [DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 }, + }, +}; + +static const struct dpu_ctl_cfg sm6150_ctl[] = { + { + .name = "ctl_0", .id = CTL_0, + .base = 0x1000, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 9), + }, { + .name = "ctl_1", .id = CTL_1, + .base = 0x1200, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 10), + }, { + .name = "ctl_2", .id = CTL_2, + .base = 0x1400, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 11), + }, { + .name = "ctl_3", .id = CTL_3, + .base = 0x1600, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = 
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 12), + }, { + .name = "ctl_4", .id = CTL_4, + .base = 0x1800, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 13), + }, { + .name = "ctl_5", .id = CTL_5, + .base = 0x1a00, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 23), + }, +}; + +static const struct dpu_sspp_cfg sm6150_sspp[] = { + { + .name = "sspp_0", .id = SSPP_VIG0, + .base = 0x4000, .len = 0x1f0, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_2_4, + .xin_id = 0, + .type = SSPP_TYPE_VIG, + .clk_ctrl = DPU_CLK_CTRL_VIG0, + }, { + .name = "sspp_8", .id = SSPP_DMA0, + .base = 0x24000, .len = 0x1f0, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 1, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA0, + }, { + .name = "sspp_9", .id = SSPP_DMA1, + .base = 0x26000, .len = 0x1f0, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 5, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA1, + }, { + .name = "sspp_10", .id = SSPP_DMA2, + .base = 0x28000, .len = 0x1f0, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 9, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA2, + }, { + .name = "sspp_11", .id = SSPP_DMA3, + .base = 0x2a000, .len = 0x1f0, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 13, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA3, + }, +}; + +static const struct dpu_lm_cfg sm6150_lm[] = { + { + .name = "lm_0", .id = LM_0, + .base = 0x44000, .len = 0x320, + .features = MIXER_QCM2290_MASK, + .sblk = &sdm845_lm_sblk, + .pingpong = PINGPONG_0, + .dspp = DSPP_0, + .lm_pair = LM_1, + }, { + .name = "lm_1", .id = LM_1, + .base = 0x45000, .len = 0x320, + .features = MIXER_QCM2290_MASK, + .sblk = &sdm845_lm_sblk, + .pingpong = PINGPONG_1, + .lm_pair = LM_0, + }, { + .name = "lm_2", .id = LM_2, + .base = 0x46000, .len = 
0x320, + .features = MIXER_QCM2290_MASK, + .sblk = &sdm845_lm_sblk, + .pingpong = PINGPONG_2, + }, +}; + +static const struct dpu_dspp_cfg sm6150_dspp[] = { + { + .name = "dspp_0", .id = DSPP_0, + .base = 0x54000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, +}; + +static const struct dpu_pingpong_cfg sm6150_pp[] = { + { + .name = "pingpong_0", .id = PINGPONG_0, + .base = 0x70000, .len = 0xd4, + .features = PINGPONG_SM8150_MASK, + .sblk = &sdm845_pp_sblk, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), + }, { + .name = "pingpong_1", .id = PINGPONG_1, + .base = 0x70800, .len = 0xd4, + .features = PINGPONG_SM8150_MASK, + .sblk = &sdm845_pp_sblk, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), + }, { + .name = "pingpong_2", .id = PINGPONG_2, + .base = 0x71000, .len = 0xd4, + .features = PINGPONG_SM8150_MASK, + .sblk = &sdm845_pp_sblk, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10), + }, +}; + +static const struct dpu_intf_cfg sm6150_intf[] = { + { + .name = "intf_0", .id = INTF_0, + .base = 0x6a000, .len = 0x280, + .features = INTF_SC7180_MASK, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), + }, { + .name = "intf_1", .id = INTF_1, + .base = 0x6a800, .len = 0x2c0, + .features = INTF_SC7180_MASK, + .type = INTF_DSI, + .controller_id = MSM_DSI_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), + .intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF1_TEAR_INTR, 2), + }, { + .name = "intf_3", .id = INTF_3, + .base = 0x6b800, .len = 0x280, + .features = INTF_SC7180_MASK, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_1, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), + }, 
+}; + +static const struct dpu_perf_cfg sm6150_perf_data = { + .max_bw_low = 4800000, + .max_bw_high = 4800000, + .min_core_ib = 2400000, + .min_llcc_ib = 0, + .min_dram_ib = 800000, + .min_prefill_lines = 24, + .danger_lut_tbl = {0xf, 0xffff, 0x0}, + .safe_lut_tbl = {0xfff8, 0xf000, 0xffff}, + .qos_lut_tbl = { + {.nentry = ARRAY_SIZE(sm8150_qos_linear), + .entries = sm8150_qos_linear + }, + {.nentry = ARRAY_SIZE(sc7180_qos_macrotile), + .entries = sc7180_qos_macrotile + }, + {.nentry = ARRAY_SIZE(sc7180_qos_nrt), + .entries = sc7180_qos_nrt + }, + /* TODO: macrotile-qseed is different from macrotile */ + }, + .cdp_cfg = { + {.rd_enable = 1, .wr_enable = 1}, + {.rd_enable = 1, .wr_enable = 0} + }, + .clk_inefficiency_factor = 105, + .bw_inefficiency_factor = 120, +}; + +static const struct dpu_mdss_version sm6150_mdss_ver = { + .core_major_ver = 5, + .core_minor_ver = 3, +}; + +const struct dpu_mdss_cfg dpu_sm6150_cfg = { + .mdss_ver = &sm6150_mdss_ver, + .caps = &sm6150_dpu_caps, + .mdp = &sm6150_mdp, + .ctl_count = ARRAY_SIZE(sm6150_ctl), + .ctl = sm6150_ctl, + .sspp_count = ARRAY_SIZE(sm6150_sspp), + .sspp = sm6150_sspp, + .mixer_count = ARRAY_SIZE(sm6150_lm), + .mixer = sm6150_lm, + .dspp_count = ARRAY_SIZE(sm6150_dspp), + .dspp = sm6150_dspp, + .pingpong_count = ARRAY_SIZE(sm6150_pp), + .pingpong = sm6150_pp, + .intf_count = ARRAY_SIZE(sm6150_intf), + .intf = sm6150_intf, + .vbif_count = ARRAY_SIZE(sdm845_vbif), + .vbif = sdm845_vbif, + .perf = &sm6150_perf_data, +}; + +#endif diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index a57d50b1f028..e8916ae826a6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -162,6 +162,7 @@ static const struct dpu_lm_cfg sm8250_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len 
= 0x320, @@ -169,6 +170,7 @@ static const struct dpu_lm_cfg sm8250_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index aced16e350da..f7c08e89c882 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -162,6 +162,7 @@ static const struct dpu_lm_cfg sm8350_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -169,6 +170,7 @@ static const struct dpu_lm_cfg sm8350_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index a1779c5597ae..08742472f9cc 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -257,13 +257,13 @@ static const struct dpu_pingpong_cfg sm8450_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_cwb_0", .id = PINGPONG_CWB_0, .base = 0x65800, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_cwb_1", .id = PINGPONG_CWB_1, .base = 0x65c00, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h index 907b4d7ceb47..76ec72a32378 100644 --- 
a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h @@ -256,13 +256,13 @@ static const struct dpu_pingpong_cfg sa8775p_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_6", .id = PINGPONG_CWB_0, .base = 0x65800, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_7", .id = PINGPONG_CWB_1, .base = 0x65c00, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index ad48defa154f..4d3787fceb72 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h @@ -160,6 +160,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -167,6 +168,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, @@ -251,13 +253,13 @@ static const struct dpu_pingpong_cfg sm8550_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_cwb_0", .id = PINGPONG_CWB_0, .base = 0x66000, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_cwb_1", .id = PINGPONG_CWB_1, .base = 0x66400, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h 
b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h index a3e60ac70689..6b112e3d17da 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h @@ -159,6 +159,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -166,6 +167,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, @@ -251,13 +253,13 @@ static const struct dpu_pingpong_cfg x1e80100_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_cwb_0", .id = PINGPONG_CWB_0, .base = 0x66000, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_cwb_1", .id = PINGPONG_CWB_1, .base = 0x66400, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, @@ -389,8 +391,8 @@ static const struct dpu_intf_cfg x1e80100_intf[] = { .type = INTF_DP, .controller_id = MSM_DP_CONTROLLER_2, .prog_fetch_lines_worst_case = 24, - .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), - .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), }, { .name = "intf_7", .id = INTF_7, .base = 0x3b000, .len = 0x280, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 9f6ffd344693..7191b1a6d41b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -732,6 +732,13 @@ static int _dpu_crtc_check_and_setup_lm_bounds(struct drm_crtc *crtc, struct dpu_kms 
*dpu_kms = _dpu_crtc_get_kms(crtc); int i; + /* if we cannot merge 2 LMs (no 3d mux) better to fail earlier + * before even checking the width after the split + */ + if (!dpu_kms->catalog->caps->has_3d_merge && + adj_mode->hdisplay > dpu_kms->catalog->caps->max_mixer_width) + return -E2BIG; + for (i = 0; i < cstate->num_mixers; i++) { struct drm_rect *r = &cstate->lm_bounds[i]; r->x1 = crtc_split_width * i; @@ -1182,6 +1189,49 @@ static bool dpu_crtc_needs_dirtyfb(struct drm_crtc_state *cstate) return false; } +static int dpu_crtc_reassign_planes(struct drm_crtc *crtc, struct drm_crtc_state *crtc_state) +{ + int total_planes = crtc->dev->mode_config.num_total_plane; + struct drm_atomic_state *state = crtc_state->state; + struct dpu_global_state *global_state; + struct drm_plane_state **states; + struct drm_plane *plane; + int ret; + + global_state = dpu_kms_get_global_state(crtc_state->state); + if (IS_ERR(global_state)) + return PTR_ERR(global_state); + + dpu_rm_release_all_sspp(global_state, crtc); + + if (!crtc_state->enable) + return 0; + + states = kcalloc(total_planes, sizeof(*states), GFP_KERNEL); + if (!states) + return -ENOMEM; + + drm_atomic_crtc_state_for_each_plane(plane, crtc_state) { + struct drm_plane_state *plane_state = + drm_atomic_get_plane_state(state, plane); + + if (IS_ERR(plane_state)) { + ret = PTR_ERR(plane_state); + goto done; + } + + states[plane_state->normalized_zpos] = plane_state; + } + + ret = dpu_assign_plane_resources(global_state, state, crtc, states, total_planes); + +done: + kfree(states); + return ret; + + return 0; +} + static int dpu_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { @@ -1197,6 +1247,13 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc, bool needs_dirtyfb = dpu_crtc_needs_dirtyfb(crtc_state); + if (dpu_use_virtual_planes && + (crtc_state->planes_changed || crtc_state->zpos_changed)) { + rc = dpu_crtc_reassign_planes(crtc, crtc_state); + if (rc < 0) + return rc; + } + if 
(!crtc_state->enable || !drm_atomic_crtc_effectively_active(crtc_state)) { DRM_DEBUG_ATOMIC("crtc%d -> enable %d, active %d, skip atomic_check\n", crtc->base.id, crtc_state->enable, @@ -1251,6 +1308,12 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, { struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc); + /* if there is no 3d_mux block we cannot merge LMs so we cannot + * split the large layer into 2 LMs, filter out such modes + */ + if (!dpu_kms->catalog->caps->has_3d_merge && + mode->hdisplay > dpu_kms->catalog->caps->max_mixer_width) + return MODE_BAD_HVALUE; /* * max crtc width is equal to the max mixer width * 2 and max height is 4K */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 83de7564e2c1..5172ab4dea99 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -800,7 +800,7 @@ static int dpu_encoder_virt_atomic_check( if (!crtc_state->active_changed || crtc_state->enable) ret = dpu_rm_reserve(&dpu_kms->rm, global_state, - drm_enc, crtc_state, topology); + drm_enc, crtc_state, &topology); if (!ret) dpu_encoder_assign_crtc_resources(dpu_kms, drm_enc, global_state, crtc_state); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 2cbf41f33cc0..0b342c043875 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -765,6 +765,7 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = { #include "catalog/dpu_5_0_sm8150.h" #include "catalog/dpu_5_1_sc8180x.h" #include "catalog/dpu_5_2_sm7150.h" +#include "catalog/dpu_5_3_sm6150.h" #include "catalog/dpu_5_4_sm6125.h" #include "catalog/dpu_6_0_sm8250.h" diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index c701d18c3522..4cea19e1a203 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -613,6 +613,16 @@ struct dpu_wb_cfg { enum dpu_clk_ctrl_type clk_ctrl; }; +/* + * struct dpu_cwb_cfg : MDP CWB mux instance info + * @id: enum identifying this block + * @base: register base offset to mdss + * @features bit mask identifying sub-blocks/features + */ +struct dpu_cwb_cfg { + DPU_HW_BLK_INFO; +}; + /** * struct dpu_vbif_dynamic_ot_cfg - dynamic OT setting * @pps pixel per seconds @@ -815,6 +825,9 @@ struct dpu_mdss_cfg { u32 dspp_count; const struct dpu_dspp_cfg *dspp; + u32 cwb_count; + const struct dpu_cwb_cfg *cwb; + /* Add additional block data structures here */ const struct dpu_perf_cfg *perf; @@ -839,6 +852,7 @@ extern const struct dpu_mdss_cfg dpu_sm8250_cfg; extern const struct dpu_mdss_cfg dpu_sc7180_cfg; extern const struct dpu_mdss_cfg dpu_sm6115_cfg; extern const struct dpu_mdss_cfg dpu_sm6125_cfg; +extern const struct dpu_mdss_cfg dpu_sm6150_cfg; extern const struct dpu_mdss_cfg dpu_sm6350_cfg; extern const struct dpu_mdss_cfg dpu_qcm2290_cfg; extern const struct dpu_mdss_cfg dpu_sm6375_cfg; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.c new file mode 100644 index 000000000000..ae785f4ff0d4 --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. 
All rights reserved + */ + +#include <drm/drm_managed.h> +#include "dpu_hw_cwb.h" + +#include <linux/bitfield.h> + +#define CWB_MUX 0x000 +#define CWB_MODE 0x004 + +/* CWB mux block bit definitions */ +#define CWB_MUX_MASK GENMASK(3, 0) +#define CWB_MODE_MASK GENMASK(2, 0) + +static void dpu_hw_cwb_config(struct dpu_hw_cwb *ctx, + struct dpu_hw_cwb_setup_cfg *cwb_cfg) +{ + struct dpu_hw_blk_reg_map *c = &ctx->hw; + int cwb_mux_cfg = 0xF; + enum dpu_pingpong pp; + enum cwb_mode_input input; + + if (!cwb_cfg) + return; + + input = cwb_cfg->input; + pp = cwb_cfg->pp_idx; + + if (input >= INPUT_MODE_MAX) + return; + + /* + * The CWB_MUX register takes the pingpong index for the real-time + * display + */ + if ((pp != PINGPONG_NONE) && (pp < PINGPONG_MAX)) + cwb_mux_cfg = FIELD_PREP(CWB_MUX_MASK, pp - PINGPONG_0); + + input = FIELD_PREP(CWB_MODE_MASK, input); + + DPU_REG_WRITE(c, CWB_MUX, cwb_mux_cfg); + DPU_REG_WRITE(c, CWB_MODE, input); +} + +/** + * dpu_hw_cwb_init() - Initializes the writeback hw driver object with cwb. 
+ * @dev: Corresponding device for devres management + * @cfg: CWB catalog entry for which driver object is required + * @addr: mapped register io address of MDP + * Return: Error code or allocated dpu_hw_cwb context + */ +struct dpu_hw_cwb *dpu_hw_cwb_init(struct drm_device *dev, + const struct dpu_cwb_cfg *cfg, + void __iomem *addr) +{ + struct dpu_hw_cwb *c; + + if (!addr) + return ERR_PTR(-EINVAL); + + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); + if (!c) + return ERR_PTR(-ENOMEM); + + c->hw.blk_addr = addr + cfg->base; + c->hw.log_mask = DPU_DBG_MASK_CWB; + + c->idx = cfg->id; + c->ops.config_cwb = dpu_hw_cwb_config; + + return c; +} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.h new file mode 100644 index 000000000000..96b6edf6b2bb --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved + */ + +#ifndef _DPU_HW_CWB_H +#define _DPU_HW_CWB_H + +#include "dpu_hw_util.h" + +struct dpu_hw_cwb; + +enum cwb_mode_input { + INPUT_MODE_LM_OUT, + INPUT_MODE_DSPP_OUT, + INPUT_MODE_MAX +}; + +/** + * struct dpu_hw_cwb_setup_cfg : Describes configuration for CWB mux + * @pp_idx: Index of the real-time pingpong that will + * feed the CWB mux + * @input: Input tap point + */ +struct dpu_hw_cwb_setup_cfg { + enum dpu_pingpong pp_idx; + enum cwb_mode_input input; +}; + +/** + * + * struct dpu_hw_cwb_ops : Interface to the cwb hw driver functions + * @config_cwb: configure CWB mux + */ +struct dpu_hw_cwb_ops { + void (*config_cwb)(struct dpu_hw_cwb *ctx, + struct dpu_hw_cwb_setup_cfg *cwb_cfg); +}; + +/** + * struct dpu_hw_cwb : CWB mux driver object + * @base: Hardware block base structure + * @hw: Block hardware details + * @idx: CWB index + * @ops: handle to operations possible for this CWB + */ +struct dpu_hw_cwb { + struct dpu_hw_blk base; + struct
dpu_hw_blk_reg_map hw; + + enum dpu_cwb idx; + + struct dpu_hw_cwb_ops ops; +}; + +/** + * to_dpu_hw_cwb - convert base object dpu_hw_blk to container + * @hw: Pointer to base hardware block + * return: Pointer to hardware block container + */ +static inline struct dpu_hw_cwb *to_dpu_hw_cwb(struct dpu_hw_blk *hw) +{ + return container_of(hw, struct dpu_hw_cwb, base); +} + +struct dpu_hw_cwb *dpu_hw_cwb_init(struct drm_device *dev, + const struct dpu_cwb_cfg *cfg, + void __iomem *addr); + +#endif /*_DPU_HW_CWB_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h index f8806a4d317b..ba7bb05efe9b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ #ifndef _DPU_HW_MDSS_H @@ -181,10 +183,10 @@ enum dpu_pingpong { PINGPONG_3, PINGPONG_4, PINGPONG_5, - PINGPONG_6, - PINGPONG_7, - PINGPONG_8, - PINGPONG_9, + PINGPONG_CWB_0, + PINGPONG_CWB_1, + PINGPONG_CWB_2, + PINGPONG_CWB_3, PINGPONG_S0, PINGPONG_MAX }; @@ -350,6 +352,7 @@ struct dpu_mdss_color { #define DPU_DBG_MASK_DSPP (1 << 10) #define DPU_DBG_MASK_DSC (1 << 11) #define DPU_DBG_MASK_CDM (1 << 12) +#define DPU_DBG_MASK_CWB (1 << 13) /** * struct dpu_hw_tear_check - Struct contains parameters to configure diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c index fb9f90957762..4853e516c487 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c @@ -173,7 +173,9 @@ static void dpu_hw_wb_bind_pingpong_blk( mux_cfg = DPU_REG_READ(c, WB_MUX); mux_cfg &= ~0xf; - if (pp) + if (pp >= PINGPONG_CWB_0) + mux_cfg |= (pp < PINGPONG_CWB_2) ?
0xd : 0xb; + else if (pp) mux_cfg |= (pp - PINGPONG_0) & 0x7; else mux_cfg |= 0xf; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 8b251f87a052..97e9cb8c2b09 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -51,6 +51,9 @@ #define DPU_DEBUGFS_DIR "msm_dpu" #define DPU_DEBUGFS_HWMASKNAME "hw_log_mask" +bool dpu_use_virtual_planes; +module_param(dpu_use_virtual_planes, bool, 0); + static int dpu_kms_hw_init(struct msm_kms *kms); static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms); @@ -829,8 +832,11 @@ static int _dpu_kms_drm_obj_init(struct dpu_kms *dpu_kms) type, catalog->sspp[i].features, catalog->sspp[i].features & BIT(DPU_SSPP_CURSOR)); - plane = dpu_plane_init(dev, catalog->sspp[i].id, type, - (1UL << max_crtc_count) - 1); + if (dpu_use_virtual_planes) + plane = dpu_plane_init_virtual(dev, type, (1UL << max_crtc_count) - 1); + else + plane = dpu_plane_init(dev, catalog->sspp[i].id, type, + (1UL << max_crtc_count) - 1); if (IS_ERR(plane)) { DPU_ERROR("dpu_plane_init failed\n"); ret = PTR_ERR(plane); @@ -932,12 +938,14 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k /* dump CTL sub-blocks HW regs info */ for (i = 0; i < cat->ctl_count; i++) msm_disp_snapshot_add_block(disp_state, cat->ctl[i].len, - dpu_kms->mmio + cat->ctl[i].base, cat->ctl[i].name); + dpu_kms->mmio + cat->ctl[i].base, "%s", + cat->ctl[i].name); /* dump DSPP sub-blocks HW regs info */ for (i = 0; i < cat->dspp_count; i++) { base = dpu_kms->mmio + cat->dspp[i].base; - msm_disp_snapshot_add_block(disp_state, cat->dspp[i].len, base, cat->dspp[i].name); + msm_disp_snapshot_add_block(disp_state, cat->dspp[i].len, base, + "%s", cat->dspp[i].name); if (cat->dspp[i].sblk && cat->dspp[i].sblk->pcc.len > 0) msm_disp_snapshot_add_block(disp_state, cat->dspp[i].sblk->pcc.len, @@ -949,13 +957,14 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, 
struct msm_k /* dump INTF sub-blocks HW regs info */ for (i = 0; i < cat->intf_count; i++) msm_disp_snapshot_add_block(disp_state, cat->intf[i].len, - dpu_kms->mmio + cat->intf[i].base, cat->intf[i].name); + dpu_kms->mmio + cat->intf[i].base, "%s", + cat->intf[i].name); /* dump PP sub-blocks HW regs info */ for (i = 0; i < cat->pingpong_count; i++) { base = dpu_kms->mmio + cat->pingpong[i].base; msm_disp_snapshot_add_block(disp_state, cat->pingpong[i].len, base, - cat->pingpong[i].name); + "%s", cat->pingpong[i].name); /* TE2 sub-block has length of 0, so will not print it */ @@ -969,7 +978,8 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k /* dump SSPP sub-blocks HW regs info */ for (i = 0; i < cat->sspp_count; i++) { base = dpu_kms->mmio + cat->sspp[i].base; - msm_disp_snapshot_add_block(disp_state, cat->sspp[i].len, base, cat->sspp[i].name); + msm_disp_snapshot_add_block(disp_state, cat->sspp[i].len, base, + "%s", cat->sspp[i].name); if (cat->sspp[i].sblk && cat->sspp[i].sblk->scaler_blk.len > 0) msm_disp_snapshot_add_block(disp_state, cat->sspp[i].sblk->scaler_blk.len, @@ -987,12 +997,14 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k /* dump LM sub-blocks HW regs info */ for (i = 0; i < cat->mixer_count; i++) msm_disp_snapshot_add_block(disp_state, cat->mixer[i].len, - dpu_kms->mmio + cat->mixer[i].base, cat->mixer[i].name); + dpu_kms->mmio + cat->mixer[i].base, + "%s", cat->mixer[i].name); /* dump WB sub-blocks HW regs info */ for (i = 0; i < cat->wb_count; i++) msm_disp_snapshot_add_block(disp_state, cat->wb[i].len, - dpu_kms->mmio + cat->wb[i].base, cat->wb[i].name); + dpu_kms->mmio + cat->wb[i].base, "%s", + cat->wb[i].name); if (cat->mdp[0].features & BIT(DPU_MDP_PERIPH_0_REMOVED)) { msm_disp_snapshot_add_block(disp_state, MDP_PERIPH_TOP0, @@ -1004,10 +1016,16 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k dpu_kms->mmio + cat->mdp[0].base, "top"); } 
+ /* dump CWB sub-blocks HW regs info */ + for (i = 0; i < cat->cwb_count; i++) + msm_disp_snapshot_add_block(disp_state, cat->cwb[i].len, + dpu_kms->mmio + cat->cwb[i].base, cat->cwb[i].name); + /* dump DSC sub-blocks HW regs info */ for (i = 0; i < cat->dsc_count; i++) { base = dpu_kms->mmio + cat->dsc[i].base; - msm_disp_snapshot_add_block(disp_state, cat->dsc[i].len, base, cat->dsc[i].name); + msm_disp_snapshot_add_block(disp_state, cat->dsc[i].len, base, + "%s", cat->dsc[i].name); if (cat->dsc[i].features & BIT(DPU_DSC_HW_REV_1_2)) { struct dpu_dsc_blk enc = cat->dsc[i].sblk->enc; @@ -1022,7 +1040,16 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k if (cat->cdm) msm_disp_snapshot_add_block(disp_state, cat->cdm->len, - dpu_kms->mmio + cat->cdm->base, cat->cdm->name); + dpu_kms->mmio + cat->cdm->base, + "%s", cat->cdm->name); + + for (i = 0; i < dpu_kms->catalog->vbif_count; i++) { + const struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i]; + + msm_disp_snapshot_add_block(disp_state, vbif->len, + dpu_kms->vbif[vbif->id] + vbif->base, + "%s", vbif->name); + } pm_runtime_put_sync(&dpu_kms->pdev->dev); } @@ -1478,6 +1505,7 @@ static const struct of_device_id dpu_dt_match[] = { { .compatible = "qcom,sc8280xp-dpu", .data = &dpu_sc8280xp_cfg, }, { .compatible = "qcom,sm6115-dpu", .data = &dpu_sm6115_cfg, }, { .compatible = "qcom,sm6125-dpu", .data = &dpu_sm6125_cfg, }, + { .compatible = "qcom,sm6150-dpu", .data = &dpu_sm6150_cfg, }, { .compatible = "qcom,sm6350-dpu", .data = &dpu_sm6350_cfg, }, { .compatible = "qcom,sm6375-dpu", .data = &dpu_sm6375_cfg, }, { .compatible = "qcom,sm7150-dpu", .data = &dpu_sm7150_cfg, }, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index 88d64d43ea1a..547cdb2c0c78 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -54,6 +54,8 @@ #define ktime_compare_safe(A, B) \ ktime_compare(ktime_sub((A), (B)), 
ktime_set(0, 0)) +extern bool dpu_use_virtual_planes; + struct dpu_kms { struct msm_kms base; struct drm_device *dev; @@ -128,6 +130,8 @@ struct dpu_global_state { uint32_t dspp_to_enc_id[DSPP_MAX - DSPP_0]; uint32_t dsc_to_enc_id[DSC_MAX - DSC_0]; uint32_t cdm_to_enc_id; + + uint32_t sspp_to_crtc_id[SSPP_MAX - SSPP_NONE]; }; struct dpu_global_state diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 3ffac24333a2..098abc2c0003 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -20,7 +20,6 @@ #include "msm_drv.h" #include "msm_mdss.h" #include "dpu_kms.h" -#include "dpu_formats.h" #include "dpu_hw_sspp.h" #include "dpu_hw_util.h" #include "dpu_trace.h" @@ -878,7 +877,7 @@ static int dpu_plane_atomic_check_nosspp(struct drm_plane *plane, drm_rect_rotate_inv(&pipe_cfg->src_rect, new_plane_state->fb->width, new_plane_state->fb->height, new_plane_state->rotation); - if (r_pipe_cfg->src_rect.x1 != 0) + if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) drm_rect_rotate_inv(&r_pipe_cfg->src_rect, new_plane_state->fb->width, new_plane_state->fb->height, new_plane_state->rotation); @@ -888,6 +887,32 @@ static int dpu_plane_atomic_check_nosspp(struct drm_plane *plane, return 0; } +static int dpu_plane_is_multirect_parallel_capable(struct dpu_hw_sspp *sspp, + struct dpu_sw_pipe_cfg *pipe_cfg, + const struct msm_format *fmt, + uint32_t max_linewidth) +{ + if (drm_rect_width(&pipe_cfg->src_rect) != drm_rect_width(&pipe_cfg->dst_rect) || + drm_rect_height(&pipe_cfg->src_rect) != drm_rect_height(&pipe_cfg->dst_rect)) + return false; + + if (pipe_cfg->rotation & DRM_MODE_ROTATE_90) + return false; + + if (MSM_FORMAT_IS_YUV(fmt)) + return false; + + if (MSM_FORMAT_IS_UBWC(fmt) && + drm_rect_width(&pipe_cfg->src_rect) > max_linewidth / 2) + return false; + + if (!test_bit(DPU_SSPP_SMART_DMA_V1, &sspp->cap->features) && + !test_bit(DPU_SSPP_SMART_DMA_V2, &sspp->cap->features)) + 
return false; + + return true; +} + static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, struct drm_atomic_state *state, const struct drm_crtc_state *crtc_state) @@ -901,7 +926,6 @@ static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, const struct msm_format *fmt; struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg; struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg; - uint32_t max_linewidth; uint32_t supported_rotations; const struct dpu_sspp_cfg *pipe_hw_caps; const struct dpu_sspp_sub_blks *sblk; @@ -923,8 +947,6 @@ static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, fmt = msm_framebuffer_format(new_plane_state->fb); - max_linewidth = pdpu->catalog->caps->max_linewidth; - supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) @@ -940,48 +962,43 @@ static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, return ret; if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) { - /* - * In parallel multirect case only the half of the usual width - * is supported for tiled formats. If we are here, we know that - * full width is more than max_linewidth, thus each rect is - * wider than allowed. 
- */ - if (MSM_FORMAT_IS_UBWC(fmt) && - drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) { - DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u, tiled format\n", - DRM_RECT_ARG(&pipe_cfg->src_rect), max_linewidth); - return -E2BIG; - } + ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, + &crtc_state->adjusted_mode); + if (ret) + return ret; + } - if (drm_rect_width(&pipe_cfg->src_rect) != drm_rect_width(&pipe_cfg->dst_rect) || - drm_rect_height(&pipe_cfg->src_rect) != drm_rect_height(&pipe_cfg->dst_rect) || - (!test_bit(DPU_SSPP_SMART_DMA_V1, &pipe->sspp->cap->features) && - !test_bit(DPU_SSPP_SMART_DMA_V2, &pipe->sspp->cap->features)) || - pipe_cfg->rotation & DRM_MODE_ROTATE_90 || - MSM_FORMAT_IS_YUV(fmt)) { - DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u, can't use split source\n", - DRM_RECT_ARG(&pipe_cfg->src_rect), max_linewidth); - return -E2BIG; - } + return 0; +} + +static bool dpu_plane_try_multirect_parallel(struct dpu_sw_pipe *pipe, struct dpu_sw_pipe_cfg *pipe_cfg, + struct dpu_sw_pipe *r_pipe, struct dpu_sw_pipe_cfg *r_pipe_cfg, + struct dpu_hw_sspp *sspp, const struct msm_format *fmt, + uint32_t max_linewidth) +{ + r_pipe->sspp = NULL; + + pipe->multirect_index = DPU_SSPP_RECT_SOLO; + pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + + r_pipe->multirect_index = DPU_SSPP_RECT_SOLO; + r_pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + + if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) { + if (!dpu_plane_is_multirect_parallel_capable(pipe->sspp, pipe_cfg, fmt, max_linewidth) || + !dpu_plane_is_multirect_parallel_capable(pipe->sspp, r_pipe_cfg, fmt, max_linewidth)) + return false; + + r_pipe->sspp = pipe->sspp; - /* - * Use multirect for wide plane. We do not support dynamic - * assignment of SSPPs, so we know the configuration. 
- */ pipe->multirect_index = DPU_SSPP_RECT_0; pipe->multirect_mode = DPU_SSPP_MULTIRECT_PARALLEL; - r_pipe->sspp = pipe->sspp; r_pipe->multirect_index = DPU_SSPP_RECT_1; r_pipe->multirect_mode = DPU_SSPP_MULTIRECT_PARALLEL; - - ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, - &crtc_state->adjusted_mode); - if (ret) - return ret; } - return 0; + return true; } static int dpu_plane_atomic_check(struct drm_plane *plane, @@ -995,14 +1012,19 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane); struct dpu_sw_pipe *pipe = &pstate->pipe; struct dpu_sw_pipe *r_pipe = &pstate->r_pipe; + struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg; + struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg; const struct drm_crtc_state *crtc_state = NULL; + uint32_t max_linewidth = dpu_kms->catalog->caps->max_linewidth; if (new_plane_state->crtc) crtc_state = drm_atomic_get_new_crtc_state(state, new_plane_state->crtc); pipe->sspp = dpu_rm_get_sspp(&dpu_kms->rm, pdpu->pipe); - r_pipe->sspp = NULL; + + if (!pipe->sspp) + return -EINVAL; ret = dpu_plane_atomic_check_nosspp(plane, new_plane_state, crtc_state); if (ret) @@ -1011,14 +1033,155 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, if (!new_plane_state->visible) return 0; - pipe->multirect_index = DPU_SSPP_RECT_SOLO; - pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; - r_pipe->multirect_index = DPU_SSPP_RECT_SOLO; - r_pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + if (!dpu_plane_try_multirect_parallel(pipe, pipe_cfg, r_pipe, r_pipe_cfg, + pipe->sspp, + msm_framebuffer_format(new_plane_state->fb), + max_linewidth)) { + DPU_DEBUG_PLANE(pdpu, "invalid " DRM_RECT_FMT " /" DRM_RECT_FMT + " max_line:%u, can't use split source\n", + DRM_RECT_ARG(&pipe_cfg->src_rect), + DRM_RECT_ARG(&r_pipe_cfg->src_rect), + max_linewidth); + return -E2BIG; + } return dpu_plane_atomic_check_sspp(plane, state, crtc_state); } +static int 
dpu_plane_virtual_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *plane_state = + drm_atomic_get_plane_state(state, plane); + struct drm_plane_state *old_plane_state = + drm_atomic_get_old_plane_state(state, plane); + struct dpu_plane_state *pstate = to_dpu_plane_state(plane_state); + struct drm_crtc_state *crtc_state; + int ret; + + if (plane_state->crtc) + crtc_state = drm_atomic_get_new_crtc_state(state, + plane_state->crtc); + + ret = dpu_plane_atomic_check_nosspp(plane, plane_state, crtc_state); + if (ret) + return ret; + + if (!plane_state->visible) { + /* + * resources are freed by dpu_crtc_assign_plane_resources(), + * but clean them here. + */ + pstate->pipe.sspp = NULL; + pstate->r_pipe.sspp = NULL; + + return 0; + } + + /* + * Force resource reallocation if the format of FB or src/dst have + * changed. We might need to allocate different SSPP or SSPPs for this + * plane than the one used previously. + */ + if (!old_plane_state || !old_plane_state->fb || + old_plane_state->src_w != plane_state->src_w || + old_plane_state->src_h != plane_state->src_h || + old_plane_state->src_w != plane_state->src_w || + old_plane_state->crtc_h != plane_state->crtc_h || + msm_framebuffer_format(old_plane_state->fb) != + msm_framebuffer_format(plane_state->fb)) + crtc_state->planes_changed = true; + + return 0; +} + +static int dpu_plane_virtual_assign_resources(struct drm_crtc *crtc, + struct dpu_global_state *global_state, + struct drm_atomic_state *state, + struct drm_plane_state *plane_state) +{ + const struct drm_crtc_state *crtc_state = NULL; + struct drm_plane *plane = plane_state->plane; + struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane); + struct dpu_rm_sspp_requirements reqs; + struct dpu_plane_state *pstate; + struct dpu_sw_pipe *pipe; + struct dpu_sw_pipe *r_pipe; + struct dpu_sw_pipe_cfg *pipe_cfg; + struct dpu_sw_pipe_cfg *r_pipe_cfg; + const struct msm_format *fmt; + + if (plane_state->crtc) + crtc_state 
= drm_atomic_get_new_crtc_state(state, + plane_state->crtc); + + pstate = to_dpu_plane_state(plane_state); + pipe = &pstate->pipe; + r_pipe = &pstate->r_pipe; + pipe_cfg = &pstate->pipe_cfg; + r_pipe_cfg = &pstate->r_pipe_cfg; + + pipe->sspp = NULL; + r_pipe->sspp = NULL; + + if (!plane_state->fb) + return -EINVAL; + + fmt = msm_framebuffer_format(plane_state->fb); + reqs.yuv = MSM_FORMAT_IS_YUV(fmt); + reqs.scale = (plane_state->src_w >> 16 != plane_state->crtc_w) || + (plane_state->src_h >> 16 != plane_state->crtc_h); + + reqs.rot90 = drm_rotation_90_or_270(plane_state->rotation); + + pipe->sspp = dpu_rm_reserve_sspp(&dpu_kms->rm, global_state, crtc, &reqs); + if (!pipe->sspp) + return -ENODEV; + + if (!dpu_plane_try_multirect_parallel(pipe, pipe_cfg, r_pipe, r_pipe_cfg, + pipe->sspp, + msm_framebuffer_format(plane_state->fb), + dpu_kms->catalog->caps->max_linewidth)) { + /* multirect is not possible, use two SSPP blocks */ + r_pipe->sspp = dpu_rm_reserve_sspp(&dpu_kms->rm, global_state, crtc, &reqs); + if (!r_pipe->sspp) + return -ENODEV; + + pipe->multirect_index = DPU_SSPP_RECT_SOLO; + pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + + r_pipe->multirect_index = DPU_SSPP_RECT_SOLO; + r_pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + } + + return dpu_plane_atomic_check_sspp(plane, state, crtc_state); +} + +int dpu_assign_plane_resources(struct dpu_global_state *global_state, + struct drm_atomic_state *state, + struct drm_crtc *crtc, + struct drm_plane_state **states, + unsigned int num_planes) +{ + unsigned int i; + int ret; + + for (i = 0; i < num_planes; i++) { + struct drm_plane_state *plane_state = states[i]; + + if (!plane_state || + !plane_state->visible) + continue; + + ret = dpu_plane_virtual_assign_resources(crtc, global_state, + state, plane_state); + if (ret) + break; + } + + return ret; +} + static void dpu_plane_flush_csc(struct dpu_plane *pdpu, struct dpu_sw_pipe *pipe) { const struct msm_format *format = @@ -1335,12 +1498,15 @@ static void 
dpu_plane_atomic_print_state(struct drm_printer *p, drm_printf(p, "\tstage=%d\n", pstate->stage); - drm_printf(p, "\tsspp[0]=%s\n", pipe->sspp->cap->name); - drm_printf(p, "\tmultirect_mode[0]=%s\n", dpu_get_multirect_mode(pipe->multirect_mode)); - drm_printf(p, "\tmultirect_index[0]=%s\n", - dpu_get_multirect_index(pipe->multirect_index)); - drm_printf(p, "\tsrc[0]=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&pipe_cfg->src_rect)); - drm_printf(p, "\tdst[0]=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&pipe_cfg->dst_rect)); + if (pipe->sspp) { + drm_printf(p, "\tsspp[0]=%s\n", pipe->sspp->cap->name); + drm_printf(p, "\tmultirect_mode[0]=%s\n", + dpu_get_multirect_mode(pipe->multirect_mode)); + drm_printf(p, "\tmultirect_index[0]=%s\n", + dpu_get_multirect_index(pipe->multirect_index)); + drm_printf(p, "\tsrc[0]=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&pipe_cfg->src_rect)); + drm_printf(p, "\tdst[0]=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&pipe_cfg->dst_rect)); + } if (r_pipe->sspp) { drm_printf(p, "\tsspp[1]=%s\n", r_pipe->sspp->cap->name); @@ -1433,39 +1599,29 @@ static const struct drm_plane_helper_funcs dpu_plane_helper_funcs = { .atomic_update = dpu_plane_atomic_update, }; -/** - * dpu_plane_init - create new dpu plane for the given pipe - * @dev: Pointer to DRM device - * @pipe: dpu hardware pipe identifier - * @type: Plane type - PRIMARY/OVERLAY/CURSOR - * @possible_crtcs: bitmask of crtc that can be attached to the given pipe - * - * Initialize the plane. 
- */ -struct drm_plane *dpu_plane_init(struct drm_device *dev, - uint32_t pipe, enum drm_plane_type type, - unsigned long possible_crtcs) +static const struct drm_plane_helper_funcs dpu_plane_virtual_helper_funcs = { + .prepare_fb = dpu_plane_prepare_fb, + .cleanup_fb = dpu_plane_cleanup_fb, + .atomic_check = dpu_plane_virtual_atomic_check, + .atomic_update = dpu_plane_atomic_update, +}; + +/* initialize plane */ +static struct drm_plane *dpu_plane_init_common(struct drm_device *dev, + enum drm_plane_type type, + unsigned long possible_crtcs, + bool inline_rotation, + const uint32_t *format_list, + uint32_t num_formats, + enum dpu_sspp pipe) { struct drm_plane *plane = NULL; - const uint32_t *format_list; struct dpu_plane *pdpu; struct msm_drm_private *priv = dev->dev_private; struct dpu_kms *kms = to_dpu_kms(priv->kms); - struct dpu_hw_sspp *pipe_hw; - uint32_t num_formats; uint32_t supported_rotations; int ret; - /* initialize underlying h/w driver */ - pipe_hw = dpu_rm_get_sspp(&kms->rm, pipe); - if (!pipe_hw || !pipe_hw->cap || !pipe_hw->cap->sblk) { - DPU_ERROR("[%u]SSPP is invalid\n", pipe); - return ERR_PTR(-EINVAL); - } - - format_list = pipe_hw->cap->sblk->format_list; - num_formats = pipe_hw->cap->sblk->num_formats; - pdpu = drmm_universal_plane_alloc(dev, struct dpu_plane, base, 0xff, &dpu_plane_funcs, format_list, num_formats, @@ -1491,7 +1647,7 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180; - if (pipe_hw->cap->features & BIT(DPU_SSPP_INLINE_ROTATION)) + if (inline_rotation) supported_rotations |= DRM_MODE_ROTATE_MASK; drm_plane_create_rotation_property(plane, @@ -1499,10 +1655,98 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, drm_plane_enable_fb_damage_clips(plane); - /* success! 
finalize initialization */ + DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name, + pipe, plane->base.id); + return plane; +} + +/** + * dpu_plane_init - create new dpu plane for the given pipe + * @dev: Pointer to DRM device + * @pipe: dpu hardware pipe identifier + * @type: Plane type - PRIMARY/OVERLAY/CURSOR + * @possible_crtcs: bitmask of crtc that can be attached to the given pipe + * + * Initialize the plane. + */ +struct drm_plane *dpu_plane_init(struct drm_device *dev, + uint32_t pipe, enum drm_plane_type type, + unsigned long possible_crtcs) +{ + struct drm_plane *plane = NULL; + struct msm_drm_private *priv = dev->dev_private; + struct dpu_kms *kms = to_dpu_kms(priv->kms); + struct dpu_hw_sspp *pipe_hw; + + /* initialize underlying h/w driver */ + pipe_hw = dpu_rm_get_sspp(&kms->rm, pipe); + if (!pipe_hw || !pipe_hw->cap || !pipe_hw->cap->sblk) { + DPU_ERROR("[%u]SSPP is invalid\n", pipe); + return ERR_PTR(-EINVAL); + } + + + plane = dpu_plane_init_common(dev, type, possible_crtcs, + pipe_hw->cap->features & BIT(DPU_SSPP_INLINE_ROTATION), + pipe_hw->cap->sblk->format_list, + pipe_hw->cap->sblk->num_formats, + pipe); + if (IS_ERR(plane)) + return plane; + drm_plane_helper_add(plane, &dpu_plane_helper_funcs); DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name, pipe, plane->base.id); + + return plane; +} + +/** + * dpu_plane_init_virtual - create new virtualized DPU plane + * @dev: Pointer to DRM device + * @type: Plane type - PRIMARY/OVERLAY/CURSOR + * @possible_crtcs: bitmask of crtc that can be attached to the given pipe + * + * Initialize the virtual plane with no backing SSPP / pipe. 
+ */ +struct drm_plane *dpu_plane_init_virtual(struct drm_device *dev, + enum drm_plane_type type, + unsigned long possible_crtcs) +{ + struct drm_plane *plane = NULL; + struct msm_drm_private *priv = dev->dev_private; + struct dpu_kms *kms = to_dpu_kms(priv->kms); + bool has_inline_rotation = false; + const u32 *format_list = NULL; + u32 num_formats = 0; + int i; + + /* Determine the largest configuration that we can implement */ + for (i = 0; i < kms->catalog->sspp_count; i++) { + const struct dpu_sspp_cfg *cfg = &kms->catalog->sspp[i]; + + if (test_bit(DPU_SSPP_INLINE_ROTATION, &cfg->features)) + has_inline_rotation = true; + + if (!format_list || + cfg->sblk->csc_blk.len) { + format_list = cfg->sblk->format_list; + num_formats = cfg->sblk->num_formats; + } + } + + plane = dpu_plane_init_common(dev, type, possible_crtcs, + has_inline_rotation, + format_list, + num_formats, + SSPP_NONE); + if (IS_ERR(plane)) + return plane; + + drm_plane_helper_add(plane, &dpu_plane_virtual_helper_funcs); + + DPU_DEBUG("%s created virtual id:%u\n", plane->name, plane->base.id); + return plane; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h index 97090ca7842b..acd5725175cd 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h @@ -62,10 +62,23 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, uint32_t pipe, enum drm_plane_type type, unsigned long possible_crtcs); +struct drm_plane *dpu_plane_init_virtual(struct drm_device *dev, + enum drm_plane_type type, + unsigned long possible_crtcs); + +int dpu_plane_color_fill(struct drm_plane *plane, + uint32_t color, uint32_t alpha); + #ifdef CONFIG_DEBUG_FS void dpu_plane_danger_signal_ctrl(struct drm_plane *plane, bool enable); #else static inline void dpu_plane_danger_signal_ctrl(struct drm_plane *plane, bool enable) {} #endif +int dpu_assign_plane_resources(struct dpu_global_state *global_state, + struct drm_atomic_state 
*state, + struct drm_crtc *crtc, + struct drm_plane_state **states, + unsigned int num_planes); + #endif /* _DPU_PLANE_H_ */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index c247af03dc8e..5baf9df702b8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #define pr_fmt(fmt) "[drm:%s] " fmt, __func__ @@ -9,6 +9,7 @@ #include "dpu_hw_lm.h" #include "dpu_hw_ctl.h" #include "dpu_hw_cdm.h" +#include "dpu_hw_cwb.h" #include "dpu_hw_pingpong.h" #include "dpu_hw_sspp.h" #include "dpu_hw_intf.h" @@ -27,14 +28,6 @@ static inline bool reserved_by_other(uint32_t *res_map, int idx, } /** - * struct dpu_rm_requirements - Reservation requirements parameter bundle - * @topology: selected topology for the display - */ -struct dpu_rm_requirements { - struct msm_display_topology topology; -}; - -/** * dpu_rm_init - Read hardware catalog and create reservation tracking objects * for all HW blocks. * @dev: Corresponding device for devres management @@ -130,6 +123,19 @@ int dpu_rm_init(struct drm_device *dev, rm->hw_wb[wb->id - WB_0] = hw; } + for (i = 0; i < cat->cwb_count; i++) { + struct dpu_hw_cwb *hw; + const struct dpu_cwb_cfg *cwb = &cat->cwb[i]; + + hw = dpu_hw_cwb_init(dev, cwb, mmio); + if (IS_ERR(hw)) { + rc = PTR_ERR(hw); + DPU_ERROR("failed cwb object creation: err %d\n", rc); + goto fail; + } + rm->cwb_blks[cwb->id - CWB_0] = &hw->base; + } + for (i = 0; i < cat->ctl_count; i++) { struct dpu_hw_ctl *hw; const struct dpu_ctl_cfg *ctl = &cat->ctl[i]; @@ -241,14 +247,13 @@ static int _dpu_rm_get_lm_peer(struct dpu_rm *rm, int primary_idx) * mixer in rm->pingpong_blks[]. 
* @dspp_idx: output parameter, index of dspp block attached to the layer * mixer in rm->dspp_blks[]. - * @reqs: input parameter, rm requirements for HW blocks needed in the - * datapath. + * @topology: selected topology for the display * Return: true if lm matches all requirements, false otherwise */ static bool _dpu_rm_check_lm_and_get_connected_blks(struct dpu_rm *rm, struct dpu_global_state *global_state, uint32_t enc_id, int lm_idx, int *pp_idx, int *dspp_idx, - struct dpu_rm_requirements *reqs) + struct msm_display_topology *topology) { const struct dpu_lm_cfg *lm_cfg; int idx; @@ -273,7 +278,7 @@ static bool _dpu_rm_check_lm_and_get_connected_blks(struct dpu_rm *rm, } *pp_idx = idx; - if (!reqs->topology.num_dspp) + if (!topology->num_dspp) return true; idx = lm_cfg->dspp - DSPP_0; @@ -295,7 +300,7 @@ static bool _dpu_rm_check_lm_and_get_connected_blks(struct dpu_rm *rm, static int _dpu_rm_reserve_lms(struct dpu_rm *rm, struct dpu_global_state *global_state, uint32_t enc_id, - struct dpu_rm_requirements *reqs) + struct msm_display_topology *topology) { int lm_idx[MAX_BLOCKS]; @@ -303,14 +308,14 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, int dspp_idx[MAX_BLOCKS] = {0}; int i, lm_count = 0; - if (!reqs->topology.num_lm) { - DPU_ERROR("invalid number of lm: %d\n", reqs->topology.num_lm); + if (!topology->num_lm) { + DPU_ERROR("invalid number of lm: %d\n", topology->num_lm); return -EINVAL; } /* Find a primary mixer */ for (i = 0; i < ARRAY_SIZE(rm->mixer_blks) && - lm_count < reqs->topology.num_lm; i++) { + lm_count < topology->num_lm; i++) { if (!rm->mixer_blks[i]) continue; @@ -319,14 +324,14 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, if (!_dpu_rm_check_lm_and_get_connected_blks(rm, global_state, enc_id, i, &pp_idx[lm_count], - &dspp_idx[lm_count], reqs)) { + &dspp_idx[lm_count], topology)) { continue; } ++lm_count; /* Valid primary mixer found, find matching peers */ - if (lm_count < reqs->topology.num_lm) { + if (lm_count < 
topology->num_lm) { int j = _dpu_rm_get_lm_peer(rm, i); /* ignore the peer if there is an error or if the peer was already processed */ @@ -339,7 +344,7 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, if (!_dpu_rm_check_lm_and_get_connected_blks(rm, global_state, enc_id, j, &pp_idx[lm_count], &dspp_idx[lm_count], - reqs)) { + topology)) { continue; } @@ -348,7 +353,7 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, } } - if (lm_count != reqs->topology.num_lm) { + if (lm_count != topology->num_lm) { DPU_DEBUG("unable to find appropriate mixers\n"); return -ENAVAIL; } @@ -357,7 +362,7 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, global_state->mixer_to_enc_id[lm_idx[i]] = enc_id; global_state->pingpong_to_enc_id[pp_idx[i]] = enc_id; global_state->dspp_to_enc_id[dspp_idx[i]] = - reqs->topology.num_dspp ? enc_id : 0; + topology->num_dspp ? enc_id : 0; trace_dpu_rm_reserve_lms(lm_idx[i] + LM_0, enc_id, pp_idx[i] + PINGPONG_0); @@ -594,28 +599,28 @@ static int _dpu_rm_make_reservation( struct dpu_rm *rm, struct dpu_global_state *global_state, struct drm_encoder *enc, - struct dpu_rm_requirements *reqs) + struct msm_display_topology *topology) { int ret; - ret = _dpu_rm_reserve_lms(rm, global_state, enc->base.id, reqs); + ret = _dpu_rm_reserve_lms(rm, global_state, enc->base.id, topology); if (ret) { DPU_ERROR("unable to find appropriate mixers\n"); return ret; } ret = _dpu_rm_reserve_ctls(rm, global_state, enc->base.id, - &reqs->topology); + topology); if (ret) { DPU_ERROR("unable to find appropriate CTL\n"); return ret; } - ret = _dpu_rm_reserve_dsc(rm, global_state, enc, &reqs->topology); + ret = _dpu_rm_reserve_dsc(rm, global_state, enc, topology); if (ret) return ret; - if (reqs->topology.needs_cdm) { + if (topology->needs_cdm) { ret = _dpu_rm_reserve_cdm(rm, global_state, enc); if (ret) { DPU_ERROR("unable to find CDM blk\n"); @@ -626,20 +631,6 @@ static int _dpu_rm_make_reservation( return ret; } -static int _dpu_rm_populate_requirements( - struct 
drm_encoder *enc, - struct dpu_rm_requirements *reqs, - struct msm_display_topology req_topology) -{ - reqs->topology = req_topology; - - DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d cdm: %d\n", - reqs->topology.num_lm, reqs->topology.num_dsc, - reqs->topology.num_intf, reqs->topology.needs_cdm); - - return 0; -} - static void _dpu_rm_clear_mapping(uint32_t *res_mapping, int cnt, uint32_t enc_id) { @@ -693,9 +684,8 @@ int dpu_rm_reserve( struct dpu_global_state *global_state, struct drm_encoder *enc, struct drm_crtc_state *crtc_state, - struct msm_display_topology topology) + struct msm_display_topology *topology) { - struct dpu_rm_requirements reqs; int ret; /* Check if this is just a page-flip */ @@ -710,13 +700,11 @@ int dpu_rm_reserve( DRM_DEBUG_KMS("reserving hw for enc %d crtc %d\n", enc->base.id, crtc_state->crtc->base.id); - ret = _dpu_rm_populate_requirements(enc, &reqs, topology); - if (ret) { - DPU_ERROR("failed to populate hw requirements\n"); - return ret; - } + DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d\n", + topology->num_lm, topology->num_dsc, + topology->num_intf); - ret = _dpu_rm_make_reservation(rm, global_state, enc, &reqs); + ret = _dpu_rm_make_reservation(rm, global_state, enc, topology); if (ret) DPU_ERROR("failed to reserve hw resources: %d\n", ret); @@ -725,6 +713,88 @@ int dpu_rm_reserve( return ret; } +static struct dpu_hw_sspp *dpu_rm_try_sspp(struct dpu_rm *rm, + struct dpu_global_state *global_state, + struct drm_crtc *crtc, + struct dpu_rm_sspp_requirements *reqs, + unsigned int type) +{ + uint32_t crtc_id = crtc->base.id; + struct dpu_hw_sspp *hw_sspp; + int i; + + for (i = 0; i < ARRAY_SIZE(rm->hw_sspp); i++) { + if (!rm->hw_sspp[i]) + continue; + + if (global_state->sspp_to_crtc_id[i]) + continue; + + hw_sspp = rm->hw_sspp[i]; + + if (hw_sspp->cap->type != type) + continue; + + if (reqs->scale && !hw_sspp->cap->sblk->scaler_blk.len) + continue; + + // TODO: QSEED2 and RGB scalers are not yet supported + if 
(reqs->scale && !hw_sspp->ops.setup_scaler) + continue; + + if (reqs->yuv && !hw_sspp->cap->sblk->csc_blk.len) + continue; + + if (reqs->rot90 && !(hw_sspp->cap->features & DPU_SSPP_INLINE_ROTATION)) + continue; + + global_state->sspp_to_crtc_id[i] = crtc_id; + + return rm->hw_sspp[i]; + } + + return NULL; +} + +/** + * dpu_rm_reserve_sspp - Reserve the required SSPP for the provided CRTC + * @rm: DPU Resource Manager handle + * @global_state: private global state + * @crtc: DRM CRTC handle + * @reqs: SSPP required features + */ +struct dpu_hw_sspp *dpu_rm_reserve_sspp(struct dpu_rm *rm, + struct dpu_global_state *global_state, + struct drm_crtc *crtc, + struct dpu_rm_sspp_requirements *reqs) +{ + struct dpu_hw_sspp *hw_sspp = NULL; + + if (!reqs->scale && !reqs->yuv) + hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_DMA); + if (!hw_sspp && reqs->scale) + hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_RGB); + if (!hw_sspp) + hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_VIG); + + return hw_sspp; +} + +/** + * dpu_rm_release_all_sspp - Given the CRTC, release all SSPP + * blocks previously reserved for that use case. 
+ * @global_state: resources shared across multiple kms objects + * @crtc: DRM CRTC handle + */ +void dpu_rm_release_all_sspp(struct dpu_global_state *global_state, + struct drm_crtc *crtc) +{ + uint32_t crtc_id = crtc->base.id; + + _dpu_rm_clear_mapping(global_state->sspp_to_crtc_id, + ARRAY_SIZE(global_state->sspp_to_crtc_id), crtc_id); +} + /** * dpu_rm_get_assigned_resources - Get hw resources of the given type that are * assigned to this encoder @@ -859,4 +929,11 @@ void dpu_rm_print_state(struct drm_printer *p, dpu_rm_print_state_helper(p, rm->cdm_blk, global_state->cdm_to_enc_id); drm_puts(p, "\n"); + + drm_puts(p, "\tsspp="); + /* skip SSPP_NONE and start from the next index */ + for (i = SSPP_NONE + 1; i < ARRAY_SIZE(global_state->sspp_to_crtc_id); i++) + dpu_rm_print_state_helper(p, rm->hw_sspp[i] ? &rm->hw_sspp[i]->base : NULL, + global_state->sspp_to_crtc_id[i]); + drm_puts(p, "\n"); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h index ea0e49cb7b0d..99bd594ee0d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h @@ -20,6 +20,7 @@ struct dpu_global_state; * @ctl_blks: array of ctl hardware resources * @hw_intf: array of intf hardware resources * @hw_wb: array of wb hardware resources + * @hw_cwb: array of cwb hardware resources * @dspp_blks: array of dspp hardware resources * @hw_sspp: array of sspp hardware resources * @cdm_blk: cdm hardware resource @@ -30,6 +31,7 @@ struct dpu_rm { struct dpu_hw_blk *ctl_blks[CTL_MAX - CTL_0]; struct dpu_hw_intf *hw_intf[INTF_MAX - INTF_0]; struct dpu_hw_wb *hw_wb[WB_MAX - WB_0]; + struct dpu_hw_blk *cwb_blks[CWB_MAX - CWB_0]; struct dpu_hw_blk *dspp_blks[DSPP_MAX - DSPP_0]; struct dpu_hw_blk *merge_3d_blks[MERGE_3D_MAX - MERGE_3D_0]; struct dpu_hw_blk *dsc_blks[DSC_MAX - DSC_0]; @@ -37,6 +39,12 @@ struct dpu_rm { struct dpu_hw_blk *cdm_blk; }; +struct dpu_rm_sspp_requirements { + bool yuv; + bool scale; + bool rot90; +}; + 
/** * struct msm_display_topology - defines a display topology pipeline * @num_lm: number of layer mixers used @@ -63,11 +71,19 @@ int dpu_rm_reserve(struct dpu_rm *rm, struct dpu_global_state *global_state, struct drm_encoder *drm_enc, struct drm_crtc_state *crtc_state, - struct msm_display_topology topology); + struct msm_display_topology *topology); void dpu_rm_release(struct dpu_global_state *global_state, struct drm_encoder *enc); +struct dpu_hw_sspp *dpu_rm_reserve_sspp(struct dpu_rm *rm, + struct dpu_global_state *global_state, + struct drm_crtc *crtc, + struct dpu_rm_sspp_requirements *reqs); + +void dpu_rm_release_all_sspp(struct dpu_global_state *global_state, + struct drm_crtc *crtc); + int dpu_rm_get_assigned_resources(struct dpu_rm *rm, struct dpu_global_state *global_state, uint32_t enc_id, enum dpu_hw_blk_type type, struct dpu_hw_blk **blks, int blks_size); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 16f144cbc0c9..8ff496082902 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -42,9 +42,6 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector, if (!conn_state || !conn_state->connector) { DPU_ERROR("invalid connector state\n"); return -EINVAL; - } else if (conn_state->connector->status != connector_status_connected) { - DPU_ERROR("connector not connected %d\n", conn_state->connector->status); - return -EINVAL; } crtc = conn_state->crtc; diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c index 576995ddce37..8bbc7fb881d5 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c @@ -389,7 +389,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, /* TODO: different regulators in other cases? 
*/ mdp4_lcdc_encoder->regs[0].supply = "lvds-vccs-3p3v"; - mdp4_lcdc_encoder->regs[1].supply = "lvds-vccs-3p3v"; + mdp4_lcdc_encoder->regs[1].supply = "lvds-pll-vdda"; mdp4_lcdc_encoder->regs[2].supply = "lvds-vdda"; ret = devm_regulator_bulk_get(dev->dev, diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c index e75b97127c0d..2be00b11e557 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c @@ -109,7 +109,7 @@ int msm_disp_snapshot_init(struct drm_device *drm_dev) mutex_init(&kms->dump_mutex); - kms->dump_worker = kthread_create_worker(0, "%s", "disp_snapshot"); + kms->dump_worker = kthread_run_worker(0, "%s", "disp_snapshot"); if (IS_ERR(kms->dump_worker)) DRM_ERROR("failed to create disp state task\n"); diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c index 0fd5e0abaf07..70fdc9fe228a 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.c +++ b/drivers/gpu/drm/msm/dp/dp_audio.c @@ -14,6 +14,7 @@ #include "dp_catalog.h" #include "dp_audio.h" #include "dp_panel.h" +#include "dp_reg.h" #include "dp_display.h" #include "dp_utils.h" @@ -28,251 +29,64 @@ struct msm_dp_audio_private { struct msm_dp_audio msm_dp_audio; }; -static u32 msm_dp_audio_get_header(struct msm_dp_catalog *catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header) -{ - return msm_dp_catalog_audio_get_header(catalog, sdp, header); -} - -static void msm_dp_audio_set_header(struct msm_dp_catalog *catalog, - u32 data, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header) -{ - msm_dp_catalog_audio_set_header(catalog, sdp, header, data); -} - static void msm_dp_audio_stream_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - 
DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x02; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_1); - - /* Config header and parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_2); - new_value = value; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_2); - - /* Config header and parity byte 3 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_3); - - new_value = audio->channels - 1; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_3_BIT) - | (parity_byte << PARITY_BYTE_3_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 3: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_3); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x02, + .HB2 = 0x00, + .HB3 = audio->channels - 1, + }; + + msm_dp_catalog_write_audio_stream(audio->catalog, &sdp_hdr); } static void msm_dp_audio_timestamp_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x1; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= 
((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_1); - - /* Config header and parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_2); - - new_value = 0x17; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_2); - - /* Config header and parity byte 3 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_3); - - new_value = (0x0 | (0x11 << 2)); - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_3_BIT) - | (parity_byte << PARITY_BYTE_3_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 3: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_3); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x01, + .HB2 = 0x17, + .HB3 = 0x0 | (0x11 << 2), + }; + + msm_dp_catalog_write_audio_timestamp(audio->catalog, &sdp_hdr); } static void msm_dp_audio_infoframe_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x84; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value = 
0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_1); - - /* Config header and parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_2); - - new_value = 0x1b; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_2); - - /* Config header and parity byte 3 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_3); - - new_value = (0x0 | (0x11 << 2)); - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_3_BIT) - | (parity_byte << PARITY_BYTE_3_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 3: value = 0x%x, parity_byte = 0x%x\n", - new_value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_3); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x84, + .HB2 = 0x1b, + .HB3 = 0x0 | (0x11 << 2), + }; + + msm_dp_catalog_write_audio_infoframe(audio->catalog, &sdp_hdr); } static void msm_dp_audio_copy_management_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x05; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - 
DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_1); - - /* Config header and parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_2); - - new_value = 0x0F; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_2); - - /* Config header and parity byte 3 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_3); - - new_value = 0x0; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_3_BIT) - | (parity_byte << PARITY_BYTE_3_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 3: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_3); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x05, + .HB2 = 0x0f, + .HB3 = 0x00, + }; + + msm_dp_catalog_write_audio_copy_mgmt(audio->catalog, &sdp_hdr); } static void msm_dp_audio_isrc_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_ISRC, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x06; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_ISRC, DP_AUDIO_SDP_HEADER_1); - - /* Config header and parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - 
DP_AUDIO_SDP_ISRC, DP_AUDIO_SDP_HEADER_2); - - new_value = 0x0F; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_ISRC, DP_AUDIO_SDP_HEADER_2); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x06, + .HB2 = 0x0f, + .HB3 = 0x00, + }; + + msm_dp_catalog_write_audio_isrc(audio->catalog, &sdp_hdr); } static void msm_dp_audio_setup_sdp(struct msm_dp_audio_private *audio) @@ -329,10 +143,10 @@ static void msm_dp_audio_safe_to_exit_level(struct msm_dp_audio_private *audio) safe_to_exit_level = 5; break; default: + safe_to_exit_level = 14; drm_dbg_dp(audio->drm_dev, "setting the default safe_to_exit_level = %u\n", safe_to_exit_level); - safe_to_exit_level = 14; break; } @@ -539,14 +353,13 @@ int msm_dp_register_audio_driver(struct device *dev, } struct msm_dp_audio *msm_dp_audio_get(struct platform_device *pdev, - struct msm_dp_panel *panel, struct msm_dp_catalog *catalog) { int rc = 0; struct msm_dp_audio_private *audio; struct msm_dp_audio *msm_dp_audio; - if (!pdev || !panel || !catalog) { + if (!pdev || !catalog) { DRM_ERROR("invalid input\n"); rc = -EINVAL; goto error; @@ -563,8 +376,6 @@ struct msm_dp_audio *msm_dp_audio_get(struct platform_device *pdev, msm_dp_audio = &audio->msm_dp_audio; - msm_dp_catalog_audio_init(catalog); - return msm_dp_audio; error: return ERR_PTR(rc); diff --git a/drivers/gpu/drm/msm/dp/dp_audio.h b/drivers/gpu/drm/msm/dp/dp_audio.h index 1c9efaaa40e5..beea34cbab77 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.h +++ b/drivers/gpu/drm/msm/dp/dp_audio.h @@ -8,7 +8,6 @@ #include <linux/platform_device.h> -#include "dp_panel.h" #include "dp_catalog.h" #include <sound/hdmi-codec.h> @@ -28,14 +27,12 @@ struct msm_dp_audio { * Creates and instance of dp audio. 
* * @pdev: caller's platform device instance. - * @panel: an instance of msm_dp_panel module. * @catalog: an instance of msm_dp_catalog module. * * Returns the error code in case of failure, otherwize * an instance of newly created msm_dp_module. */ struct msm_dp_audio *msm_dp_audio_get(struct platform_device *pdev, - struct msm_dp_panel *panel, struct msm_dp_catalog *catalog); /** diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index b4c8856fb25d..7b7eadb2f83b 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -79,7 +79,6 @@ struct msm_dp_catalog_private { struct device *dev; struct drm_device *drm_dev; struct dss_io_data io; - u32 (*audio_map)[DP_AUDIO_SDP_HEADER_MAX]; struct msm_dp_catalog msm_dp_catalog; }; @@ -276,43 +275,6 @@ int msm_dp_catalog_aux_wait_for_hpd_connect_state(struct msm_dp_catalog *msm_dp_ min(wait_us, 2000), wait_us); } -static void dump_regs(void __iomem *base, int len) -{ - int i; - u32 x0, x4, x8, xc; - u32 addr_off = 0; - - len = DIV_ROUND_UP(len, 16); - for (i = 0; i < len; i++) { - x0 = readl_relaxed(base + addr_off); - x4 = readl_relaxed(base + addr_off + 0x04); - x8 = readl_relaxed(base + addr_off + 0x08); - xc = readl_relaxed(base + addr_off + 0x0c); - - pr_info("%08x: %08x %08x %08x %08x", addr_off, x0, x4, x8, xc); - addr_off += 16; - } -} - -void msm_dp_catalog_dump_regs(struct msm_dp_catalog *msm_dp_catalog) -{ - struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, - struct msm_dp_catalog_private, msm_dp_catalog); - struct dss_io_data *io = &catalog->io; - - pr_info("AHB regs\n"); - dump_regs(io->ahb.base, io->ahb.len); - - pr_info("AUXCLK regs\n"); - dump_regs(io->aux.base, io->aux.len); - - pr_info("LCLK regs\n"); - dump_regs(io->link.base, io->link.len); - - pr_info("P0CLK regs\n"); - dump_regs(io->p0.base, io->p0.len); -} - u32 msm_dp_catalog_aux_get_irq(struct msm_dp_catalog *msm_dp_catalog) { struct msm_dp_catalog_private 
*catalog = container_of(msm_dp_catalog, @@ -1036,7 +998,6 @@ void msm_dp_catalog_panel_tpg_enable(struct msm_dp_catalog *msm_dp_catalog, display_hctl = (hsync_end_x << 16) | hsync_start_x; - msm_dp_write_p0(catalog, MMSS_DP_INTF_CONFIG, 0x0); msm_dp_write_p0(catalog, MMSS_DP_INTF_HSYNC_CTL, hsync_ctl); msm_dp_write_p0(catalog, MMSS_DP_INTF_VSYNC_PERIOD_F0, vsync_period * hsync_period); @@ -1160,38 +1121,75 @@ struct msm_dp_catalog *msm_dp_catalog_get(struct device *dev) return &catalog->msm_dp_catalog; } -u32 msm_dp_catalog_audio_get_header(struct msm_dp_catalog *msm_dp_catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header) +void msm_dp_catalog_write_audio_stream(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) { - struct msm_dp_catalog_private *catalog; - u32 (*sdp_map)[DP_AUDIO_SDP_HEADER_MAX]; + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + u32 header[2]; - catalog = container_of(msm_dp_catalog, - struct msm_dp_catalog_private, msm_dp_catalog); + msm_dp_utils_pack_sdp_header(sdp_hdr, header); + + msm_dp_write_link(catalog, MMSS_DP_AUDIO_STREAM_0, header[0]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_STREAM_1, header[1]); +} + +void msm_dp_catalog_write_audio_timestamp(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) +{ + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + u32 header[2]; - sdp_map = catalog->audio_map; + msm_dp_utils_pack_sdp_header(sdp_hdr, header); - return msm_dp_read_link(catalog, sdp_map[sdp][header]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_TIMESTAMP_0, header[0]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_TIMESTAMP_1, header[1]); } -void msm_dp_catalog_audio_set_header(struct msm_dp_catalog *msm_dp_catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header, - u32 
data) +void msm_dp_catalog_write_audio_infoframe(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) { - struct msm_dp_catalog_private *catalog; - u32 (*sdp_map)[DP_AUDIO_SDP_HEADER_MAX]; + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + u32 header[2]; - if (!msm_dp_catalog) - return; + msm_dp_utils_pack_sdp_header(sdp_hdr, header); - catalog = container_of(msm_dp_catalog, - struct msm_dp_catalog_private, msm_dp_catalog); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_INFOFRAME_0, header[0]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_INFOFRAME_1, header[1]); +} + +void msm_dp_catalog_write_audio_copy_mgmt(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) +{ + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + u32 header[2]; - sdp_map = catalog->audio_map; + msm_dp_utils_pack_sdp_header(sdp_hdr, header); - msm_dp_write_link(catalog, sdp_map[sdp][header], data); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_COPYMANAGEMENT_0, header[0]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_COPYMANAGEMENT_1, header[1]); +} + +void msm_dp_catalog_write_audio_isrc(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) +{ + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + struct dp_sdp_header tmp = *sdp_hdr; + u32 header[2]; + u32 reg; + + /* XXX: is it necessary to preserve this field? 
*/ + reg = msm_dp_read_link(catalog, MMSS_DP_AUDIO_ISRC_1); + tmp.HB3 = FIELD_GET(HEADER_3_MASK, reg); + + msm_dp_utils_pack_sdp_header(&tmp, header); + + msm_dp_write_link(catalog, MMSS_DP_AUDIO_ISRC_0, header[0]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_ISRC_1, header[1]); } void msm_dp_catalog_audio_config_acr(struct msm_dp_catalog *msm_dp_catalog, u32 select) @@ -1277,47 +1275,6 @@ void msm_dp_catalog_audio_config_sdp(struct msm_dp_catalog *msm_dp_catalog) msm_dp_write_link(catalog, MMSS_DP_SDP_CFG2, sdp_cfg2); } -void msm_dp_catalog_audio_init(struct msm_dp_catalog *msm_dp_catalog) -{ - struct msm_dp_catalog_private *catalog; - - static u32 sdp_map[][DP_AUDIO_SDP_HEADER_MAX] = { - { - MMSS_DP_AUDIO_STREAM_0, - MMSS_DP_AUDIO_STREAM_1, - MMSS_DP_AUDIO_STREAM_1, - }, - { - MMSS_DP_AUDIO_TIMESTAMP_0, - MMSS_DP_AUDIO_TIMESTAMP_1, - MMSS_DP_AUDIO_TIMESTAMP_1, - }, - { - MMSS_DP_AUDIO_INFOFRAME_0, - MMSS_DP_AUDIO_INFOFRAME_1, - MMSS_DP_AUDIO_INFOFRAME_1, - }, - { - MMSS_DP_AUDIO_COPYMANAGEMENT_0, - MMSS_DP_AUDIO_COPYMANAGEMENT_1, - MMSS_DP_AUDIO_COPYMANAGEMENT_1, - }, - { - MMSS_DP_AUDIO_ISRC_0, - MMSS_DP_AUDIO_ISRC_1, - MMSS_DP_AUDIO_ISRC_1, - }, - }; - - if (!msm_dp_catalog) - return; - - catalog = container_of(msm_dp_catalog, - struct msm_dp_catalog_private, msm_dp_catalog); - - catalog->audio_map = sdp_map; -} - void msm_dp_catalog_audio_sfe_level(struct msm_dp_catalog *msm_dp_catalog, u32 safe_to_exit_level) { struct msm_dp_catalog_private *catalog; diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.h b/drivers/gpu/drm/msm/dp/dp_catalog.h index e932b17eecbf..6678b0ac9a67 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.h +++ b/drivers/gpu/drm/msm/dp/dp_catalog.h @@ -31,22 +31,6 @@ #define DP_HW_VERSION_1_0 0x10000000 #define DP_HW_VERSION_1_2 0x10020000 -enum msm_dp_catalog_audio_sdp_type { - DP_AUDIO_SDP_STREAM, - DP_AUDIO_SDP_TIMESTAMP, - DP_AUDIO_SDP_INFOFRAME, - DP_AUDIO_SDP_COPYMANAGEMENT, - DP_AUDIO_SDP_ISRC, - DP_AUDIO_SDP_MAX, -}; - -enum 
msm_dp_catalog_audio_header_type { - DP_AUDIO_SDP_HEADER_1, - DP_AUDIO_SDP_HEADER_2, - DP_AUDIO_SDP_HEADER_3, - DP_AUDIO_SDP_HEADER_MAX, -}; - struct msm_dp_catalog { bool wide_bus_en; }; @@ -104,7 +88,6 @@ int msm_dp_catalog_panel_timing_cfg(struct msm_dp_catalog *msm_dp_catalog, u32 t u32 sync_start, u32 width_blanking, u32 msm_dp_active); void msm_dp_catalog_panel_enable_vsc_sdp(struct msm_dp_catalog *msm_dp_catalog, struct dp_sdp *vsc_sdp); void msm_dp_catalog_panel_disable_vsc_sdp(struct msm_dp_catalog *msm_dp_catalog); -void msm_dp_catalog_dump_regs(struct msm_dp_catalog *msm_dp_catalog); void msm_dp_catalog_panel_tpg_enable(struct msm_dp_catalog *msm_dp_catalog, struct drm_display_mode *drm_mode); void msm_dp_catalog_panel_tpg_disable(struct msm_dp_catalog *msm_dp_catalog); @@ -112,17 +95,19 @@ void msm_dp_catalog_panel_tpg_disable(struct msm_dp_catalog *msm_dp_catalog); struct msm_dp_catalog *msm_dp_catalog_get(struct device *dev); /* DP Audio APIs */ -u32 msm_dp_catalog_audio_get_header(struct msm_dp_catalog *msm_dp_catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header); -void msm_dp_catalog_audio_set_header(struct msm_dp_catalog *msm_dp_catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header, - u32 data); +void msm_dp_catalog_write_audio_stream(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); +void msm_dp_catalog_write_audio_timestamp(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); +void msm_dp_catalog_write_audio_infoframe(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); +void msm_dp_catalog_write_audio_copy_mgmt(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); +void msm_dp_catalog_write_audio_isrc(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); void msm_dp_catalog_audio_config_acr(struct msm_dp_catalog *catalog, u32 select); void 
msm_dp_catalog_audio_enable(struct msm_dp_catalog *catalog, bool enable); void msm_dp_catalog_audio_config_sdp(struct msm_dp_catalog *catalog); -void msm_dp_catalog_audio_init(struct msm_dp_catalog *catalog); void msm_dp_catalog_audio_sfe_level(struct msm_dp_catalog *catalog, u32 safe_to_exit_level); #endif /* _DP_CATALOG_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index bc2ca8133b79..9c463ae2f8fa 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -178,7 +178,6 @@ static void msm_dp_ctrl_configure_source_params(struct msm_dp_ctrl_private *ctrl u32 cc, tb; msm_dp_catalog_ctrl_lane_mapping(ctrl->catalog); - msm_dp_catalog_ctrl_mainlink_ctrl(ctrl->catalog, true); msm_dp_catalog_setup_peripheral_flush(ctrl->catalog); msm_dp_ctrl_config_ctrl(ctrl); @@ -2071,6 +2070,7 @@ void msm_dp_ctrl_off_link(struct msm_dp_ctrl *msm_dp_ctrl) msm_dp_catalog_ctrl_mainlink_ctrl(ctrl->catalog, false); + dev_pm_opp_set_rate(ctrl->dev, 0); msm_dp_ctrl_link_clk_disable(&ctrl->msm_dp_ctrl); DRM_DEBUG_DP("Before, phy=%p init_count=%d power_on=%d\n", diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index aff51bb973eb..24dd37f1682b 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -722,9 +722,6 @@ static int msm_dp_init_sub_modules(struct msm_dp_display_private *dp) { int rc = 0; struct device *dev = &dp->msm_dp_display.pdev->dev; - struct msm_dp_panel_in panel_in = { - .dev = dev, - }; struct phy *phy; phy = devm_phy_get(dev, "dp"); @@ -765,11 +762,7 @@ static int msm_dp_init_sub_modules(struct msm_dp_display_private *dp) goto error_link; } - panel_in.aux = dp->aux; - panel_in.catalog = dp->catalog; - panel_in.link = dp->link; - - dp->panel = msm_dp_panel_get(&panel_in); + dp->panel = msm_dp_panel_get(dev, dp->aux, dp->link, dp->catalog); if (IS_ERR(dp->panel)) { rc = PTR_ERR(dp->panel); DRM_ERROR("failed to initialize panel, rc = 
%d\n", rc); @@ -787,7 +780,7 @@ static int msm_dp_init_sub_modules(struct msm_dp_display_private *dp) goto error_ctrl; } - dp->audio = msm_dp_audio_get(dp->msm_dp_display.pdev, dp->panel, dp->catalog); + dp->audio = msm_dp_audio_get(dp->msm_dp_display.pdev, dp->catalog); if (IS_ERR(dp->audio)) { rc = PTR_ERR(dp->audio); pr_err("failed to initialize audio, rc = %d\n", rc); diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 5d7eaa31bf31..92415bf8aa16 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -317,17 +317,6 @@ static int msm_dp_panel_setup_vsc_sdp_yuv_420(struct msm_dp_panel *msm_dp_panel) return 0; } -void msm_dp_panel_dump_regs(struct msm_dp_panel *msm_dp_panel) -{ - struct msm_dp_catalog *catalog; - struct msm_dp_panel_private *panel; - - panel = container_of(msm_dp_panel, struct msm_dp_panel_private, msm_dp_panel); - catalog = panel->catalog; - - msm_dp_catalog_dump_regs(catalog); -} - int msm_dp_panel_timing_cfg(struct msm_dp_panel *msm_dp_panel) { u32 data, total_ver, total_hor; @@ -486,25 +475,26 @@ static int msm_dp_panel_parse_dt(struct msm_dp_panel *msm_dp_panel) return 0; } -struct msm_dp_panel *msm_dp_panel_get(struct msm_dp_panel_in *in) +struct msm_dp_panel *msm_dp_panel_get(struct device *dev, struct drm_dp_aux *aux, + struct msm_dp_link *link, struct msm_dp_catalog *catalog) { struct msm_dp_panel_private *panel; struct msm_dp_panel *msm_dp_panel; int ret; - if (!in->dev || !in->catalog || !in->aux || !in->link) { + if (!dev || !catalog || !aux || !link) { DRM_ERROR("invalid input\n"); return ERR_PTR(-EINVAL); } - panel = devm_kzalloc(in->dev, sizeof(*panel), GFP_KERNEL); + panel = devm_kzalloc(dev, sizeof(*panel), GFP_KERNEL); if (!panel) return ERR_PTR(-ENOMEM); - panel->dev = in->dev; - panel->aux = in->aux; - panel->catalog = in->catalog; - panel->link = in->link; + panel->dev = dev; + panel->aux = aux; + panel->catalog = catalog; + panel->link = link; msm_dp_panel 
= &panel->msm_dp_panel; msm_dp_panel->max_bw_code = DP_LINK_BW_8_1; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.h b/drivers/gpu/drm/msm/dp/dp_panel.h index 0e944db3adf2..4906f4f09f24 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.h +++ b/drivers/gpu/drm/msm/dp/dp_panel.h @@ -21,13 +21,6 @@ struct msm_dp_display_mode { bool out_fmt_is_yuv_420; }; -struct msm_dp_panel_in { - struct device *dev; - struct drm_dp_aux *aux; - struct msm_dp_link *link; - struct msm_dp_catalog *catalog; -}; - struct msm_dp_panel_psr { u8 version; u8 capabilities; @@ -55,7 +48,6 @@ struct msm_dp_panel { int msm_dp_panel_init_panel_info(struct msm_dp_panel *msm_dp_panel); int msm_dp_panel_deinit(struct msm_dp_panel *msm_dp_panel); int msm_dp_panel_timing_cfg(struct msm_dp_panel *msm_dp_panel); -void msm_dp_panel_dump_regs(struct msm_dp_panel *msm_dp_panel); int msm_dp_panel_read_sink_caps(struct msm_dp_panel *msm_dp_panel, struct drm_connector *connector); u32 msm_dp_panel_get_mode_bpp(struct msm_dp_panel *msm_dp_panel, u32 mode_max_bpp, @@ -92,6 +84,7 @@ static inline bool is_lane_count_valid(u32 lane_count) lane_count == 4); } -struct msm_dp_panel *msm_dp_panel_get(struct msm_dp_panel_in *in); +struct msm_dp_panel *msm_dp_panel_get(struct device *dev, struct drm_dp_aux *aux, + struct msm_dp_link *link, struct msm_dp_catalog *catalog); void msm_dp_panel_put(struct msm_dp_panel *msm_dp_panel); #endif /* _DP_PANEL_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_utils.c b/drivers/gpu/drm/msm/dp/dp_utils.c index 2a40f07fe2d5..4a5ebb0c33b8 100644 --- a/drivers/gpu/drm/msm/dp/dp_utils.c +++ b/drivers/gpu/drm/msm/dp/dp_utils.c @@ -74,14 +74,8 @@ u8 msm_dp_utils_calculate_parity(u32 data) return parity_byte; } -ssize_t msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 *header_buff) +void msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 header_buff[2]) { - size_t length; - - length = sizeof(header_buff); - if (length < DP_SDP_HEADER_SIZE) - return -ENOSPC; - 
header_buff[0] = FIELD_PREP(HEADER_0_MASK, sdp_header->HB0) | FIELD_PREP(PARITY_0_MASK, msm_dp_utils_calculate_parity(sdp_header->HB0)) | FIELD_PREP(HEADER_1_MASK, sdp_header->HB1) | @@ -91,6 +85,4 @@ ssize_t msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 *head FIELD_PREP(PARITY_2_MASK, msm_dp_utils_calculate_parity(sdp_header->HB2)) | FIELD_PREP(HEADER_3_MASK, sdp_header->HB3) | FIELD_PREP(PARITY_3_MASK, msm_dp_utils_calculate_parity(sdp_header->HB3)); - - return length; } diff --git a/drivers/gpu/drm/msm/dp/dp_utils.h b/drivers/gpu/drm/msm/dp/dp_utils.h index 88d53157f5b5..2e4f98a863c4 100644 --- a/drivers/gpu/drm/msm/dp/dp_utils.h +++ b/drivers/gpu/drm/msm/dp/dp_utils.h @@ -31,6 +31,6 @@ u8 msm_dp_utils_get_g0_value(u8 data); u8 msm_dp_utils_get_g1_value(u8 data); u8 msm_dp_utils_calculate_parity(u32 data); -ssize_t msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 *header_buff); +void msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 header_buff[2]); #endif /* _DP_UTILS_H_ */ diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index 10ba7d153d1c..7754dcec33d0 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -286,6 +286,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_3_0, &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, + {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_3_1, + &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_4_0, &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_4_1, diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index 4c9b4b37681b..120cb65164c1 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -23,6 +23,7 @@ #define MSM_DSI_6G_VER_MINOR_V2_2_0 0x20000000 #define 
MSM_DSI_6G_VER_MINOR_V2_2_1 0x20020001 #define MSM_DSI_6G_VER_MINOR_V2_3_0 0x20030000 +#define MSM_DSI_6G_VER_MINOR_V2_3_1 0x20030001 #define MSM_DSI_6G_VER_MINOR_V2_4_0 0x20040000 #define MSM_DSI_6G_VER_MINOR_V2_4_1 0x20040001 #define MSM_DSI_6G_VER_MINOR_V2_5_0 0x20050000 diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index dd58bc0a49eb..c0bcc6828963 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -567,6 +567,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { .data = &dsi_phy_14nm_8953_cfgs }, { .compatible = "qcom,sm6125-dsi-phy-14nm", .data = &dsi_phy_14nm_2290_cfgs }, + { .compatible = "qcom,sm6150-dsi-phy-14nm", + .data = &dsi_phy_14nm_6150_cfgs }, #endif #ifdef CONFIG_DRM_MSM_DSI_10NM_PHY { .compatible = "qcom,dsi-phy-10nm", diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index 4953459edd63..8985818bb2e0 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -46,6 +46,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8937_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_14nm_6150_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_660_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index 1723f0e4faa4..2c3cbe0f2870 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -1032,6 +1032,10 @@ static const struct regulator_bulk_data dsi_phy_14nm_73p4mA_regulators[] = { { .supply = "vcca", .init_load_uA = 73400 }, }; +static const struct regulator_bulk_data 
dsi_phy_14nm_36mA_regulators[] = { + { .supply = "vdda", .init_load_uA = 36000 }, +}; + const struct msm_dsi_phy_cfg dsi_phy_14nm_cfgs = { .has_phy_lane = true, .regulator_data = dsi_phy_14nm_17mA_regulators, @@ -1097,3 +1101,20 @@ const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs = { .io_start = { 0x5e94400 }, .num_dsi_phy = 1, }; + +const struct msm_dsi_phy_cfg dsi_phy_14nm_6150_cfgs = { + .has_phy_lane = true, + .regulator_data = dsi_phy_14nm_36mA_regulators, + .num_regulators = ARRAY_SIZE(dsi_phy_14nm_36mA_regulators), + .ops = { + .enable = dsi_14nm_phy_enable, + .disable = dsi_14nm_phy_disable, + .pll_init = dsi_pll_14nm_init, + .save_pll_state = dsi_14nm_pll_save_state, + .restore_pll_state = dsi_14nm_pll_restore_state, + }, + .min_pll_rate = VCO_MIN_RATE, + .max_pll_rate = VCO_MAX_RATE, + .io_start = { 0xae94400 }, + .num_dsi_phy = 1, +}; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c index a719fd33d9d8..33bb48ae58a2 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c @@ -137,7 +137,7 @@ static inline u32 pll_get_integloop_gain(u64 frac_start, u64 bclk, u32 ref_clk, base <<= (digclk_divsel == 2 ? 1 : 0); - return (base <= 2046 ? 
base : 2046); + return base; } static inline u32 pll_get_pll_cmp(u64 fdata, unsigned long ref_clk) diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 9c45d641b521..a7a2384044ff 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -115,7 +115,7 @@ int msm_atomic_init_pending_timer(struct msm_pending_timer *timer, timer->kms = kms; timer->crtc_idx = crtc_idx; - timer->worker = kthread_create_worker(0, "atomic-worker-%d", crtc_idx); + timer->worker = kthread_run_worker(0, "atomic-worker-%d", crtc_idx); if (IS_ERR(timer->worker)) { int ret = PTR_ERR(timer->worker); timer->worker = NULL; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 34b971490f10..ff7a7a9f7b0d 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -538,7 +538,7 @@ static int msm_ioctl_gem_info_set_iova(struct drm_device *dev, /* Only supported if per-process address space is supported: */ if (priv->gpu->aspace == ctx->aspace) - return -EOPNOTSUPP; + return UERR(EOPNOTSUPP, dev, "requires per-process pgtables"); if (should_fail(&fail_gem_iova, obj->size)) return -ENOMEM; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index d8c9a1b19263..fee31680a6d5 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -28,6 +28,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include <drm/display/drm_dsc.h> #include <drm/msm_drm.h> @@ -506,6 +507,12 @@ void msm_hrtimer_work_init(struct msm_hrtimer_work *work, clockid_t clock_id, enum hrtimer_mode mode); +/* Helper for returning a UABI error with optional logging which can make + * it easier for userspace to understand what it is doing wrong. + */ +#define UERR(err, drm, fmt, ...) \ + ({ DRM_DEV_DEBUG_DRIVER((drm)->dev, fmt, ##__VA_ARGS__); -(err); }) + #define DBG(fmt, ...) 
DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) #define VERB(fmt, ...) if (0) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index fba78193127d..dee470403036 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -20,8 +20,8 @@ /* For userspace errors, use DRM_UT_DRIVER.. so that userspace can enable * error msgs for debugging, but we don't spam dmesg by default */ -#define SUBMIT_ERROR(submit, fmt, ...) \ - DRM_DEV_DEBUG_DRIVER((submit)->dev->dev, fmt, ##__VA_ARGS__) +#define SUBMIT_ERROR(err, submit, fmt, ...) \ + UERR(err, (submit)->dev, fmt, ##__VA_ARGS__) /* * Cmdstream submission: @@ -142,8 +142,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) || !(submit_bo.flags & MANDATORY_FLAGS)) { - SUBMIT_ERROR(submit, "invalid flags: %x\n", submit_bo.flags); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid flags: %x\n", submit_bo.flags); i = 0; goto out; } @@ -162,8 +161,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, */ obj = idr_find(&file->object_idr, submit->bos[i].handle); if (!obj) { - SUBMIT_ERROR(submit, "invalid handle %u at index %u\n", submit->bos[i].handle, i); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid handle %u at index %u\n", submit->bos[i].handle, i); goto out_unlock; } @@ -206,14 +204,12 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: break; default: - SUBMIT_ERROR(submit, "invalid type: %08x\n", submit_cmd.type); - return -EINVAL; + return SUBMIT_ERROR(EINVAL, submit, "invalid type: %08x\n", submit_cmd.type); } if (submit_cmd.size % 4) { - SUBMIT_ERROR(submit, "non-aligned cmdstream buffer size: %u\n", - submit_cmd.size); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "non-aligned cmdstream buffer size: %u\n", + submit_cmd.size); goto out; } @@ -371,9 +367,8 
@@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, struct drm_gem_object **obj, uint64_t *iova) { if (idx >= submit->nr_bos) { - SUBMIT_ERROR(submit, "invalid buffer index: %u (out of %u)\n", - idx, submit->nr_bos); - return -EINVAL; + return SUBMIT_ERROR(EINVAL, submit, "invalid buffer index: %u (out of %u)\n", + idx, submit->nr_bos); } if (obj) @@ -392,10 +387,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob uint32_t *ptr; int ret = 0; - if (offset % 4) { - SUBMIT_ERROR(submit, "non-aligned cmdstream buffer: %u\n", offset); - return -EINVAL; - } + if (offset % 4) + return SUBMIT_ERROR(EINVAL, submit, "non-aligned cmdstream buffer: %u\n", offset); /* For now, just map the entire thing. Eventually we probably * to do it page-by-page, w/ kmap() if not vmap()d.. @@ -414,9 +407,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob uint64_t iova; if (submit_reloc.submit_offset % 4) { - SUBMIT_ERROR(submit, "non-aligned reloc offset: %u\n", - submit_reloc.submit_offset); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "non-aligned reloc offset: %u\n", + submit_reloc.submit_offset); goto out; } @@ -425,8 +417,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob if ((off >= (obj->size / 4)) || (off < last_offset)) { - SUBMIT_ERROR(submit, "invalid offset %u at reloc %u\n", off, i); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid offset %u at reloc %u\n", off, i); goto out; } @@ -513,12 +504,12 @@ static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, if (syncobj_desc.point && !drm_core_check_feature(submit->dev, DRIVER_SYNCOBJ_TIMELINE)) { - ret = -EOPNOTSUPP; + ret = SUBMIT_ERROR(EOPNOTSUPP, submit, "syncobj timeline unsupported"); break; } if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) { - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid syncobj flags: %x", syncobj_desc.flags); break; } @@
-531,7 +522,7 @@ static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, syncobjs[i] = drm_syncobj_find(file, syncobj_desc.handle); if (!syncobjs[i]) { - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid syncobj handle: %u", i); break; } } @@ -588,14 +579,14 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, post_deps[i].point = syncobj_desc.point; if (syncobj_desc.flags) { - ret = -EINVAL; + ret = UERR(EINVAL, dev, "invalid syncobj flags"); break; } if (syncobj_desc.point) { if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) { - ret = -EOPNOTSUPP; + ret = UERR(EOPNOTSUPP, dev, "syncobj timeline unsupported"); break; } @@ -609,7 +600,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, post_deps[i].syncobj = drm_syncobj_find(file, syncobj_desc.handle); if (!post_deps[i].syncobj) { - ret = -EINVAL; + ret = UERR(EINVAL, dev, "invalid syncobj handle"); break; } } @@ -677,10 +668,10 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, * be more clever to dispatch to appropriate gpu module: */ if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0) - return -EINVAL; + return UERR(EINVAL, dev, "invalid pipe"); if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS) - return -EINVAL; + return UERR(EINVAL, dev, "invalid flags"); if (args->flags & MSM_SUBMIT_SUDO) { if (!IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) || @@ -724,7 +715,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, in_fence = sync_file_get_fence(args->fence_fd); if (!in_fence) { - ret = -EINVAL; + ret = UERR(EINVAL, dev, "invalid in-fence"); goto out_unlock; } @@ -787,10 +778,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out; if (!submit->cmd[i].size || - ((submit->cmd[i].size + submit->cmd[i].offset) > - obj->size / 4)) { - SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4); - ret = -EINVAL; + (size_add(submit->cmd[i].size, submit->cmd[i].offset) > 
obj->size / 4)) { + ret = UERR(EINVAL, dev, "invalid cmdstream size: %u\n", + submit->cmd[i].size * 4); goto out; } @@ -800,8 +790,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, continue; if (!gpu->allow_relocs) { - SUBMIT_ERROR(submit, "relocs not allowed\n"); - ret = -EINVAL; + ret = UERR(EINVAL, dev, "relocs not allowed\n"); goto out; } @@ -827,7 +816,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, (!args->fence || idr_find(&queue->fence_idr, args->fence))) { spin_unlock(&queue->idr_lock); idr_preload_end(); - ret = -EINVAL; + ret = UERR(EINVAL, dev, "invalid in-fence-sn"); goto out; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0d4a3744cfcb..8557998e0c92 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -859,7 +859,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->funcs = funcs; gpu->name = name; - gpu->worker = kthread_create_worker(0, "gpu-worker"); + gpu->worker = kthread_run_worker(0, "gpu-worker"); if (IS_ERR(gpu->worker)) { ret = PTR_ERR(gpu->worker); gpu->worker = NULL; diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c index f3326d09bdbc..38965e12a6bf 100644 --- a/drivers/gpu/drm/msm/msm_kms.c +++ b/drivers/gpu/drm/msm/msm_kms.c @@ -244,7 +244,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) ret = priv->kms_init(ddev); if (ret) { DRM_DEV_ERROR(dev, "failed to load kms\n"); - priv->kms = NULL; return ret; } @@ -269,7 +268,7 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) /* initialize event thread */ ev_thread = &priv->event_thread[drm_crtc_index(crtc)]; ev_thread->dev = ddev; - ev_thread->worker = kthread_create_worker(0, "crtc_event:%d", crtc->base.id); + ev_thread->worker = kthread_run_worker(0, "crtc_event:%d", crtc->base.id); if (IS_ERR(ev_thread->worker)) { ret = PTR_ERR(ev_thread->worker); DRM_DEV_ERROR(dev, "failed to create crtc_event 
kthread\n"); diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 76b6ae35a3cb..dcb49fd30402 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -166,22 +166,32 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss) static void msm_mdss_setup_ubwc_dec_20(struct msm_mdss *msm_mdss) { const struct msm_mdss_data *data = msm_mdss->mdss_data; + u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle) | + MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit); - writel_relaxed(data->ubwc_static, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); + if (data->ubwc_bank_spread) + value |= MDSS_UBWC_STATIC_UBWC_BANK_SPREAD; + + if (data->ubwc_enc_version == UBWC_1_0) + value |= MDSS_UBWC_STATIC_UBWC_MIN_ACC_LEN(1); + + writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); } static void msm_mdss_setup_ubwc_dec_30(struct msm_mdss *msm_mdss) { const struct msm_mdss_data *data = msm_mdss->mdss_data; - u32 value = (data->ubwc_swizzle & 0x1) | - (data->highest_bank_bit & 0x3) << 4 | - (data->macrotile_mode & 0x1) << 12; + u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle & 0x1) | + MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit); + + if (data->macrotile_mode) + value |= MDSS_UBWC_STATIC_MACROTILE_MODE; if (data->ubwc_enc_version == UBWC_3_0) - value |= BIT(10); + value |= MDSS_UBWC_STATIC_UBWC_AMSBC; if (data->ubwc_enc_version == UBWC_1_0) - value |= BIT(8); + value |= MDSS_UBWC_STATIC_UBWC_MIN_ACC_LEN(1); writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); } @@ -189,10 +199,14 @@ static void msm_mdss_setup_ubwc_dec_30(struct msm_mdss *msm_mdss) static void msm_mdss_setup_ubwc_dec_40(struct msm_mdss *msm_mdss) { const struct msm_mdss_data *data = msm_mdss->mdss_data; - u32 value = (data->ubwc_swizzle & 0x7) | - (data->ubwc_static & 0x1) << 3 | - (data->highest_bank_bit & 0x7) << 4 | - (data->macrotile_mode & 0x1) << 12; + u32 value = 
MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle) | + MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit); + + if (data->ubwc_bank_spread) + value |= MDSS_UBWC_STATIC_UBWC_BANK_SPREAD; + + if (data->macrotile_mode) + value |= MDSS_UBWC_STATIC_MACROTILE_MODE; writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); @@ -572,16 +586,17 @@ static const struct msm_mdss_data sa8775p_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 4, - .ubwc_static = 1, + .ubwc_bank_spread = true, .highest_bank_bit = 0, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 74000, }; static const struct msm_mdss_data sc7180_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, - .ubwc_static = 0x1e, + .ubwc_swizzle = 6, + .ubwc_bank_spread = true, .highest_bank_bit = 0x1, .reg_bus_bw = 76800, }; @@ -590,9 +605,9 @@ static const struct msm_mdss_data sc7280_data = { .ubwc_enc_version = UBWC_3_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, .highest_bank_bit = 1, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 74000, }; @@ -600,7 +615,7 @@ static const struct msm_mdss_data sc8180x_data = { .ubwc_enc_version = UBWC_3_0, .ubwc_dec_version = UBWC_3_0, .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 76800, }; @@ -608,9 +623,9 @@ static const struct msm_mdss_data sc8280xp_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 76800, }; @@ -632,7 +647,7 @@ static const struct msm_mdss_data sm6350_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .ubwc_swizzle = 6, - .ubwc_static = 0x1e, + .ubwc_bank_spread = true, .highest_bank_bit = 1, .reg_bus_bw = 76800, }; @@ -655,7 +670,7 @@ static const struct msm_mdss_data sm6115_data = { 
.ubwc_enc_version = UBWC_1_0, .ubwc_dec_version = UBWC_2_0, .ubwc_swizzle = 7, - .ubwc_static = 0x11f, + .ubwc_bank_spread = true, .highest_bank_bit = 0x1, .reg_bus_bw = 76800, }; @@ -667,14 +682,21 @@ static const struct msm_mdss_data sm6125_data = { .highest_bank_bit = 1, }; +static const struct msm_mdss_data sm6150_data = { + .ubwc_enc_version = UBWC_2_0, + .ubwc_dec_version = UBWC_2_0, + .highest_bank_bit = 1, + .reg_bus_bw = 76800, +}; + static const struct msm_mdss_data sm8250_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 76800, }; @@ -682,10 +704,10 @@ static const struct msm_mdss_data sm8350_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 74000, }; @@ -693,10 +715,10 @@ static const struct msm_mdss_data sm8550_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_3, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 57000, }; @@ -704,10 +726,10 @@ static const struct msm_mdss_data x1e80100_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_3, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, /* TODO: Add reg_bus_bw with real value */ }; @@ -724,6 +746,7 @@ static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,sc8280xp-mdss", .data = &sc8280xp_data }, { .compatible = "qcom,sm6115-mdss", .data = 
&sm6115_data }, { .compatible = "qcom,sm6125-mdss", .data = &sm6125_data }, + { .compatible = "qcom,sm6150-mdss", .data = &sm6150_data }, { .compatible = "qcom,sm6350-mdss", .data = &sm6350_data }, { .compatible = "qcom,sm6375-mdss", .data = &sm6350_data }, { .compatible = "qcom,sm7150-mdss", .data = &sm7150_data }, diff --git a/drivers/gpu/drm/msm/msm_mdss.h b/drivers/gpu/drm/msm/msm_mdss.h index 3afef4b1786d..14dc53704314 100644 --- a/drivers/gpu/drm/msm/msm_mdss.h +++ b/drivers/gpu/drm/msm/msm_mdss.h @@ -11,9 +11,9 @@ struct msm_mdss_data { /* can be read from register 0x58 */ u32 ubwc_dec_version; u32 ubwc_swizzle; - u32 ubwc_static; u32 highest_bank_bit; - u32 macrotile_mode; + bool ubwc_bank_spread; + bool macrotile_mode; u32 reg_bus_bw; }; diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 2fc3eaf81f44..7fed1de63b5d 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -18,7 +18,7 @@ int msm_file_private_set_sysprof(struct msm_file_private *ctx, switch (sysprof) { default: - return -EINVAL; + return UERR(EINVAL, gpu->dev, "Invalid sysprof: %d", sysprof); case 2: pm_runtime_get_sync(&gpu->pdev->dev); fallthrough; diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml index 6531749d30f4..3d2cc339b8f1 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml @@ -52,6 +52,11 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x23fd" name="GMU_DCVS_PERF_SETTING"/> <reg32 offset="0x23fe" name="GMU_DCVS_BW_SETTING"/> <reg32 offset="0x23ff" name="GMU_DCVS_RETURN"/> + <reg32 offset="0x2bf8" name="GMU_CORE_FW_VERSION"> + <bitfield name="MAJOR" low="28" high="31"/> + <bitfield name="MINOR" low="16" high="27"/> + <bitfield name="STEP" low="0" high="15"/> + </reg32> <reg32 offset="0x4c00" name="GMU_ICACHE_CONFIG"/> 
<reg32 offset="0x4c01" name="GMU_DCACHE_CONFIG"/> <reg32 offset="0x4c0f" name="GMU_SYS_BUS_CONFIG"/> diff --git a/drivers/gpu/drm/msm/registers/display/mdss.xml b/drivers/gpu/drm/msm/registers/display/mdss.xml index ac85caf1575c..6e9f81cd4690 100644 --- a/drivers/gpu/drm/msm/registers/display/mdss.xml +++ b/drivers/gpu/drm/msm/registers/display/mdss.xml @@ -21,7 +21,16 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x00058" name="UBWC_DEC_HW_VERSION"/> - <reg32 offset="0x00144" name="UBWC_STATIC"/> + <reg32 offset="0x00144" name="UBWC_STATIC"> + <bitfield name="UBWC_SWIZZLE" low="0" high="2"/> + <bitfield name="UBWC_BANK_SPREAD" pos="3"/> + <!-- high=5 for UBWC < 4.0 --> + <bitfield name="HIGHEST_BANK_BIT" low="4" high="6"/> + <bitfield name="UBWC_MIN_ACC_LEN" low="8" high="9"/> + <bitfield name="UBWC_AMSBC" pos="10"/> + <bitfield name="MACROTILE_MODE" pos="12"/> + </reg32> + <reg32 offset="0x00150" name="UBWC_CTRL_2"/> <reg32 offset="0x00154" name="UBWC_PREDICTION_MODE"/> </domain> diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 2e09b5a6d155..504cb3f2054b 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -992,8 +992,7 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, if (!mst_state->pbn_div.full) { struct nouveau_encoder *outp = mstc->mstm->outp; - mst_state->pbn_div = drm_dp_get_vc_payload_bw(&mstm->mgr, - outp->dp.link_bw, outp->dp.link_nr); + mst_state->pbn_div = drm_dp_get_vc_payload_bw(outp->dp.link_bw, outp->dp.link_nr); } slots = drm_dp_atomic_find_time_slots(state, &mstm->mgr, mstc->port, asyh->dp.pbn); diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 8f0c69aad248..21b56cc7605c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -384,7 +384,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct 
drm_connector *connector) if (ret < 0) return NULL; - return kmemdup(edid, EDID_LENGTH, GFP_KERNEL); + return edid; } bool nouveau_acpi_video_backlight_use_native(void) diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 2cb2e5675807..cd659b9fd1d9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -279,7 +279,6 @@ nouveau_channel_ctor(struct nouveau_cli *cli, bool priv, u64 runm, const u64 plength = 0x10000; const u64 ioffset = plength; const u64 ilength = 0x02000; - char name[TASK_COMM_LEN]; int cid, ret; u64 size; @@ -338,8 +337,7 @@ nouveau_channel_ctor(struct nouveau_cli *cli, bool priv, u64 runm, chan->userd = &chan->user; } - get_task_comm(name, current); - snprintf(args.name, sizeof(args.name), "%s[%d]", name, task_pid_nr(current)); + snprintf(args.name, sizeof(args.name), "%s[%d]", current->comm, task_pid_nr(current)); ret = nvif_object_ctor(&device->object, "abi16ChanUser", 0, hosts[cid].oclass, &args, sizeof(args), &chan->user); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 8c970f018c00..e154d08857c5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1175,7 +1175,7 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) { struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_cli *cli; - char name[32], tmpname[TASK_COMM_LEN]; + char name[32]; int ret; /* need to bring up power immediately if opening device */ @@ -1185,10 +1185,9 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) return ret; } - get_task_comm(tmpname, current); rcu_read_lock(); snprintf(name, sizeof(name), "%s[%d]", - tmpname, pid_nr(rcu_dereference(fpriv->pid))); + current->comm, pid_nr(rcu_dereference(fpriv->pid))); rcu_read_unlock(); if (!(cli = kzalloc(sizeof(*cli), GFP_KERNEL))) { diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
b/drivers/gpu/drm/nouveau/nouveau_fence.c index 09686d038d60..7cc84472cece 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -387,11 +387,13 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, if (f) { struct nouveau_channel *prev; bool must_wait = true; + bool local; rcu_read_lock(); prev = rcu_dereference(f->channel); - if (prev && (prev == chan || - fctx->sync(f, prev, chan) == 0)) + local = prev && prev->cli->drm == chan->cli->drm; + if (local && (prev == chan || + fctx->sync(f, prev, chan) == 0)) must_wait = false; rcu_read_unlock(); if (!must_wait) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c index 841e3b69fcaf..5a0c9b8a79f3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c @@ -31,6 +31,7 @@ mcp77_sor = { .state = g94_sor_state, .power = nv50_sor_power, .clock = nv50_sor_clock, + .bl = &nv50_sor_bl, .hdmi = &g84_sor_hdmi, .dp = &g94_sor_dp, }; diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c index 8b48bba18131..3644a7544b93 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83102.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c @@ -565,6 +565,8 @@ static int hx83102_get_modes(struct drm_panel *panel, struct drm_display_mode *mode; mode = drm_mode_duplicate(connector->dev, m); + if (!mode) + return -ENOMEM; mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; drm_mode_set_name(mode); diff --git a/drivers/gpu/drm/panel/panel-novatek-nt35950.c b/drivers/gpu/drm/panel/panel-novatek-nt35950.c index b036208f9356..08b22b592ab0 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt35950.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt35950.c @@ -481,9 +481,9 @@ static int nt35950_probe(struct mipi_dsi_device *dsi) return dev_err_probe(dev, -EPROBE_DEFER, "Cannot get secondary DSI host\n"); nt->dsi[1] = 
mipi_dsi_device_register_full(dsi_r_host, info); - if (!nt->dsi[1]) { + if (IS_ERR(nt->dsi[1])) { dev_err(dev, "Cannot get secondary DSI node\n"); - return -ENODEV; + return PTR_ERR(nt->dsi[1]); } num_dsis++; } diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c index eef03d04e0cd..1f72ef7ca74c 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c @@ -1177,6 +1177,7 @@ static int st7701_probe(struct device *dev, int connector_type) return dev_err_probe(dev, ret, "Failed to get orientation\n"); drm_panel_init(&st7701->panel, dev, &st7701_funcs, connector_type); + st7701->panel.prepare_prev_first = true; /** * Once sleep out has been issued, ST7701 IC required to wait 120ms diff --git a/drivers/gpu/drm/panel/panel-synaptics-r63353.c b/drivers/gpu/drm/panel/panel-synaptics-r63353.c index 169c629746c7..17349825543f 100644 --- a/drivers/gpu/drm/panel/panel-synaptics-r63353.c +++ b/drivers/gpu/drm/panel/panel-synaptics-r63353.c @@ -325,7 +325,7 @@ static void r63353_panel_shutdown(struct mipi_dsi_device *dsi) { struct r63353_panel *rpanel = mipi_dsi_get_drvdata(dsi); - r63353_panel_unprepare(&rpanel->base); + drm_panel_unprepare(&rpanel->base); } static const struct r63353_desc sharp_ls068b3sx02_data = { diff --git a/drivers/gpu/drm/tests/drm_connector_test.c b/drivers/gpu/drm/tests/drm_connector_test.c index 129e813cfd1b..22e2d959eb31 100644 --- a/drivers/gpu/drm/tests/drm_connector_test.c +++ b/drivers/gpu/drm/tests/drm_connector_test.c @@ -1095,6 +1095,64 @@ static void drm_test_connector_hdmi_init_formats_no_rgb(struct kunit *test) KUNIT_EXPECT_LT(test, ret, 0); } +struct drm_connector_hdmi_init_formats_yuv420_allowed_test { + unsigned long supported_formats; + bool yuv420_allowed; + int expected_result; +}; + +#define YUV420_ALLOWED_TEST(_formats, _allowed, _result) \ + { \ + .supported_formats = BIT(HDMI_COLORSPACE_RGB) | (_formats), \ + .yuv420_allowed = 
_allowed, \ + .expected_result = _result, \ + } + +static const struct drm_connector_hdmi_init_formats_yuv420_allowed_test +drm_connector_hdmi_init_formats_yuv420_allowed_tests[] = { + YUV420_ALLOWED_TEST(BIT(HDMI_COLORSPACE_YUV420), true, 0), + YUV420_ALLOWED_TEST(BIT(HDMI_COLORSPACE_YUV420), false, -EINVAL), + YUV420_ALLOWED_TEST(BIT(HDMI_COLORSPACE_YUV422), true, -EINVAL), + YUV420_ALLOWED_TEST(BIT(HDMI_COLORSPACE_YUV422), false, 0), +}; + +static void +drm_connector_hdmi_init_formats_yuv420_allowed_desc(const struct drm_connector_hdmi_init_formats_yuv420_allowed_test *t, + char *desc) +{ + sprintf(desc, "supported_formats=0x%lx yuv420_allowed=%d", + t->supported_formats, t->yuv420_allowed); +} + +KUNIT_ARRAY_PARAM(drm_connector_hdmi_init_formats_yuv420_allowed, + drm_connector_hdmi_init_formats_yuv420_allowed_tests, + drm_connector_hdmi_init_formats_yuv420_allowed_desc); + +/* + * Test that the registration of an HDMI connector succeeds only when + * the presence of YUV420 in the supported formats matches the value + * of the ycbcr_420_allowed flag. + */ +static void drm_test_connector_hdmi_init_formats_yuv420_allowed(struct kunit *test) +{ + const struct drm_connector_hdmi_init_formats_yuv420_allowed_test *params; + struct drm_connector_init_priv *priv = test->priv; + int ret; + + params = test->param_value; + priv->connector.ycbcr_420_allowed = params->yuv420_allowed; + + ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", + &dummy_funcs, + &dummy_hdmi_funcs, + DRM_MODE_CONNECTOR_HDMIA, + &priv->ddc, + params->supported_formats, + 8); + KUNIT_EXPECT_EQ(test, ret, params->expected_result); +} + /* * Test that the registration of an HDMI connector with an HDMI * connector type succeeds. 
@@ -1186,6 +1244,8 @@ static struct kunit_case drmm_connector_hdmi_init_tests[] = { KUNIT_CASE(drm_test_connector_hdmi_init_bpc_null), KUNIT_CASE(drm_test_connector_hdmi_init_formats_empty), KUNIT_CASE(drm_test_connector_hdmi_init_formats_no_rgb), + KUNIT_CASE_PARAM(drm_test_connector_hdmi_init_formats_yuv420_allowed, + drm_connector_hdmi_init_formats_yuv420_allowed_gen_params), KUNIT_CASE(drm_test_connector_hdmi_init_null_ddc), KUNIT_CASE(drm_test_connector_hdmi_init_null_product), KUNIT_CASE(drm_test_connector_hdmi_init_null_vendor), diff --git a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c index 89cd9e4f4d32..9e0e2fb65944 100644 --- a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c +++ b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c @@ -199,10 +199,8 @@ static const struct drm_dp_mst_calc_pbn_div_test drm_dp_mst_calc_pbn_div_dp1_4_c static void drm_test_dp_mst_calc_pbn_div(struct kunit *test) { const struct drm_dp_mst_calc_pbn_div_test *params = test->param_value; - /* mgr->dev is only needed by drm_dbg_kms(), but it's not called for the test cases. 
*/ - struct drm_dp_mst_topology_mgr *mgr = test->priv; - KUNIT_EXPECT_EQ(test, drm_dp_get_vc_payload_bw(mgr, params->link_rate, params->lane_count).full, + KUNIT_EXPECT_EQ(test, drm_dp_get_vc_payload_bw(params->link_rate, params->lane_count).full, params->expected.full); } @@ -568,21 +566,8 @@ static struct kunit_case drm_dp_mst_helper_tests[] = { { } }; -static int drm_dp_mst_helper_tests_init(struct kunit *test) -{ - struct drm_dp_mst_topology_mgr *mgr; - - mgr = kunit_kzalloc(test, sizeof(*mgr), GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, mgr); - - test->priv = mgr; - - return 0; -} - static struct kunit_suite drm_dp_mst_helper_test_suite = { .name = "drm_dp_mst_helper", - .init = drm_dp_mst_helper_tests_init, .test_cases = drm_dp_mst_helper_tests, }; diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index c3b693bb966f..b976a5e9aef5 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -1568,6 +1568,57 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes KUNIT_EXPECT_EQ(test, conn_state->hdmi.output_format, HDMI_COLORSPACE_RGB); } +/* Test that atomic check succeeds when disabling a connector. 
*/ +static void drm_test_check_disable_connector(struct kunit *test) +{ + struct drm_atomic_helper_connector_hdmi_priv *priv; + struct drm_modeset_acquire_ctx *ctx; + struct drm_connector_state *conn_state; + struct drm_crtc_state *crtc_state; + struct drm_atomic_state *state; + struct drm_display_mode *preferred; + struct drm_connector *conn; + struct drm_device *drm; + struct drm_crtc *crtc; + int ret; + + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); + KUNIT_ASSERT_NOT_NULL(test, priv); + + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + + conn = &priv->connector; + preferred = find_preferred_mode(conn); + KUNIT_ASSERT_NOT_NULL(test, preferred); + + drm = &priv->drm; + crtc = priv->crtc; + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); + KUNIT_ASSERT_EQ(test, ret, 0); + + state = drm_kunit_helper_atomic_state_alloc(test, drm, ctx); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); + + crtc_state = drm_atomic_get_crtc_state(state, crtc); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state); + + crtc_state->active = false; + ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL); + KUNIT_EXPECT_EQ(test, ret, 0); + + conn_state = drm_atomic_get_connector_state(state, conn); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); + + ret = drm_atomic_set_crtc_for_connector(conn_state, NULL); + KUNIT_EXPECT_EQ(test, ret, 0); + + ret = drm_atomic_check_only(state); + KUNIT_ASSERT_EQ(test, ret, 0); +} + static struct kunit_case drm_atomic_helper_connector_hdmi_check_tests[] = { KUNIT_CASE(drm_test_check_broadcast_rgb_auto_cea_mode), KUNIT_CASE(drm_test_check_broadcast_rgb_auto_cea_mode_vic_1), @@ -1582,6 +1633,7 @@ static struct kunit_case drm_atomic_helper_connector_hdmi_check_tests[] = { */ KUNIT_CASE(drm_test_check_broadcast_rgb_crtc_mode_changed), KUNIT_CASE(drm_test_check_broadcast_rgb_crtc_mode_not_changed), + KUNIT_CASE(drm_test_check_disable_connector), 
KUNIT_CASE(drm_test_check_hdmi_funcs_reject_rate), KUNIT_CASE(drm_test_check_max_tmds_rate_bpc_fallback), KUNIT_CASE(drm_test_check_max_tmds_rate_format_fallback), diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c index 04a6b8cc62ac..3c0b7824c0be 100644 --- a/drivers/gpu/drm/tests/drm_kunit_helpers.c +++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c @@ -320,8 +320,7 @@ static void kunit_action_drm_mode_destroy(void *ptr) } /** - * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC - for a KUnit test + * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC for a KUnit test * @test: The test context object * @dev: DRM device * @video_code: CEA VIC of the mode diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 89a699370a59..c67e1f906785 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -757,7 +757,6 @@ static void bochs_pci_remove(struct pci_dev *pdev) drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); - drm_dev_put(dev); } static void bochs_pci_shutdown(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c index 3139fd9128d8..f8f20d2f6174 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c @@ -258,13 +258,13 @@ static void ttm_bo_unreserve_basic(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL); bo->priority = bo_prio; - err = ttm_resource_alloc(bo, place, &res1); + err = ttm_resource_alloc(bo, place, &res1, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res1; /* Add a dummy resource to populate LRU */ - ttm_resource_alloc(bo, place, &res2); + ttm_resource_alloc(bo, place, &res2, NULL); dma_resv_lock(bo->base.resv, NULL); ttm_bo_unreserve(bo); @@ -300,12 +300,12 @@ static void ttm_bo_unreserve_pinned(struct kunit *test) dma_resv_lock(bo->base.resv, NULL); ttm_bo_pin(bo); - err = 
ttm_resource_alloc(bo, place, &res1); + err = ttm_resource_alloc(bo, place, &res1, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res1; /* Add a dummy resource to the pinned list */ - err = ttm_resource_alloc(bo, place, &res2); + err = ttm_resource_alloc(bo, place, &res2, NULL); KUNIT_ASSERT_EQ(test, err, 0); KUNIT_ASSERT_EQ(test, list_is_last(&res2->lru.link, &priv->ttm_dev->unevictable), 1); @@ -355,7 +355,7 @@ static void ttm_bo_unreserve_bulk(struct kunit *test) ttm_bo_set_bulk_move(bo1, &lru_bulk_move); dma_resv_unlock(bo1->base.resv); - err = ttm_resource_alloc(bo1, place, &res1); + err = ttm_resource_alloc(bo1, place, &res1, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo1->resource = res1; @@ -363,7 +363,7 @@ static void ttm_bo_unreserve_bulk(struct kunit *test) ttm_bo_set_bulk_move(bo2, &lru_bulk_move); dma_resv_unlock(bo2->base.resv); - err = ttm_resource_alloc(bo2, place, &res2); + err = ttm_resource_alloc(bo2, place, &res2, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo2->resource = res2; @@ -401,7 +401,7 @@ static void ttm_bo_put_basic(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL); bo->type = ttm_bo_type_device; - err = ttm_resource_alloc(bo, place, &res); + err = ttm_resource_alloc(bo, place, &res, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res; @@ -518,7 +518,7 @@ static void ttm_bo_pin_unpin_resource(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL); - err = ttm_resource_alloc(bo, place, &res); + err = ttm_resource_alloc(bo, place, &res, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res; @@ -569,7 +569,7 @@ static void ttm_bo_multiple_pin_one_unpin(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL); - err = ttm_resource_alloc(bo, place, &res); + err = ttm_resource_alloc(bo, place, &res, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res; diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c index 
1adf18481ea0..3148f5d3dbd6 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c @@ -542,7 +542,7 @@ static void ttm_bo_validate_no_placement_signaled(struct kunit *test) bo->ttm = old_tt; } - err = ttm_resource_alloc(bo, place, &bo->resource); + err = ttm_resource_alloc(bo, place, &bo->resource, NULL); KUNIT_EXPECT_EQ(test, err, 0); KUNIT_ASSERT_EQ(test, man->usage, size); @@ -603,7 +603,7 @@ static void ttm_bo_validate_no_placement_not_signaled(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, size, NULL); bo->type = params->bo_type; - err = ttm_resource_alloc(bo, place, &bo->resource); + err = ttm_resource_alloc(bo, place, &bo->resource, NULL); KUNIT_EXPECT_EQ(test, err, 0); placement = kunit_kzalloc(test, sizeof(*placement), GFP_KERNEL); diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c index a9f4b81921c3..e6ea2bd01f07 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c @@ -302,7 +302,7 @@ static void ttm_sys_man_free_basic(struct kunit *test) res = kunit_kzalloc(test, sizeof(*res), GFP_KERNEL); KUNIT_ASSERT_NOT_NULL(test, res); - ttm_resource_alloc(bo, place, &res); + ttm_resource_alloc(bo, place, &res, NULL); man = ttm_manager_type(priv->devs->ttm_dev, mem_type); man->func->free(man, res); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 06d6a452c4f4..95b86003c50d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -42,6 +42,7 @@ #include <linux/file.h> #include <linux/module.h> #include <linux/atomic.h> +#include <linux/cgroup_dmem.h> #include <linux/dma-resv.h> #include "ttm_module.h" @@ -500,6 +501,13 @@ struct ttm_bo_evict_walk { struct ttm_resource **res; /** @evicted: Number of successful evictions. 
*/ unsigned long evicted; + + /** @limit_pool: Which pool limit we should test against */ + struct dmem_cgroup_pool_state *limit_pool; + /** @try_low: Whether we should attempt to evict BO's with low watermark threshold */ + bool try_low; + /** @hit_low: If we cannot evict a bo when @try_low is false (first pass) */ + bool hit_low; }; static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) @@ -508,6 +516,10 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object * container_of(walk, typeof(*evict_walk), walk); s64 lret; + if (!dmem_cgroup_state_evict_valuable(evict_walk->limit_pool, bo->resource->css, + evict_walk->try_low, &evict_walk->hit_low)) + return 0; + if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, evict_walk->place)) return 0; @@ -525,7 +537,7 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object * evict_walk->evicted++; if (evict_walk->res) lret = ttm_resource_alloc(evict_walk->evictor, evict_walk->place, - evict_walk->res); + evict_walk->res, NULL); if (lret == 0) return 1; out: @@ -546,7 +558,8 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev, struct ttm_buffer_object *evictor, struct ttm_operation_ctx *ctx, struct ww_acquire_ctx *ticket, - struct ttm_resource **res) + struct ttm_resource **res, + struct dmem_cgroup_pool_state *limit_pool) { struct ttm_bo_evict_walk evict_walk = { .walk = { @@ -557,22 +570,39 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev, .place = place, .evictor = evictor, .res = res, + .limit_pool = limit_pool, }; s64 lret; evict_walk.walk.trylock_only = true; lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); + + /* One more attempt if we hit low limit? 
*/ + if (!lret && evict_walk.hit_low) { + evict_walk.try_low = true; + lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); + } if (lret || !ticket) goto out; + /* Reset low limit */ + evict_walk.try_low = evict_walk.hit_low = false; /* If ticket-locking, repeat while making progress. */ evict_walk.walk.trylock_only = false; + +retry: do { /* The walk may clear the evict_walk.walk.ticket field */ evict_walk.walk.ticket = ticket; evict_walk.evicted = 0; lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); } while (!lret && evict_walk.evicted); + + /* We hit the low limit? Try once more */ + if (!lret && evict_walk.hit_low && !evict_walk.try_low) { + evict_walk.try_low = true; + goto retry; + } out: if (lret < 0) return lret; @@ -690,6 +720,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, for (i = 0; i < placement->num_placement; ++i) { const struct ttm_place *place = &placement->placement[i]; + struct dmem_cgroup_pool_state *limit_pool = NULL; struct ttm_resource_manager *man; bool may_evict; @@ -702,15 +733,20 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, continue; may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM); - ret = ttm_resource_alloc(bo, place, res); + ret = ttm_resource_alloc(bo, place, res, force_space ? 
&limit_pool : NULL); if (ret) { - if (ret != -ENOSPC) + if (ret != -ENOSPC && ret != -EAGAIN) { + dmem_cgroup_pool_state_put(limit_pool); return ret; - if (!may_evict) + } + if (!may_evict) { + dmem_cgroup_pool_state_put(limit_pool); continue; + } ret = ttm_bo_evict_alloc(bdev, man, place, bo, ctx, - ticket, res); + ticket, res, limit_pool); + dmem_cgroup_pool_state_put(limit_pool); if (ret == -EBUSY) continue; if (ret) @@ -1057,6 +1093,8 @@ struct ttm_bo_swapout_walk { struct ttm_lru_walk walk; /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */ gfp_t gfp_flags; + + bool hit_low, evict_low; }; static s64 @@ -1107,7 +1145,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) memset(&hop, 0, sizeof(hop)); place.mem_type = TTM_PL_SYSTEM; - ret = ttm_resource_alloc(bo, &place, &evict_mem); + ret = ttm_resource_alloc(bo, &place, &evict_mem, NULL); if (ret) goto out; diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 0cf91d23f25c..a194db83421d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -405,13 +405,25 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, return len; } -int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write) +/** + * ttm_bo_access - Helper to access a buffer object + * + * @bo: ttm buffer object + * @offset: access offset into buffer object + * @buf: pointer to caller memory to read into or write from + * @len: length of access + * @write: write access + * + * Utility function to access a buffer object. Useful when buffer object cannot + * be easily mapped (non-contiguous, non-visible, etc...). Should not directly + * be exported to user space via a peak / poke interface. + * + * Returns: + * @len if successful, negative error code on failure. 
+ */ +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, + void *buf, int len, int write) { - struct ttm_buffer_object *bo = vma->vm_private_data; - unsigned long offset = (addr) - vma->vm_start + - ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) - << PAGE_SHIFT); int ret; if (len < 1 || (offset + len) > bo->base.size) @@ -429,8 +441,8 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, break; default: if (bo->bdev->funcs->access_memory) - ret = bo->bdev->funcs->access_memory( - bo, offset, buf, len, write); + ret = bo->bdev->funcs->access_memory + (bo, offset, buf, len, write); else ret = -EIO; } @@ -439,6 +451,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, return ret; } +EXPORT_SYMBOL(ttm_bo_access); + +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct ttm_buffer_object *bo = vma->vm_private_data; + unsigned long offset = (addr) - vma->vm_start + + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) + << PAGE_SHIFT); + + return ttm_bo_access(bo, offset, buf, len, write); +} EXPORT_SYMBOL(ttm_bo_vm_access); static const struct vm_operations_struct ttm_bo_vm_ops = { diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index e19360cc7930..1644beb7a745 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -26,6 +26,7 @@ #include <linux/io-mapping.h> #include <linux/iosys-map.h> #include <linux/scatterlist.h> +#include <linux/cgroup_dmem.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_placement.h> @@ -367,15 +368,28 @@ EXPORT_SYMBOL(ttm_resource_fini); int ttm_resource_alloc(struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource **res_ptr) + struct ttm_resource **res_ptr, + struct dmem_cgroup_pool_state **ret_limit_pool) { struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, place->mem_type); + struct 
dmem_cgroup_pool_state *pool = NULL; int ret; + if (man->cg) { + ret = dmem_cgroup_try_charge(man->cg, bo->base.size, &pool, ret_limit_pool); + if (ret) + return ret; + } + ret = man->func->alloc(man, bo, place, res_ptr); - if (ret) + if (ret) { + if (pool) + dmem_cgroup_uncharge(pool, bo->base.size); return ret; + } + + (*res_ptr)->css = pool; spin_lock(&bo->bdev->lru_lock); ttm_resource_add_bulk_move(*res_ptr, bo); @@ -387,6 +401,7 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(ttm_resource_alloc); void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) { struct ttm_resource_manager *man; + struct dmem_cgroup_pool_state *pool; if (!*res) return; @@ -394,9 +409,13 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) spin_lock(&bo->bdev->lru_lock); ttm_resource_del_bulk_move(*res, bo); spin_unlock(&bo->bdev->lru_lock); + + pool = (*res)->css; man = ttm_manager_type(bo->bdev, (*res)->mem_type); man->func->free(man, *res); *res = NULL; + if (man->cg) + dmem_cgroup_uncharge(pool, bo->base.size); } EXPORT_SYMBOL(ttm_resource_free); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 20bf33702c3c..72b6a119412f 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -107,7 +107,10 @@ v3d_irq(int irq, void *arg) v3d_job_update_stats(&v3d->bin_job->base, V3D_BIN); trace_v3d_bcl_irq(&v3d->drm, fence->seqno); + + v3d->bin_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -117,7 +120,10 @@ v3d_irq(int irq, void *arg) v3d_job_update_stats(&v3d->render_job->base, V3D_RENDER); trace_v3d_rcl_irq(&v3d->drm, fence->seqno); + + v3d->render_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -127,7 +133,10 @@ v3d_irq(int irq, void *arg) v3d_job_update_stats(&v3d->csd_job->base, V3D_CSD); trace_v3d_csd_irq(&v3d->drm, fence->seqno); + + v3d->csd_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -164,7 +173,10 @@ v3d_hub_irq(int 
irq, void *arg) v3d_job_update_stats(&v3d->tfu_job->base, V3D_TFU); trace_v3d_tfu_irq(&v3d->drm, fence->seqno); + + v3d->tfu_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index a0e433fbcba6..9b5b8c1f063b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -228,7 +228,6 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv, VMW_BO_DOMAIN_VRAM, VMW_BO_DOMAIN_VRAM); buf->places[0].lpfn = PFN_UP(bo->resource->size); - buf->busy_places[0].lpfn = PFN_UP(bo->resource->size); ret = ttm_bo_validate(bo, &buf->placement, &ctx); /* For some reason we didn't end up at the start of vram */ @@ -443,7 +442,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv, if (params->pin) ttm_bo_pin(&vmw_bo->tbo); - ttm_bo_unreserve(&vmw_bo->tbo); + if (!params->keep_resv) + ttm_bo_unreserve(&vmw_bo->tbo); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 43b5439ec9f7..11e330c7c7f5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -56,8 +56,9 @@ struct vmw_bo_params { u32 domain; u32 busy_domain; enum ttm_bo_type bo_type; - size_t size; bool pin; + bool keep_resv; + size_t size; struct dma_resv *resv; struct sg_table *sg; }; @@ -83,7 +84,6 @@ struct vmw_bo { struct ttm_placement placement; struct ttm_place places[5]; - struct ttm_place busy_places[5]; /* Protected by reservation */ struct ttm_bo_kmap_obj map; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 1699236fca5a..0f32471c8533 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -403,7 +403,8 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv) .busy_domain = VMW_BO_DOMAIN_SYS, .bo_type = ttm_bo_type_kernel, .size = PAGE_SIZE, - .pin = true + .pin = true, + .keep_resv = true, }; /* 
@@ -415,10 +416,6 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv) if (unlikely(ret != 0)) return ret; - ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL); - BUG_ON(ret != 0); - vmw_bo_pin_reserved(vbo, true); - ret = ttm_bo_kmap(&vbo->tbo, 0, 1, &map); if (likely(ret == 0)) { result = ttm_kmap_obj_virtual(&map, &dummy); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index b9857f37ca1a..ed5015ced392 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -206,6 +206,7 @@ struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev, .bo_type = ttm_bo_type_sg, .size = attach->dmabuf->size, .pin = false, + .keep_resv = true, .resv = attach->dmabuf->resv, .sg = table, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index c896de07f7b0..1912ac1cde6d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -750,6 +750,7 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state); struct vmw_bo *old_bo = NULL; struct vmw_bo *new_bo = NULL; + struct ww_acquire_ctx ctx; s32 hotspot_x, hotspot_y; int ret; @@ -769,9 +770,11 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, if (du->cursor_surface) du->cursor_age = du->cursor_surface->snooper.age; + ww_acquire_init(&ctx, &reservation_ww_class); + if (!vmw_user_object_is_null(&old_vps->uo)) { old_bo = vmw_user_object_buffer(&old_vps->uo); - ret = ttm_bo_reserve(&old_bo->tbo, false, false, NULL); + ret = ttm_bo_reserve(&old_bo->tbo, false, false, &ctx); if (ret != 0) return; } @@ -779,9 +782,14 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, if (!vmw_user_object_is_null(&vps->uo)) { new_bo = vmw_user_object_buffer(&vps->uo); if (old_bo != new_bo) { - ret = ttm_bo_reserve(&new_bo->tbo, false, false, NULL); - if (ret != 0) + ret = 
ttm_bo_reserve(&new_bo->tbo, false, false, &ctx); + if (ret != 0) { + if (old_bo) { + ttm_bo_unreserve(&old_bo->tbo); + ww_acquire_fini(&ctx); + } return; + } } else { new_bo = NULL; } @@ -803,10 +811,12 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, hotspot_x, hotspot_y); } - if (old_bo) - ttm_bo_unreserve(&old_bo->tbo); if (new_bo) ttm_bo_unreserve(&new_bo->tbo); + if (old_bo) + ttm_bo_unreserve(&old_bo->tbo); + + ww_acquire_fini(&ctx); du->cursor_x = new_state->crtc_x + du->set_gui_x; du->cursor_y = new_state->crtc_y + du->set_gui_y; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index a01ca3226d0a..7fb1c88bcc47 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -896,7 +896,8 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, .busy_domain = VMW_BO_DOMAIN_SYS, .bo_type = ttm_bo_type_device, .size = size, - .pin = true + .pin = true, + .keep_resv = true, }; if (!vmw_shader_id_ok(user_key, shader_type)) @@ -906,10 +907,6 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, if (unlikely(ret != 0)) goto out; - ret = ttm_bo_reserve(&buf->tbo, false, true, NULL); - if (unlikely(ret != 0)) - goto no_reserve; - /* Map and copy shader bytecode. 
*/ ret = ttm_bo_kmap(&buf->tbo, 0, PFN_UP(size), &map); if (unlikely(ret != 0)) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 621d98b376bb..5553892d7c3e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -572,15 +572,14 @@ int vmw_bo_create_and_populate(struct vmw_private *dev_priv, .busy_domain = domain, .bo_type = ttm_bo_type_kernel, .size = bo_size, - .pin = true + .pin = true, + .keep_resv = true, }; ret = vmw_bo_create(dev_priv, &bo_params, &vbo); if (unlikely(ret != 0)) return ret; - ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL); - BUG_ON(ret != 0); ret = vmw_ttm_populate(vbo->tbo.bdev, vbo->tbo.ttm, &ctx); if (likely(ret == 0)) { struct vmw_ttm_tt *vmw_tt = diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 2de0de41b8dd..0d749ed44878 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -66,7 +66,7 @@ config DRM_XE_DEBUG_MEM bool "Enable passing SYS/VRAM addresses to user space" default n help - Pass object location trough uapi. Intended for extended + Pass object location through uapi. Intended for extended testing and development only. Recommended for driver developers only. @@ -104,5 +104,5 @@ config DRM_XE_USERPTR_INVAL_INJECT Choose this option when debugging error paths that are hit during checks for userptr invalidations. - Recomended for driver developers only. + Recommended for driver developers only. If in doubt, say "N". 
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index bc7a04ce69fd..5c97ad6ed738 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -101,6 +101,7 @@ xe-y += xe_bb.o \ xe_trace.o \ xe_trace_bo.o \ xe_trace_guc.o \ + xe_trace_lrc.o \ xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ xe_ttm_vram_mgr.o \ @@ -110,6 +111,7 @@ xe-y += xe_bb.o \ xe_vm.o \ xe_vram.o \ xe_vram_freq.o \ + xe_vsec.o \ xe_wait_user_fence.o \ xe_wa.o \ xe_wopcm.o @@ -124,7 +126,8 @@ xe-y += \ xe_gt_sriov_vf.o \ xe_guc_relay.o \ xe_memirq.o \ - xe_sriov.o + xe_sriov.o \ + xe_sriov_vf.o xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ @@ -206,6 +209,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_ddi.o \ i915-display/intel_ddi_buf_trans.o \ i915-display/intel_display.o \ + i915-display/intel_display_conversion.o \ i915-display/intel_display_device.o \ i915-display/intel_display_driver.o \ i915-display/intel_display_irq.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index b54fe40fc5a9..fee385532fb0 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -134,6 +134,8 @@ enum xe_guc_action { XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + XE_GUC_ACTION_REGISTER_G2G = 0x4507, + XE_GUC_ACTION_DEREGISTER_G2G = 0x4508, XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, @@ -218,4 +220,22 @@ enum xe_guc_tlb_inval_mode { XE_GUC_TLB_INVAL_MODE_LITE = 0x1, }; +/* + * GuC to GuC communication (de-)registration fields: + */ +enum xe_guc_g2g_type { + XE_G2G_TYPE_IN = 0x0, + XE_G2G_TYPE_OUT, + XE_G2G_TYPE_LIMIT, +}; + +#define XE_G2G_REGISTER_DEVICE REG_GENMASK(16, 16) +#define XE_G2G_REGISTER_TILE REG_GENMASK(15, 12) +#define XE_G2G_REGISTER_TYPE REG_GENMASK(11, 8) 
+#define XE_G2G_REGISTER_SIZE REG_GENMASK(7, 0) + +#define XE_G2G_DEREGISTER_DEVICE REG_GENMASK(16, 16) +#define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) +#define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index b6a1852749dd..0b28659d94e9 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -502,6 +502,44 @@ #define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 /** + * DOC: VF2GUC_NOTIFY_RESFIX_DONE + * + * This action is used by VF to notify the GuC that the VF KMD has completed + * post-migration recovery steps. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE` = 0x5508 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * 
+---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE 0x5508u + +#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN +#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 + +#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** * DOC: VF2GUC_QUERY_SINGLE_KLV * * This action is used by VF to query value of the single KLV data. diff --git a/drivers/gpu/drm/xe/abi/guc_capture_abi.h b/drivers/gpu/drm/xe/abi/guc_capture_abi.h index e7898edc6236..dd4117553739 100644 --- a/drivers/gpu/drm/xe/abi/guc_capture_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_capture_abi.h @@ -25,7 +25,7 @@ enum guc_state_capture_type { #define GUC_STATE_CAPTURE_TYPE_MAX (GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1) -/* Class indecies for capture_class and capture_instance arrays */ +/* Class indices for capture_class and capture_instance arrays */ enum guc_capture_list_class_type { GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0, GUC_CAPTURE_LIST_CLASS_VIDEO = 1, diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 37606cf8cc5e..d633f1c739e4 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -132,7 +132,7 @@ enum { * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001 * This config sets whether strict scheduling is enabled whereby any VF * that doesn’t have work to submit is still allocated a fixed execution - * time-slice to ensure active VFs execution is always consitent even + * time-slice to ensure active VFs execution is always consistent even * during other VF reprovisiong / rebooting events. Changing this KLV * impacts all VFs and takes effect on the next VF-Switch event. 
* @@ -207,7 +207,7 @@ enum { * of and this will never be perfectly-exact (accumulated nano-second * granularity) since the GPUs clock time runs off a different crystal * from the CPUs clock. Changing this KLV on a VF that is currently - * running a context wont take effect until a new context is scheduled in. + * running a context won't take effect until a new context is scheduled in. * That said, when the PF is changing this value from 0x0 to * a non-zero value, it might never take effect if the VF is running an * infinitely long compute or shader kernel. In such a scenario, the @@ -227,7 +227,7 @@ enum { * HW is capable and this will never be perfectly-exact (accumulated * nano-second granularity) since the GPUs clock time runs off a * different crystal from the CPUs clock. Changing this KLV on a VF - * that is currently running a context wont take effect until a new + * that is currently running a context won't take effect until a new * context is scheduled in. * That said, when the PF is changing this value from 0x0 to * a non-zero value, it might never take effect if the VF is running an @@ -291,6 +291,14 @@ enum { * * :0: (default) * :1-65535: number of contexts (Gen12) + * + * _`GUC_KLV_VF_CFG_SCHED_PRIORITY` : 0x8A0C + * This config controls VF’s scheduling priority. 
+ * + * :0: LOW = schedule VF only if it has active work (default) + * :1: NORMAL = schedule VF always, irrespective of whether it has work or not + * :2: HIGH = schedule VF in the next time-slice after current active + * time-slice completes if it has active work */ #define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001 @@ -343,6 +351,12 @@ enum { #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u +#define GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY 0x8a0c +#define GUC_KLV_VF_CFG_SCHED_PRIORITY_LEN 1u +#define GUC_SCHED_PRIORITY_LOW 0u +#define GUC_SCHED_PRIORITY_NORMAL 1u +#define GUC_SCHED_PRIORITY_HIGH 2u + /* * Workaround keys: */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 0382beb4035b..4fc3e535de91 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -10,6 +10,11 @@ #include "xe_device_types.h" #include "xe_mmio.h" +static inline struct intel_uncore *to_intel_uncore(struct drm_device *drm) +{ + return &to_xe_device(drm)->uncore; +} + static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncore) { struct xe_device *xe = container_of(uncore, struct xe_device, uncore); @@ -117,10 +122,19 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg, unsigned int slow_timeout_ms, u32 *out_value) { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + bool atomic; + + /* + * Replicate the behavior from i915 here, in which sleep is not + * performed if slow_timeout_ms == 0. This is necessary because + * of some paths in display code where waits are done in atomic + * context. 
+ */ + atomic = !slow_timeout_ms && fast_timeout_us > 0; return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, fast_timeout_us + 1000 * slow_timeout_ms, - out_value, false); + out_value, atomic); } static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore, diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore_trace.h index d429d421ac70..d429d421ac70 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore_trace.h diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c index a7dbc6554d69..ac4cda2d81c7 100644 --- a/drivers/gpu/drm/xe/display/ext/i915_irq.c +++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c @@ -53,18 +53,7 @@ void gen2_irq_init(struct intel_uncore *uncore, struct i915_irq_regs regs, bool intel_irqs_enabled(struct xe_device *xe) { - /* - * XXX: i915 has a racy handling of the irq.enabled, since it doesn't - * lock its transitions. Because of that, the irq.enabled sometimes - * is not read with the irq.lock in place. - * However, the most critical cases like vblank and page flips are - * properly using the locks. - * We cannot take the lock in here or run any kind of assert because - * of i915 inconsistency. - * But at this point the xe irq is better protected against races, - * although the full solution would be protecting the i915 side. 
- */ - return xe->irq.enabled; + return atomic_read(&xe->irq.enabled); } void intel_synchronize_irq(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index 9f54fad0f1c0..b463f5bd4eed 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -40,31 +40,8 @@ int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, int size) { struct xe_bo *bo = gem_to_xe_bo(obj); - struct ttm_bo_kmap_obj map; - void *src; - bool is_iomem; - int ret; - ret = xe_bo_lock(bo, true); - if (ret) - return ret; - - ret = ttm_bo_kmap(&bo->ttm, offset >> PAGE_SHIFT, 1, &map); - if (ret) - goto out_unlock; - - offset &= ~PAGE_MASK; - src = ttm_kmap_obj_virtual(&map, &is_iomem); - src += offset; - if (is_iomem) - memcpy_fromio(dst, (void __iomem *)src, size); - else - memcpy(dst, src, size); - - ttm_bo_kunmap(&map); -out_unlock: - xe_bo_unlock(bo); - return ret; + return xe_bo_read(bo, offset, dst, size); } struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index b5502f335f53..b3921dbc52ff 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -22,6 +22,7 @@ #include "intel_display_irq.h" #include "intel_display_types.h" #include "intel_dmc.h" +#include "intel_dmc_wl.h" #include "intel_dp.h" #include "intel_encoder.h" #include "intel_fbdev.h" @@ -103,11 +104,12 @@ int xe_display_create(struct xe_device *xe) static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); + struct intel_display *display = &xe->display; if (!xe->info.probe_display) return; - intel_power_domains_cleanup(xe); + intel_power_domains_cleanup(display); } int xe_display_init_nommio(struct xe_device *xe) @@ 
-132,7 +134,7 @@ static void xe_display_fini_noirq(void *arg) if (!xe->info.probe_display) return; - intel_display_driver_remove_noirq(xe); + intel_display_driver_remove_noirq(display); intel_opregion_cleanup(display); } @@ -144,7 +146,7 @@ int xe_display_init_noirq(struct xe_device *xe) if (!xe->info.probe_display) return 0; - intel_display_driver_early_probe(xe); + intel_display_driver_early_probe(display); /* Early display init.. */ intel_opregion_setup(display); @@ -157,9 +159,9 @@ int xe_display_init_noirq(struct xe_device *xe) intel_bw_init_hw(xe); - intel_display_device_info_runtime_init(xe); + intel_display_device_info_runtime_init(display); - err = intel_display_driver_probe_noirq(xe); + err = intel_display_driver_probe_noirq(display); if (err) { intel_opregion_cleanup(display); return err; @@ -171,21 +173,23 @@ int xe_display_init_noirq(struct xe_device *xe) static void xe_display_fini_noaccel(void *arg) { struct xe_device *xe = arg; + struct intel_display *display = &xe->display; if (!xe->info.probe_display) return; - intel_display_driver_remove_nogem(xe); + intel_display_driver_remove_nogem(display); } int xe_display_init_noaccel(struct xe_device *xe) { + struct intel_display *display = &xe->display; int err; if (!xe->info.probe_display) return 0; - err = intel_display_driver_probe_nogem(xe); + err = intel_display_driver_probe_nogem(display); if (err) return err; @@ -194,10 +198,12 @@ int xe_display_init_noaccel(struct xe_device *xe) int xe_display_init(struct xe_device *xe) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return 0; - return intel_display_driver_probe(xe); + return intel_display_driver_probe(display); } void xe_display_fini(struct xe_device *xe) @@ -215,30 +221,36 @@ void xe_display_fini(struct xe_device *xe) void xe_display_register(struct xe_device *xe) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; - intel_display_driver_register(xe); + 
intel_display_driver_register(display); + intel_power_domains_enable(display); intel_register_dsm_handler(); - intel_power_domains_enable(xe); } void xe_display_unregister(struct xe_device *xe) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; intel_unregister_dsm_handler(); - intel_power_domains_disable(xe); - intel_display_driver_unregister(xe); + intel_power_domains_disable(display); + intel_display_driver_unregister(display); } void xe_display_driver_remove(struct xe_device *xe) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; - intel_display_driver_remove(xe); + intel_display_driver_remove(display); } /* IRQ-related functions */ @@ -322,25 +334,22 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) * We do a lot of poking in a lot of registers, make sure they work * properly. */ - intel_power_domains_disable(xe); + intel_power_domains_disable(display); if (!runtime) intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); if (!runtime && has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); - intel_display_driver_disable_user_access(xe); - intel_display_driver_suspend(xe); + intel_display_driver_disable_user_access(display); + intel_display_driver_suspend(display); } xe_display_flush_cleanup_work(xe); - if (!runtime) - intel_dp_mst_suspend(xe); - intel_hpd_cancel_work(xe); if (!runtime && has_display(xe)) { - intel_display_driver_suspend_access(xe); + intel_display_driver_suspend_access(display); intel_encoder_suspend_all(&xe->display); } @@ -364,20 +373,20 @@ void xe_display_pm_shutdown(struct xe_device *xe) if (!xe->info.probe_display) return; - intel_power_domains_disable(xe); + intel_power_domains_disable(display); intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); if (has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); - intel_display_driver_disable_user_access(xe); - intel_display_driver_suspend(xe); + 
intel_display_driver_disable_user_access(display); + intel_display_driver_suspend(display); } xe_display_flush_cleanup_work(xe); - intel_dp_mst_suspend(xe); + intel_dp_mst_suspend(display); intel_hpd_cancel_work(xe); if (has_display(xe)) - intel_display_driver_suspend_access(xe); + intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); intel_encoder_shutdown_all(display); @@ -402,17 +411,37 @@ void xe_display_pm_runtime_suspend(struct xe_device *xe) void xe_display_pm_suspend_late(struct xe_device *xe) { + struct intel_display *display = &xe->display; bool s2idle = suspend_to_idle(); + + if (!xe->info.probe_display) + return; + + intel_display_power_suspend_late(display, s2idle); +} + +void xe_display_pm_runtime_suspend_late(struct xe_device *xe) +{ + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; - intel_power_domains_suspend(xe, s2idle); + if (xe->d3cold.allowed) + xe_display_pm_suspend_late(xe); - intel_display_power_suspend_late(xe); + /* + * If xe_display_pm_suspend_late() is not called, it is likely + * that we will be on dynamic DC states with DMC wakelock enabled. We + * need to flush the release work in that case. + */ + intel_dmc_wl_flush_release_work(display); } void xe_display_pm_shutdown_late(struct xe_device *xe) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; @@ -421,17 +450,17 @@ void xe_display_pm_shutdown_late(struct xe_device *xe) * for now leaving all display power wells in the INIT power domain * enabled. 
*/ - intel_power_domains_driver_remove(xe); + intel_power_domains_driver_remove(display); } void xe_display_pm_resume_early(struct xe_device *xe) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; - intel_display_power_resume_early(xe); - - intel_power_domains_resume(xe); + intel_display_power_resume_early(display); } static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) @@ -446,20 +475,17 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) if (has_display(xe)) drm_mode_config_reset(&xe->drm); - intel_display_driver_init_hw(xe); - intel_hpd_init(xe); + intel_display_driver_init_hw(display); if (!runtime && has_display(xe)) - intel_display_driver_resume_access(xe); + intel_display_driver_resume_access(display); - /* MST sideband requires HPD interrupts enabled */ - if (!runtime) - intel_dp_mst_resume(xe); + intel_hpd_init(xe); if (!runtime && has_display(xe)) { - intel_display_driver_resume(xe); + intel_display_driver_resume(display); drm_kms_helper_poll_enable(&xe->drm); - intel_display_driver_enable_user_access(xe); + intel_display_driver_enable_user_access(display); } if (has_display(xe)) @@ -470,7 +496,7 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) if (!runtime) intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false); - intel_power_domains_enable(xe); + intel_power_domains_enable(display); } void xe_display_pm_resume(struct xe_device *xe) @@ -495,21 +521,23 @@ void xe_display_pm_runtime_resume(struct xe_device *xe) static void display_device_remove(struct drm_device *dev, void *arg) { - struct xe_device *xe = arg; + struct intel_display *display = arg; - intel_display_device_remove(xe); + intel_display_device_remove(display); } int xe_display_probe(struct xe_device *xe) { + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct intel_display *display; int err; if (!xe->info.probe_display) goto no_display; - intel_display_device_probe(xe); + display 
= intel_display_device_probe(pdev); - err = drmm_add_action_or_reset(&xe->drm, display_device_remove, xe); + err = drmm_add_action_or_reset(&xe->drm, display_device_remove, display); if (err) return err; diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 17afa537aee5..233f81a26c25 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -41,6 +41,7 @@ void xe_display_pm_shutdown_late(struct xe_device *xe); void xe_display_pm_resume_early(struct xe_device *xe); void xe_display_pm_resume(struct xe_device *xe); void xe_display_pm_runtime_suspend(struct xe_device *xe); +void xe_display_pm_runtime_suspend_late(struct xe_device *xe); void xe_display_pm_runtime_resume(struct xe_device *xe); #else @@ -74,6 +75,7 @@ static inline void xe_display_pm_shutdown_late(struct xe_device *xe) {} static inline void xe_display_pm_resume_early(struct xe_device *xe) {} static inline void xe_display_pm_resume(struct xe_device *xe) {} static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_runtime_suspend_late(struct xe_device *xe) {} static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {} #endif /* CONFIG_DRM_XE_DISPLAY */ diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 761510ae0690..9fa51b84737c 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -161,7 +161,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, } vma->dpt = dpt; - vma->node = dpt->ggtt_node; + vma->node = dpt->ggtt_node[tile0->id]; return 0; } @@ -213,8 +213,8 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { - vma->node = bo->ggtt_node; + if (bo->ggtt_node[ggtt->tile->id] 
&& view->type == I915_GTT_VIEW_NORMAL) { + vma->node = bo->ggtt_node[ggtt->tile->id]; } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; @@ -345,10 +345,12 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { + u8 tile_id = vma->node->ggtt->tile->id; + if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || - vma->bo->ggtt_node->base.start != vma->node->base.start) + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node[tile_id]) || + vma->bo->ggtt_node[tile_id]->base.start != vma->node->base.start) xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 8c113463a3d5..2eb9633f163a 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -275,12 +275,12 @@ static void plane_config_fini(struct intel_initial_plane_config *plane_config) } } -void intel_initial_plane_config(struct drm_i915_private *i915) +void intel_initial_plane_config(struct intel_display *display) { struct intel_initial_plane_config plane_configs[I915_MAX_PIPES] = {}; struct intel_crtc *crtc; - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_initial_plane_config *plane_config = &plane_configs[crtc->pipe]; @@ -294,7 +294,7 @@ void intel_initial_plane_config(struct drm_i915_private *i915) * can even allow for smooth boot transitions if the BIOS * fb is large enough for the active pipe configuration. 
*/ - i915->display.funcs.display->get_initial_plane_config(crtc, plane_config); + display->funcs.display->get_initial_plane_config(crtc, plane_config); /* * If the fb is shared between multiple heads, we'll @@ -302,7 +302,7 @@ void intel_initial_plane_config(struct drm_i915_private *i915) */ intel_find_initial_plane_obj(crtc, plane_configs); - if (i915->display.funcs.display->fixup_initial_plane_config(crtc, plane_config)) + if (display->funcs.display->fixup_initial_plane_config(crtc, plane_config)) intel_crtc_wait_for_next_vblank(crtc); plane_config_fini(plane_config); diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7c78496e6213..d86219dedde2 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -83,6 +83,8 @@ #define RING_IMR(base) XE_REG((base) + 0xa8) #define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac) +#define CS_INT_VEC(base) XE_REG((base) + 0x1b8) + #define RING_EIR(base) XE_REG((base) + 0xb0) #define RING_EMR(base) XE_REG((base) + 0xb4) #define RING_ESR(base) XE_REG((base) + 0xb8) @@ -138,6 +140,7 @@ #define RING_MODE(base) XE_REG((base) + 0x29c) #define GFX_DISABLE_LEGACY_MODE REG_BIT(3) +#define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13) #define RING_TIMESTAMP(base) XE_REG((base) + 0x358) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 0c9e4b2fafab..162f18e975da 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -445,6 +445,8 @@ #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) #define ENABLE_SMALLPL REG_BIT(15) +#define SMP_WAIT_FETCH_MERGING_COUNTER REG_GENMASK(11, 10) +#define SMP_FORCE_128B_OVERFETCH REG_FIELD_PREP(SMP_WAIT_FETCH_MERGING_COUNTER, 1) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) #define INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0) diff --git 
a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 045dfd09db99..57944f90bbf6 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -25,6 +25,9 @@ #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) +#define CTX_CS_INT_VEC_REG 0x5a +#define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1) + #define INDIRECT_CTX_RING_HEAD (0x02 + 1) #define INDIRECT_CTX_RING_TAIL (0x04 + 1) #define INDIRECT_CTX_RING_START (0x06 + 1) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index a9b0091cb7ee..a49561e9f3c3 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -41,14 +41,6 @@ #define OAG_OABUFFER XE_REG(0xdb08) #define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) -#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) -#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) -#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) -#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) -#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) -#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) -#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) -#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) #define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ #define OAG_OACONTROL XE_REG(0xdaf4) @@ -63,6 +55,7 @@ #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) #define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) #define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13) +#define OAG_OA_DEBUG_BUF_SIZE_SELECT REG_BIT(12) #define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8) #define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7) #define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h 
b/drivers/gpu/drm/xe/regs/xe_pmt.h new file mode 100644 index 000000000000..f45abcd96ba8 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ +#ifndef _XE_PMT_H_ +#define _XE_PMT_H_ + +#define SOC_BASE 0x280000 + +#define BMG_PMT_BASE_OFFSET 0xDB000 +#define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) + +#define BMG_TELEMETRY_BASE_OFFSET 0xE0000 +#define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET) + +#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08) +#define SG_REMAP_BITS REG_GENMASK(31, 24) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 51fd40ffafcb..0eedd6c26b1b 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -13,7 +13,7 @@ /** * struct xe_reg - Register definition * - * Register defintion to be used by the individual register. Although the same + * Register definition to be used by the individual register. Although the same * definition is used for xe_reg and xe_reg_mcr, they use different internal * APIs for accesses. 
*/ diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 3293172b0128..6cf282618836 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -44,12 +44,16 @@ #define MTL_RP_STATE_CAP XE_REG(0x138000) +#define MTL_GT_RPA_FREQUENCY XE_REG(0x138008) #define MTL_GT_RPE_FREQUENCY XE_REG(0x13800c) #define MTL_MEDIAP_STATE_CAP XE_REG(0x138020) #define MTL_RPN_CAP_MASK REG_GENMASK(24, 16) #define MTL_RP0_CAP_MASK REG_GENMASK(8, 0) +#define MTL_MPA_FREQUENCY XE_REG(0x138028) +#define MTL_RPA_MASK REG_GENMASK(8, 0) + #define MTL_MPE_FREQUENCY XE_REG(0x13802c) #define MTL_RPE_MASK REG_GENMASK(8, 0) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 3e0ae40ebbd2..6795d1d916e4 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -49,6 +49,13 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); } + + if (dma_fence_wait_timeout(fence, false, 5 * HZ) <= 0) { + dma_fence_put(fence); + KUNIT_FAIL(test, "Timeout while clearing bo.\n"); + return -ETIME; + } + dma_fence_put(fence); } @@ -257,10 +264,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc * however seems quite fragile not to also restart the GT. Try * to do that here by triggering a GT reset. 
*/ - for_each_gt(__gt, xe, id) { - xe_gt_reset_async(__gt); - flush_work(&__gt->reset.worker); - } + for_each_gt(__gt, xe, id) + xe_gt_reset(__gt); + if (err) { KUNIT_FAIL(test, "restore kernel err=%pe\n", ERR_PTR(err)); @@ -599,8 +605,6 @@ static void xe_bo_shrink_kunit(struct kunit *test) static struct kunit_case xe_bo_tests[] = { KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param), KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param), - KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, - {.speed = KUNIT_SPEED_SLOW}), {} }; @@ -611,3 +615,17 @@ struct kunit_suite xe_bo_test_suite = { .init = xe_kunit_helper_xe_device_live_test_init, }; EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite); + +static struct kunit_case xe_bo_shrink_test[] = { + KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, + {.speed = KUNIT_SPEED_SLOW}), + {} +}; + +VISIBLE_IF_KUNIT +struct kunit_suite xe_bo_shrink_test_suite = { + .name = "xe_bo_shrink", + .test_cases = xe_bo_shrink_test, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_bo_shrink_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c index 0d36ab864ec0..81277c77016d 100644 --- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c @@ -6,11 +6,13 @@ #include <kunit/test.h> extern struct kunit_suite xe_bo_test_suite; +extern struct kunit_suite xe_bo_shrink_test_suite; extern struct kunit_suite xe_dma_buf_test_suite; extern struct kunit_suite xe_migrate_test_suite; extern struct kunit_suite xe_mocs_test_suite; kunit_test_suite(xe_bo_test_suite); +kunit_test_suite(xe_bo_shrink_test_suite); kunit_test_suite(xe_dma_buf_test_suite); kunit_test_suite(xe_migrate_test_suite); kunit_test_suite(xe_mocs_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 1a192a2a941b..d5fe0ea889ad 100644 
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -83,7 +83,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, bo->size, ttm_bo_type_kernel, region | - XE_BO_FLAG_NEEDS_CPU_ACCESS); + XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); if (IS_ERR(remote)) { KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", str, remote); @@ -224,8 +225,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_PINNED); if (IS_ERR(tiny)) { - KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", - PTR_ERR(pt)); + KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", + PTR_ERR(tiny)); goto free_pt; } @@ -642,7 +643,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); if (IS_ERR(sys_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -666,7 +669,8 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); if (IS_ERR(ccs_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -690,7 +694,8 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); if (IS_ERR(vram_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(vram_bo)); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 6f9b7a266b41..ef1e5256c56a 100644 --- 
a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -58,7 +58,7 @@ static void read_l3cc_table(struct xe_gt *gt, mocs_dbg(gt, "reg_val=0x%x\n", reg_val); } else { - /* Just re-use value read on previous iteration */ + /* Just reuse value read on previous iteration */ reg_val >>= 16; } @@ -162,8 +162,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe) if (flags & HAS_LNCF_MOCS) read_l3cc_table(gt, &mocs.table); - xe_gt_reset_async(gt); - flush_work(&gt->reset.worker); + xe_gt_reset(gt); kunit_info(test, "mocs_reset_test after reset\n"); if (flags & HAS_GLOBAL_MOCS) diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 04d6b95c6d87..68fe70ce2be3 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -14,7 +14,7 @@ #include "xe_step.h" /** - * DOC: Xe ASSERTs + * DOC: Xe Asserts * * While Xe driver aims to be simpler than legacy i915 driver it is still * complex enough that some changes introduced while adding new functionality @@ -103,7 +103,7 @@ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions * or as a condition. * - * See `Xe ASSERTs`_ for general usage guidelines. + * See `Xe Asserts`_ for general usage guidelines. */ #define xe_assert(xe, condition) xe_assert_msg((xe), condition, "") #define xe_assert_msg(xe, condition, msg, arg...) ({ \ @@ -138,7 +138,7 @@ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions * or as a condition. * - * See `Xe ASSERTs`_ for general usage guidelines. + * See `Xe Asserts`_ for general usage guidelines. */ #define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "") #define xe_tile_assert_msg(tile, condition, msg, arg...) ({ \ @@ -162,7 +162,7 @@ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions * or as a condition. * - * See `Xe ASSERTs`_ for general usage guidelines. + * See `Xe Asserts`_ for general usage guidelines.
*/ #define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "") #define xe_gt_assert_msg(gt, condition, msg, arg...) ({ \ diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index ef777dbdf4ec..9570672fce33 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -41,7 +41,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) /* * We need to allocate space for the requested number of dwords, * one additional MI_BATCH_BUFFER_END dword, and additional buffer - * space to accomodate the platform-specific hardware prefetch + * space to accommodate the platform-specific hardware prefetch * requirements. */ bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ae6b337cdc54..3f5391d416d4 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -162,6 +162,15 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, } } +static bool force_contiguous(u32 bo_flags) +{ + /* + * For eviction / restore on suspend / resume objects pinned in VRAM + * must be contiguous, also only contiguous BOs support xe_bo_vmap. 
+ */ + return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); +} + static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { @@ -175,12 +184,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, xe_assert(xe, vram && vram->usable_size); io_size = vram->io_size; - /* - * For eviction / restore on suspend / resume objects - * pinned in VRAM must be contiguous - */ - if (bo_flags & (XE_BO_FLAG_PINNED | - XE_BO_FLAG_GGTT)) + if (force_contiguous(bo_flags)) place.flags |= TTM_PL_FLAG_CONTIGUOUS; if (io_size < vram->usable_size) { @@ -212,8 +216,7 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_STOLEN, - .flags = bo_flags & (XE_BO_FLAG_PINNED | - XE_BO_FLAG_GGTT) ? + .flags = force_contiguous(bo_flags) ? TTM_PL_FLAG_CONTIGUOUS : 0, }; *c += 1; @@ -442,6 +445,14 @@ static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) kfree(tt); } +static bool xe_ttm_resource_visible(struct ttm_resource *mem) +{ + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(mem); + + return vres->used_visible_size == mem->size; +} + static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { @@ -453,11 +464,9 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, return 0; case XE_PL_VRAM0: case XE_PL_VRAM1: { - struct xe_ttm_vram_mgr_resource *vres = - to_xe_ttm_vram_mgr_resource(mem); struct xe_mem_region *vram = res_to_mem_region(mem); - if (vres->used_visible_size < mem->size) + if (!xe_ttm_resource_visible(mem)) return -EINVAL; mem->bus.offset = mem->start << PAGE_SHIFT; @@ -724,7 +733,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, new_mem->mem_type == XE_PL_SYSTEM) { long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, - true, + false, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { ret = timeout; @@ -777,7 
+786,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, * / resume, some of the pinned memory is required for the * device to resume / use the GPU to move other evicted memory * (user memory) around. This likely could be optimized a bit - * futher where we find the minimum set of pinned memory + * further where we find the minimum set of pinned memory * required for resume but for simplity doing a memcpy for all * pinned memory. */ @@ -848,8 +857,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, out: if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && - ttm_bo->ttm) + ttm_bo->ttm) { + long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, + DMA_RESV_USAGE_KERNEL, + false, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + ret = timeout; + xe_tt_unmap_sg(ttm_bo->ttm); + } return ret; } @@ -858,7 +875,7 @@ out: * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory * @bo: The buffer object to move. * - * On successful completion, the object memory will be moved to sytem memory. + * On successful completion, the object memory will be moved to system memory. * * This is needed to for special handling of pinned VRAM object during * suspend-resume. 
@@ -876,6 +893,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo) }; struct ttm_operation_ctx ctx = { .interruptible = false, + .gfp_retry_mayfail = true, }; struct ttm_resource *new_mem; int ret; @@ -937,6 +955,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) { struct ttm_operation_ctx ctx = { .interruptible = false, + .gfp_retry_mayfail = false, }; struct ttm_resource *new_mem; struct ttm_place *place = &bo->placements[0]; @@ -1106,7 +1125,8 @@ static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operati static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) { struct ttm_operation_ctx ctx = { - .interruptible = false + .interruptible = false, + .gfp_retry_mayfail = false, }; if (ttm_bo->ttm) { @@ -1118,6 +1138,52 @@ static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) } } +static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, + unsigned long offset, void *buf, int len, + int write) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct iosys_map vmap; + struct xe_res_cursor cursor; + struct xe_mem_region *vram; + int bytes_left = len; + + xe_bo_assert_held(bo); + xe_device_assert_mem_access(xe); + + if (!mem_type_is_vram(ttm_bo->resource->mem_type)) + return -EIO; + + /* FIXME: Use GPU for non-visible VRAM */ + if (!xe_ttm_resource_visible(ttm_bo->resource)) + return -EIO; + + vram = res_to_mem_region(ttm_bo->resource); + xe_res_first(ttm_bo->resource, offset & PAGE_MASK, + bo->size - (offset & PAGE_MASK), &cursor); + + do { + unsigned long page_offset = (offset & ~PAGE_MASK); + int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left); + + iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping + + cursor.start); + if (write) + xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count); + else + xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count); + + buf += byte_count; + offset += byte_count; + bytes_left -= byte_count; + if 
(bytes_left) + xe_res_next(&cursor, PAGE_SIZE); + } while (bytes_left); + + return len; +} + const struct ttm_device_funcs xe_ttm_funcs = { .ttm_tt_create = xe_ttm_tt_create, .ttm_tt_populate = xe_ttm_tt_populate, @@ -1127,6 +1193,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .move = xe_bo_move, .io_mem_reserve = xe_ttm_io_mem_reserve, .io_mem_pfn = xe_ttm_io_mem_pfn, + .access_memory = xe_ttm_access_memory, .release_notify = xe_ttm_bo_release_notify, .eviction_valuable = ttm_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, @@ -1137,6 +1204,8 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) { struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct xe_tile *tile; + u8 id; if (bo->ttm.base.import_attach) drm_prime_gem_destroy(&bo->ttm.base, NULL); @@ -1144,8 +1213,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node && bo->ggtt_node->base.size) - xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); + for_each_tile(tile, xe, id) + if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size) + xe_ggtt_remove_bo(tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS if (bo->client) @@ -1243,11 +1313,50 @@ out: return ret; } +static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct ttm_buffer_object *ttm_bo = vma->vm_private_data; + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_device *xe = xe_bo_device(bo); + int ret; + + xe_pm_runtime_get(xe); + ret = ttm_bo_vm_access(vma, addr, buf, len, write); + xe_pm_runtime_put(xe); + + return ret; +} + +/** + * xe_bo_read() - Read from an xe_bo + * @bo: The buffer object to read from. + * @offset: The byte offset to start reading from. + * @dst: Location to store the read. + * @size: Size in bytes for the read. + * + * Read @size bytes from the @bo, starting from @offset, storing into @dst. 
+ * + * Return: Zero on success, or negative error. + */ +int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size) +{ + int ret; + + ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0); + if (ret >= 0 && ret != size) + ret = -EIO; + else if (ret == size) + ret = 0; + + return ret; +} + static const struct vm_operations_struct xe_gem_vm_ops = { .fault = xe_gem_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, - .access = ttm_bo_vm_access + .access = xe_bo_vm_access, }; static const struct drm_gem_object_funcs xe_gem_object_funcs = { @@ -1261,7 +1370,7 @@ static const struct drm_gem_object_funcs xe_gem_object_funcs = { /** * xe_bo_alloc - Allocate storage for a struct xe_bo * - * This funcition is intended to allocate storage to be used for input + * This function is intended to allocate storage to be used for input * to __xe_bo_create_locked(), in the case a pointer to the bo to be * created is needed before the call to __xe_bo_create_locked(). * If __xe_bo_create_locked ends up never to be called, then the @@ -1301,6 +1410,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, + .gfp_retry_mayfail = true, }; struct ttm_placement *placement; uint32_t alignment; @@ -1315,6 +1425,10 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return ERR_PTR(-EINVAL); } + /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */ + if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) + return ERR_PTR(-EINVAL); + if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) || @@ -1505,19 +1619,29 @@ __xe_bo_create_locked(struct xe_device *xe, bo->vm = vm; if (bo->flags & XE_BO_FLAG_GGTT) { - if (!tile && flags & XE_BO_FLAG_STOLEN) - tile = xe_device_get_root_tile(xe); + struct xe_tile *t; + u8 id; - xe_assert(xe, tile); + if 
(!(bo->flags & XE_BO_FLAG_GGTT_ALL)) { + if (!tile && flags & XE_BO_FLAG_STOLEN) + tile = xe_device_get_root_tile(xe); - if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { - err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, - start + bo->size, U64_MAX); - } else { - err = xe_ggtt_insert_bo(tile->mem.ggtt, bo); + xe_assert(xe, tile); + } + + for_each_tile(t, xe, id) { + if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t))) + continue; + + if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { + err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, + start + bo->size, U64_MAX); + } else { + err = xe_ggtt_insert_bo(t->mem.ggtt, bo); + } + if (err) + goto err_unlock_put_bo; } - if (err) - goto err_unlock_put_bo; } return bo; @@ -1900,6 +2024,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, + .gfp_retry_mayfail = true, }; if (vm) { @@ -1910,6 +2035,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } + trace_xe_bo_validate(bo); return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); } @@ -1961,13 +2087,15 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size) int xe_bo_vmap(struct xe_bo *bo) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); void *virtual; bool is_iomem; int ret; xe_bo_assert_held(bo); - if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) + if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) || + !force_contiguous(bo->flags))) return -EINVAL; if (!iosys_map_is_null(&bo->vmap)) @@ -2243,6 +2371,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, + .gfp_retry_mayfail = true, }; struct ttm_placement placement; struct ttm_place requested; @@ -2283,7 +2412,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) * @force_alloc: Set force_alloc in ttm_operation_ctx * * On successful completion, the object memory 
will be moved to evict - * placement. Ths function blocks until the object has been fully moved. + * placement. This function blocks until the object has been fully moved. * * Return: 0 on success. Negative error code on failure. */ @@ -2293,6 +2422,7 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc) .interruptible = false, .no_wait_gpu = false, .force_alloc = force_alloc, + .gfp_retry_mayfail = true, }; struct ttm_placement placement; int ret; @@ -2372,14 +2502,18 @@ void xe_bo_put_commit(struct llist_head *deferred) void xe_bo_put(struct xe_bo *bo) { + struct xe_tile *tile; + u8 id; + might_sleep(); if (bo) { #ifdef CONFIG_PROC_FS if (bo->client) might_lock(&bo->client->bos_lock); #endif - if (bo->ggtt_node && bo->ggtt_node->ggtt) - might_lock(&bo->ggtt_node->ggtt->lock); + for_each_tile(tile, xe_bo_device(bo), id) + if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt) + might_lock(&bo->ggtt_node[id]->ggtt->lock); drm_gem_object_put(&bo->ttm.base); } } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 7fa44a0138b0..d9386ab03140 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -39,10 +39,22 @@ #define XE_BO_FLAG_NEEDS_64K BIT(15) #define XE_BO_FLAG_NEEDS_2M BIT(16) #define XE_BO_FLAG_GGTT_INVALIDATE BIT(17) +#define XE_BO_FLAG_GGTT0 BIT(18) +#define XE_BO_FLAG_GGTT1 BIT(19) +#define XE_BO_FLAG_GGTT2 BIT(20) +#define XE_BO_FLAG_GGTT3 BIT(21) +#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \ + XE_BO_FLAG_GGTT1 | \ + XE_BO_FLAG_GGTT2 | \ + XE_BO_FLAG_GGTT3) + /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) #define XE_BO_FLAG_INTERNAL_64K BIT(31) +#define XE_BO_FLAG_GGTTx(tile) \ + (XE_BO_FLAG_GGTT0 << (tile)->id) + #define XE_PTE_SHIFT 12 #define XE_PAGE_SIZE (1 << XE_PTE_SHIFT) #define XE_PTE_MASK (XE_PAGE_SIZE - 1) @@ -194,18 +206,29 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) } static inline u32 -xe_bo_ggtt_addr(struct xe_bo *bo) +__xe_bo_ggtt_addr(struct xe_bo 
*bo, u8 tile_id) { - if (XE_WARN_ON(!bo->ggtt_node)) + struct xe_ggtt_node *ggtt_node = bo->ggtt_node[tile_id]; + + if (XE_WARN_ON(!ggtt_node)) return 0; - XE_WARN_ON(bo->ggtt_node->base.size > bo->size); - XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32)); - return bo->ggtt_node->base.start; + XE_WARN_ON(ggtt_node->base.size > bo->size); + XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32)); + return ggtt_node->base.start; +} + +static inline u32 +xe_bo_ggtt_addr(struct xe_bo *bo) +{ + xe_assert(xe_bo_device(bo), bo->tile); + + return __xe_bo_ggtt_addr(bo, bo->tile->id); } int xe_bo_vmap(struct xe_bo *bo); void xe_bo_vunmap(struct xe_bo *bo); +int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size); bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h index f57d440cc95a..25a884c64bf1 100644 --- a/drivers/gpu/drm/xe/xe_bo_doc.h +++ b/drivers/gpu/drm/xe/xe_bo_doc.h @@ -41,7 +41,7 @@ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All * user BOs are evictable and user BOs are never pinned by XE. The allocation of - * the backing store can be defered from creation time until first use which is + * the backing store can be deferred from creation time until first use which is * either mmap, bind, or pagefault. 
* * Private BOs diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 8fb2be061003..6a40eedd9db1 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -152,11 +152,17 @@ int xe_bo_restore_kernel(struct xe_device *xe) } if (bo->flags & XE_BO_FLAG_GGTT) { - struct xe_tile *tile = bo->tile; + struct xe_tile *tile; + u8 id; - mutex_lock(&tile->mem.ggtt->lock); - xe_ggtt_map_bo(tile->mem.ggtt, bo); - mutex_unlock(&tile->mem.ggtt->lock); + for_each_tile(tile, xe, id) { + if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) + continue; + + mutex_lock(&tile->mem.ggtt->lock); + xe_ggtt_map_bo(tile->mem.ggtt, bo); + mutex_unlock(&tile->mem.ggtt->lock); + } } /* diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 99196228dcc8..46dc9e4e3e46 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -12,6 +12,7 @@ #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> +#include "xe_device_types.h" #include "xe_ggtt_types.h" struct xe_device; @@ -38,8 +39,8 @@ struct xe_bo { struct ttm_place placements[XE_BO_MAX_PLACEMENTS]; /** @placement: current placement for this BO */ struct ttm_placement placement; - /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct xe_ggtt_node *ggtt_node; + /** @ggtt_node: Array of GGTT nodes if this BO is mapped in the GGTTs */ + struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE]; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. 
*/ diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index f8947e7e917e..81dc7795c065 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -30,30 +30,39 @@ /** * DOC: Xe device coredump * - * Devices overview: * Xe uses dev_coredump infrastructure for exposing the crash errors in a - * standardized way. - * devcoredump exposes a temporary device under /sys/class/devcoredump/ - * which is linked with our card device directly. - * The core dump can be accessed either from - * /sys/class/drm/card<n>/device/devcoredump/ or from - * /sys/class/devcoredump/devcd<m> where - * /sys/class/devcoredump/devcd<m>/failing_device is a link to - * /sys/class/drm/card<n>/device/. + * standardized way. Once a crash occurs, devcoredump exposes a temporary + * node under ``/sys/class/devcoredump/devcd<m>/``. The same node is also + * accessible in ``/sys/class/drm/card<n>/device/devcoredump/``. The + * ``failing_device`` symlink points to the device that crashed and created the + * coredump. * - * Snapshot at hang: - * The 'data' file is printed with a drm_printer pointer at devcoredump read - * time. For this reason, we need to take snapshots from when the hang has - * happened, and not only when the user is reading the file. Otherwise the - * information is outdated since the resets might have happened in between. + * The following characteristics are observed by xe when creating a device + * coredump: * - * 'First' failure snapshot: - * In general, the first hang is the most critical one since the following hangs - * can be a consequence of the initial hang. For this reason we only take the - * snapshot of the 'first' failure and ignore subsequent calls of this function, - * at least while the coredump device is alive. Dev_coredump has a delayed work - * queue that will eventually delete the device and free all the dump - * information. 
+ * **Snapshot at hang**: + * The 'data' file contains a snapshot of the HW and driver states at the time + * the hang happened. Due to the driver recovering from resets/crashes, it may + * not correspond to the state of the system when the file is read by + * userspace. + * + * **Coredump release**: + * After a coredump is generated, it stays in kernel memory until released by + * userspace by writing anything to it, or after an internal timer expires. The + * exact timeout may vary and should not be relied upon. Example to release + * a coredump: + * + * .. code-block:: shell + * + * $ > /sys/class/drm/card0/device/devcoredump/data + * + * **First failure only**: + * In general, the first hang is the most critical one since the following + * hangs can be a consequence of the initial hang. For this reason a snapshot + * is taken only for the first failure. Until the devcoredump is released by + * userspace or kernel, all subsequent hangs do not override the snapshot nor + * create new ones. Devcoredump has a delayed work queue that will eventually + * delete the file node and free all the dump information. 
*/ #ifdef CONFIG_DEV_COREDUMP @@ -91,6 +100,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, p = drm_coredump_printer(&iter); drm_puts(&p, "**** Xe Device Coredump ****\n"); + drm_printf(&p, "Reason: %s\n", ss->reason); drm_puts(&p, "kernel: " UTS_RELEASE "\n"); drm_puts(&p, "module: " KBUILD_MODNAME "\n"); @@ -98,7 +108,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); ts = ktime_to_timespec64(ss->boot_time); drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); - drm_printf(&p, "Process: %s\n", ss->process_name); + drm_printf(&p, "Process: %s [%d]\n", ss->process_name, ss->pid); xe_device_snapshot_print(xe, &p); drm_printf(&p, "\n**** GT #%d ****\n", ss->gt->info.id); @@ -109,7 +119,11 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, drm_puts(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(ss->guc.ct, &p); - drm_puts(&p, "\n**** Contexts ****\n"); + /* + * Don't add a new section header here because the mesa debug decoder + * tool expects the context information to be in the 'GuC CT' section. + */ + /* drm_puts(&p, "\n**** Contexts ****\n"); */ xe_guc_exec_queue_snapshot_print(ss->ge, &p); drm_puts(&p, "\n**** Job ****\n"); @@ -130,6 +144,9 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) { int i; + kfree(ss->reason); + ss->reason = NULL; + xe_guc_log_snapshot_free(ss->guc.log); ss->guc.log = NULL; @@ -170,16 +187,24 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (!ss->read.buffer) + mutex_lock(&coredump->lock); + + if (!ss->read.buffer) { + mutex_unlock(&coredump->lock); return -ENODEV; + } - if (offset >= ss->read.size) + if (offset >= ss->read.size) { + mutex_unlock(&coredump->lock); return 0; + } byte_copied = count < ss->read.size - offset ? 
count : ss->read.size - offset; memcpy(buffer, ss->read.buffer + offset, byte_copied); + mutex_unlock(&coredump->lock); + return byte_copied; } @@ -193,15 +218,18 @@ static void xe_devcoredump_free(void *data) cancel_work_sync(&coredump->snapshot.work); + mutex_lock(&coredump->lock); + xe_devcoredump_snapshot_free(&coredump->snapshot); kvfree(coredump->snapshot.read.buffer); /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); coredump->captured = false; - coredump->job = NULL; drm_info(&coredump_to_xe(coredump)->drm, "Xe device coredump has been deleted.\n"); + + mutex_unlock(&coredump->lock); } static void xe_devcoredump_deferred_snap_work(struct work_struct *work) @@ -244,10 +272,10 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) } static void devcoredump_snapshot(struct xe_devcoredump *coredump, + struct xe_exec_queue *q, struct xe_sched_job *job) { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; - struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); u32 adj_logical_mask = q->logical_mask; u32 width_mask = (0x1 << q->width) - 1; @@ -260,12 +288,14 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); - if (q->vm && q->vm->xef) + if (q->vm && q->vm->xef) { process_name = q->vm->xef->process_name; + ss->pid = q->vm->xef->pid; + } + strscpy(ss->process_name, process_name); ss->gt = q->gt; - coredump->job = job; INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); @@ -284,10 +314,11 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true); ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct); ss->ge = xe_guc_exec_queue_snapshot_capture(q); - ss->job = xe_sched_job_snapshot_capture(job); + if (job) + ss->job = 
xe_sched_job_snapshot_capture(job); ss->vm = xe_vm_snapshot_capture(q->vm); - xe_engine_snapshot_capture_for_job(job); + xe_engine_snapshot_capture_for_queue(q); queue_work(system_unbound_wq, &ss->work); @@ -297,28 +328,42 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, /** * xe_devcoredump - Take the required snapshots and initialize coredump device. + * @q: The faulty xe_exec_queue, where the issue was detected. * @job: The faulty xe_sched_job, where the issue was detected. + * @fmt: Printf format + args to describe the reason for the core dump * * This function should be called at the crash time within the serialized * gt_reset. It is skipped if we still have the core dump device available * with the information of the 'first' snapshot. */ -void xe_devcoredump(struct xe_sched_job *job) +__printf(3, 4) +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...) { - struct xe_device *xe = gt_to_xe(job->q->gt); + struct xe_device *xe = gt_to_xe(q->gt); struct xe_devcoredump *coredump = &xe->devcoredump; + va_list varg; + + mutex_lock(&coredump->lock); if (coredump->captured) { drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n"); + mutex_unlock(&coredump->lock); return; } coredump->captured = true; - devcoredump_snapshot(coredump, job); + + va_start(varg, fmt); + coredump->snapshot.reason = kvasprintf(GFP_ATOMIC, fmt, varg); + va_end(varg); + + devcoredump_snapshot(coredump, q, job); drm_info(&xe->drm, "Xe device coredump has been created\n"); drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", xe->drm.primary->index); + + mutex_unlock(&coredump->lock); } static void xe_driver_devcoredump_fini(void *arg) @@ -330,6 +375,18 @@ static void xe_driver_devcoredump_fini(void *arg) int xe_devcoredump_init(struct xe_device *xe) { + int err; + + err = drmm_mutex_init(&xe->drm, &xe->devcoredump.lock); + if (err) + return err; + + if 
(IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&xe->devcoredump.lock); + fs_reclaim_release(GFP_KERNEL); + } + return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm); } @@ -363,6 +420,15 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0; + /* + * Splitting blobs across multiple lines is not compatible with the mesa + * debug decoder tool. Note that even dropping the explicit '\n' below + * doesn't help because the GuC log is so big some underlying implementation + * still splits the lines at 512K characters. So just bail completely for + * the moment. + */ + return; + #define DMESG_MAX_LINE_LEN 800 #define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index a4eebc285fc8..6a17e6d60102 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -10,13 +10,16 @@ struct drm_printer; struct xe_device; +struct xe_exec_queue; struct xe_sched_job; #ifdef CONFIG_DEV_COREDUMP -void xe_devcoredump(struct xe_sched_job *job); +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...); int xe_devcoredump_init(struct xe_device *xe); #else -static inline void xe_devcoredump(struct xe_sched_job *job) +static inline void xe_devcoredump(struct xe_exec_queue *q, + struct xe_sched_job *job, + const char *fmt, ...) 
{ } diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 3703ddea1252..1a1d16a96b2d 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -28,6 +28,10 @@ struct xe_devcoredump_snapshot { ktime_t boot_time; /** @process_name: Name of process that triggered this gpu hang */ char process_name[TASK_COMM_LEN]; + /** @pid: Process id of process that triggered this gpu hang */ + pid_t pid; + /** @reason: The reason the coredump was triggered */ + char *reason; /** @gt: Affected GT, used by forcewake for delayed capture */ struct xe_gt *gt; @@ -76,12 +80,12 @@ struct xe_devcoredump_snapshot { * for reading the information. */ struct xe_devcoredump { - /** @captured: The snapshot of the first hang has already been taken. */ + /** @lock: protects access to entire structure */ + struct mutex lock; + /** @captured: The snapshot of the first hang has already been taken */ bool captured; /** @snapshot: Snapshot is captured at time of the first crash */ struct xe_devcoredump_snapshot snapshot; - /** @job: Point to the faulting job */ - struct xe_sched_job *job; }; #endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index d8b6bc8fc8cf..4e1839b483a0 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -44,6 +44,7 @@ #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_oa.h" #include "xe_observation.h" #include "xe_pat.h" #include "xe_pcode.h" @@ -55,6 +56,7 @@ #include "xe_ttm_sys_mgr.h" #include "xe_vm.h" #include "xe_vram.h" +#include "xe_vsec.h" #include "xe_wait_user_fence.h" #include "xe_wa.h" @@ -323,7 +325,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.revid = pdev->revision; xe->info.force_execlist = xe_modparam.force_execlist; - spin_lock_init(&xe->irq.lock); + err = xe_irq_init(xe); + if (err) + goto err; init_waitqueue_head(&xe->ufence_wq); @@ -365,6 
+369,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, goto err; } + err = drmm_mutex_init(&xe->drm, &xe->pmt.lock); + if (err) + goto err; + err = xe_display_create(xe); if (WARN_ON(err)) goto err; @@ -513,7 +521,7 @@ static int wait_for_lmem_ready(struct xe_device *xe) drm_dbg(&xe->drm, "Waiting for lmem initialization\n"); start = jiffies; - timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */ + timeout = start + secs_to_jiffies(60); /* 60 sec! */ do { if (signal_pending(current)) @@ -598,7 +606,7 @@ static int probe_has_flat_ccs(struct xe_device *xe) u32 reg; /* Always enabled/disabled, no runtime check to do */ - if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs) + if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs || IS_SRIOV_VF(xe)) return 0; gt = xe_root_mmio_gt(xe); @@ -759,6 +767,8 @@ int xe_device_probe(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); + xe_vsec_init(xe); + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_fini_display: @@ -989,7 +999,7 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg) * xe_device_declare_wedged - Declare device wedged * @xe: xe device instance * - * This is a final state that can only be cleared with a mudule + * This is a final state that can only be cleared with a module * re-probe (unbind + bind). * In this state every IOCTL will be blocked so the GT cannot be used. 
* In general it will be called upon any critical error such as gt reset diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index f1fbfe916867..fc3c2af3fb7f 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -157,8 +157,7 @@ static inline bool xe_device_has_sriov(struct xe_device *xe) static inline bool xe_device_has_msix(struct xe_device *xe) { - /* TODO: change this when MSI-X support is fully integrated */ - return false; + return xe->irq.msix.nvec > 0; } static inline bool xe_device_has_memirq(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index b9ea455d6f59..8a7b15972413 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -16,7 +16,7 @@ #include "xe_heci_gsc.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" -#include "xe_oa.h" +#include "xe_oa_types.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" @@ -42,8 +42,6 @@ struct xe_pat_ops; #define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100) #define MEDIA_VERx100(xe) ((xe)->info.media_verx100) #define IS_DGFX(xe) ((xe)->info.is_dgfx) -#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi) -#define HAS_HECI_CSCFI(xe) ((xe)->info.has_heci_cscfi) #define XE_VRAM_FLAGS_NEED64K BIT(0) @@ -296,14 +294,24 @@ struct xe_device { /** @info.va_bits: Maximum bits of a virtual address */ u8 va_bits; - /** @info.is_dgfx: is discrete device */ - u8 is_dgfx:1; - /** @info.has_asid: Has address space ID */ - u8 has_asid:1; + /* + * Keep all flags below alphabetically sorted + */ + /** @info.force_execlist: Forced execlist submission */ u8 force_execlist:1; + /** @info.has_asid: Has address space ID */ + u8 has_asid:1; + /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ + u8 has_atomic_enable_pte_bit:1; + /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ + u8 
has_device_atomics_on_smem:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; + /** @info.has_heci_cscfi: device has heci cscfi */ + u8 has_heci_cscfi:1; + /** @info.has_heci_gscfi: device has heci gscfi */ + u8 has_heci_gscfi:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; /** @info.has_mmio_ext: Device has extra MMIO address range */ @@ -314,6 +322,8 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; + /** @info.is_dgfx: is discrete device */ + u8 is_dgfx:1; /** * @info.probe_display: Probe display hardware. If set to * false, the driver will behave as if there is no display @@ -323,20 +333,12 @@ struct xe_device { * state the firmware or bootloader left it in. */ u8 probe_display:1; + /** @info.skip_guc_pc: Skip GuC based PM feature init */ + u8 skip_guc_pc:1; /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ u8 skip_mtcfg:1; /** @info.skip_pcode: skip access to PCODE uC */ u8 skip_pcode:1; - /** @info.has_heci_gscfi: device has heci gscfi */ - u8 has_heci_gscfi:1; - /** @info.has_heci_cscfi: device has heci cscfi */ - u8 has_heci_cscfi:1; - /** @info.skip_guc_pc: Skip GuC based PM feature init */ - u8 skip_guc_pc:1; - /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ - u8 has_atomic_enable_pte_bit:1; - /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ - u8 has_device_atomics_on_smem:1; } info; /** @irq: device interrupt state */ @@ -345,7 +347,15 @@ struct xe_device { spinlock_t lock; /** @irq.enabled: interrupts enabled on this device */ - bool enabled; + atomic_t enabled; + + /** @irq.msix: irq info for platforms that support MSI-X */ + struct { + /** @irq.msix.nvec: number of MSI-X interrupts */ + u16 nvec; + /** @irq.msix.indexes: used to allocate MSI-X indexes */ + struct xarray indexes; + } msix; } irq; /** @ttm: ttm device */ @@ -374,6 +384,8 @@ struct 
xe_device { /** @sriov.pf: PF specific data */ struct xe_device_pf pf; + /** @sriov.vf: VF specific data */ + struct xe_device_vf vf; /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; @@ -481,6 +493,12 @@ struct xe_device { struct mutex lock; } d3cold; + /** @pmt: Support the PMT driver callback interface */ + struct { + /** @pmt.lock: protect access for telemetry data */ + struct mutex lock; + } pmt; + /** * @pm_callback_task: Track the active task that is running in either * the runtime_suspend or runtime_resume callbacks. @@ -588,7 +606,7 @@ struct xe_file { /** @vm.xe: xarray to store VMs */ struct xarray xa; /** - * @vm.lock: Protects VM lookup + reference and removal a from + * @vm.lock: Protects VM lookup + reference and removal from * file xarray. Not an intended to be an outer lock which does * thing while being held. */ @@ -601,10 +619,15 @@ struct xe_file { struct xarray xa; /** * @exec_queue.lock: Protects exec queue lookup + reference and - * removal a frommfile xarray. Not an intended to be an outer - * lock which does thing while being held. + * removal from file xarray. Not intended to be an outer lock + * which does things while being held. */ struct mutex lock; + /** + * @exec_queue.pending_removal: items pending to be removed to + * synchronize GPU state update with ongoing query. 
+ */ + atomic_t pending_removal; } exec_queue; /** @run_ticks: hw engine class run time in ticks for this drm client */ diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 1436d8f79e70..63f30b6df70b 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -270,6 +270,49 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) } } +static struct xe_hw_engine *any_engine(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned long gt_id; + + for_each_gt(gt, xe, gt_id) { + struct xe_hw_engine *hwe = xe_gt_any_hw_engine(gt); + + if (hwe) + return hwe; + } + + return NULL; +} + +static bool force_wake_get_any_engine(struct xe_device *xe, + struct xe_hw_engine **phwe, + unsigned int *pfw_ref) +{ + enum xe_force_wake_domains domain; + unsigned int fw_ref; + struct xe_hw_engine *hwe; + struct xe_force_wake *fw; + + hwe = any_engine(xe); + if (!hwe) + return false; + + domain = xe_hw_engine_to_fw_domain(hwe); + fw = gt_to_fw(hwe->gt); + + fw_ref = xe_force_wake_get(fw, domain); + if (!xe_force_wake_ref_has_domain(fw_ref, domain)) { + xe_force_wake_put(fw, fw_ref); + return false; + } + + *phwe = hwe; + *pfw_ref = fw_ref; + + return true; +} + static void show_run_ticks(struct drm_printer *p, struct drm_file *file) { unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { }; @@ -281,7 +324,18 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) u64 gpu_timestamp; unsigned int fw_ref; + /* + * Wait for any exec queue going away: their cycles will get updated on + * context switch out, so wait for that to happen + */ + wait_var_event(&xef->exec_queue.pending_removal, + !atomic_read(&xef->exec_queue.pending_removal)); + xe_pm_runtime_get(xe); + if (!force_wake_get_any_engine(xe, &hwe, &fw_ref)) { + xe_pm_runtime_put(xe); + return; + } /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); @@ -296,33 +350,11 @@ static void 
show_run_ticks(struct drm_printer *p, struct drm_file *file) } mutex_unlock(&xef->exec_queue.lock); - /* Get the total GPU cycles */ - for_each_gt(gt, xe, gt_id) { - enum xe_force_wake_domains fw; - - hwe = xe_gt_any_hw_engine(gt); - if (!hwe) - continue; - - fw = xe_hw_engine_to_fw_domain(hwe); - - fw_ref = xe_force_wake_get(gt_to_fw(gt), fw); - if (!xe_force_wake_ref_has_domain(fw_ref, fw)) { - hwe = NULL; - xe_force_wake_put(gt_to_fw(gt), fw_ref); - break; - } - - gpu_timestamp = xe_hw_engine_read_timestamp(hwe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - break; - } + gpu_timestamp = xe_hw_engine_read_timestamp(hwe); + xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref); xe_pm_runtime_put(xe); - if (unlikely(!hwe)) - return; - for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) { const char *class_name; @@ -353,7 +385,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) * @p: The drm_printer ptr * @file: The drm_file ptr * - * This is callabck for drm fdinfo interface. Register this callback + * This is callback for drm fdinfo interface. Register this callback * in drm driver ops for show_fdinfo. * * Return: void diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 31cca938956f..df8ce550deb4 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -33,7 +33,7 @@ * * In XE we avoid all of this complication by not allowing a BO list to be * passed into an exec, using the dma-buf implicit sync uAPI, have binds as - * seperate operations, and using the DRM scheduler to flow control the ring. + * separate operations, and using the DRM scheduler to flow control the ring. * Let's deep dive on each of these. 
* * We can get away from a BO list by forcing the user to use in / out fences on diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index fd0f3b3c9101..7e1abbbfba12 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -8,6 +8,7 @@ #include <linux/nospec.h> #include <drm/drm_device.h> +#include <drm/drm_drv.h> #include <drm/drm_file.h> #include <uapi/drm/xe_drm.h> @@ -16,6 +17,7 @@ #include "xe_hw_engine_class_sysfs.h" #include "xe_hw_engine_group.h" #include "xe_hw_fence.h" +#include "xe_irq.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_migrate.h" @@ -68,6 +70,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->gt = gt; q->class = hwe->class; q->width = width; + q->msix_vec = XE_IRQ_DEFAULT_MSIX; q->logical_mask = logical_mask; q->fence_irq = &gt->fence_irq[hwe->class]; q->ring_ops = gt->ring_ops[hwe->class]; @@ -117,7 +120,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) } for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); + q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); goto err_unlock; @@ -240,6 +243,7 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, return q; } +ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); void xe_exec_queue_destroy(struct kref *ref) { @@ -262,8 +266,11 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) /* * Before releasing our ref to lrc and xef, accumulate our run ticks + * and wakeup any waiters. 
*/ xe_exec_queue_update_run_ticks(q); + if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) + wake_up_var(&q->xef->exec_queue.pending_removal); for (i = 0; i < q->width; ++i) xe_lrc_put(q->lrc[i]); @@ -762,20 +769,21 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) */ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { - struct xe_file *xef; + struct xe_device *xe = gt_to_xe(q->gt); struct xe_lrc *lrc; u32 old_ts, new_ts; + int idx; /* - * Jobs that are run during driver load may use an exec_queue, but are - * not associated with a user xe file, so avoid accumulating busyness - * for kernel specific work. + * Jobs that are executed by kernel doesn't have a corresponding xe_file + * and thus are not accounted. */ - if (!q->vm || !q->vm->xef) + if (!q->xef) return; - xef = q->vm->xef; - + /* Synchronize with unbind while holding the xe file open */ + if (!drm_dev_enter(&xe->drm, &idx)) + return; /* * Only sample the first LRC. For parallel submission, all of them are * scheduled together and we compensate that below by multiplying by @@ -786,7 +794,9 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) */ lrc = q->lrc[0]; new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; + q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; + + drm_dev_exit(idx); } /** @@ -826,7 +836,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, mutex_lock(&xef->exec_queue.lock); q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id); + if (q) + atomic_inc(&xef->exec_queue.pending_removal); mutex_unlock(&xef->exec_queue.lock); + if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 1158b6062a6c..5af5419cec7a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -41,7 +41,7 @@ struct xe_exec_queue { /** @xef: Back pointer 
to xe file if this is user created exec queue */ struct xe_file *xef; - /** @gt: graphics tile this exec queue can submit to */ + /** @gt: GT structure this exec queue can submit to */ struct xe_gt *gt; /** * @hwe: A hardware of the same class. May (physical engine) or may not @@ -63,6 +63,8 @@ struct xe_exec_queue { char name[MAX_FENCE_NAME_LEN]; /** @width: width (number BB submitted per exec) of this exec queue */ u16 width; + /** @msix_vec: MSI-X vector (for platforms that support it) */ + u16 msix_vec; /** @fence_irq: fence IRQ used to signal job completion */ struct xe_hw_fence_irq *fence_irq; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index a8c416a48812..5ef96deaa881 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -17,6 +17,7 @@ #include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_hw_fence.h" +#include "xe_irq.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_mmio.h" @@ -47,6 +48,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, struct xe_mmio *mmio = &gt->mmio; struct xe_device *xe = gt_to_xe(gt); u64 lrc_desc; + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); lrc_desc = xe_lrc_descriptor(lrc); @@ -80,8 +82,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base), xe_bo_ggtt_addr(hwe->hwsp)); xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base)); - xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + + if (xe_device_has_msix(gt_to_xe(hwe->gt))) + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); + xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode); xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), lower_32_bits(lrc_desc)); @@ -265,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, port->hwe = hwe; - port->lrc = xe_lrc_create(hwe, NULL, SZ_16K); + 
port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX); if (IS_ERR(port->lrc)) { err = PTR_ERR(port->lrc); goto err; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 558fac8bb6fb..5fcb2b4c2c13 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -362,7 +362,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) /* * So we don't need to worry about 64K GGTT layout when dealing with - * scratch entires, rather keep the scratch page in system memory on + * scratch entries, rather keep the scratch page in system memory on * platforms where 64K pages are needed for VRAM. */ flags = XE_BO_FLAG_PINNED; @@ -598,10 +598,10 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) u64 start; u64 offset, pte; - if (XE_WARN_ON(!bo->ggtt_node)) + if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id])) return; - start = bo->ggtt_node->base.start; + start = bo->ggtt_node[ggtt->tile->id]->base.start; for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); @@ -612,15 +612,16 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { - int err; u64 alignment = bo->min_align > 0 ? 
bo->min_align : XE_PAGE_SIZE; + u8 tile_id = ggtt->tile->id; + int err; if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node)) { + if (XE_WARN_ON(bo->ggtt_node[tile_id])) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); return 0; } @@ -630,19 +631,19 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); - bo->ggtt_node = xe_ggtt_node_init(ggtt); - if (IS_ERR(bo->ggtt_node)) { - err = PTR_ERR(bo->ggtt_node); - bo->ggtt_node = NULL; + bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt); + if (IS_ERR(bo->ggtt_node[tile_id])) { + err = PTR_ERR(bo->ggtt_node[tile_id]); + bo->ggtt_node[tile_id] = NULL; goto out; } mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, - alignment, 0, start, end, 0); + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, + bo->size, alignment, 0, start, end, 0); if (err) { - xe_ggtt_node_fini(bo->ggtt_node); - bo->ggtt_node = NULL; + xe_ggtt_node_fini(bo->ggtt_node[tile_id]); + bo->ggtt_node[tile_id] = NULL; } else { xe_ggtt_map_bo(ggtt, bo); } @@ -691,13 +692,15 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) */ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - if (XE_WARN_ON(!bo->ggtt_node)) + u8 tile_id = ggtt->tile->id; + + if (XE_WARN_ON(!bo->ggtt_node[tile_id])) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); - xe_ggtt_node_remove(bo->ggtt_node, + xe_ggtt_node_remove(bo->ggtt_node[tile_id], bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h 
b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 64b2ae6839db..c250ea773491 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -71,8 +71,14 @@ static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, static inline struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched) { - return list_first_entry_or_null(&sched->base.pending_list, - struct xe_sched_job, drm.list); + struct xe_sched_job *job; + + spin_lock(&sched->base.job_list_lock); + job = list_first_entry_or_null(&sched->base.pending_list, + struct xe_sched_job, drm.list); + spin_unlock(&sched->base.job_list_lock); + + return job; } static inline int diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index fc64b45d324b..24cc6a4f9a96 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -139,17 +139,29 @@ static int proxy_send_to_gsc(struct xe_gsc *gsc, u32 size) return 0; } -static int validate_proxy_header(struct xe_gsc_proxy_header *header, +static int validate_proxy_header(struct xe_gt *gt, + struct xe_gsc_proxy_header *header, u32 source, u32 dest, u32 max_size) { u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr); u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr); + int ret = 0; - if (header->destination != dest || header->source != source) - return -ENOEXEC; + if (header->destination != dest || header->source != source) { + ret = -ENOEXEC; + goto out; + } - if (length + PROXY_HDR_SIZE > max_size) - return -E2BIG; + if (length + PROXY_HDR_SIZE > max_size) { + ret = -E2BIG; + goto out; + } + + /* We only care about the status if this is a message for the driver */ + if (dest == GSC_PROXY_ADDRESSING_KMD && header->status != 0) { + ret = -EIO; + goto out; + } switch (type) { case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD: @@ -157,12 +169,20 @@ static int validate_proxy_header(struct xe_gsc_proxy_header *header, break; fallthrough; case 
GSC_PROXY_MSG_TYPE_PROXY_INVALID: - return -EIO; + ret = -EIO; + break; default: break; } - return 0; +out: + if (ret) + xe_gt_err(gt, + "GSC proxy error: s=0x%x[0x%x], d=0x%x[0x%x], t=%u, l=0x%x, st=0x%x\n", + header->source, source, header->destination, dest, + type, length, header->status); + + return ret; } #define proxy_header_wr(xe_, map_, offset_, field_, val_) \ @@ -228,12 +248,17 @@ static int proxy_query(struct xe_gsc *gsc) xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc, reply_offset, PROXY_HDR_SIZE); - /* stop if this was the last message */ - if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) + /* Check the status and stop if this was the last message */ + if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) { + ret = validate_proxy_header(gt, to_csme_hdr, + GSC_PROXY_ADDRESSING_GSC, + GSC_PROXY_ADDRESSING_KMD, + GSC_PROXY_BUFFER_SIZE - reply_offset); break; + } /* make sure the GSC-to-CSME proxy header is sane */ - ret = validate_proxy_header(to_csme_hdr, + ret = validate_proxy_header(gt, to_csme_hdr, GSC_PROXY_ADDRESSING_GSC, GSC_PROXY_ADDRESSING_CSME, GSC_PROXY_BUFFER_SIZE - reply_offset); @@ -262,7 +287,7 @@ static int proxy_query(struct xe_gsc *gsc) } /* make sure the CSME-to-GSC proxy header is sane */ - ret = validate_proxy_header(gsc->proxy.from_csme, + ret = validate_proxy_header(gt, gsc->proxy.from_csme, GSC_PROXY_ADDRESSING_CSME, GSC_PROXY_ADDRESSING_GSC, GSC_PROXY_BUFFER_SIZE - reply_offset); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index d6744be01a68..26e64530ada2 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -387,6 +387,10 @@ int xe_gt_init_early(struct xe_gt *gt) xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(>->global_invl_lock); + err = xe_gt_tlb_invalidation_init_early(gt); + if (err) + return err; + return 0; } @@ -588,10 +592,6 @@ int xe_gt_init(struct xe_gt *gt) xe_hw_fence_irq_init(>->fence_irq[i]); 
} - err = xe_gt_tlb_invalidation_init(gt); - if (err) - return err; - err = xe_gt_pagefault_init(gt); if (err) return err; @@ -748,10 +748,8 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - for_each_hw_engine(hwe, gt, id) { + for_each_hw_engine(hwe, gt, id) xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); - } /* Get CCS mode in sync between sw/hw */ xe_gt_apply_ccs_mode(gt); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 82b9b7f82fca..e504cc33ade4 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -37,7 +37,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt); /** * xe_gt_record_user_engines - save data related to engines available to - * usersapce + * userspace * @gt: GT structure * * Walk the available HW engines from gt->info.engine_mask and calculate data @@ -57,6 +57,31 @@ int xe_gt_sanitize_freq(struct xe_gt *gt); void xe_gt_remove(struct xe_gt *gt); /** + * xe_gt_wait_for_reset - wait for gt's async reset to finalize. + * @gt: GT structure + * Return: + * %true if it waited for the work to finish execution, + * %false if there was no scheduled reset or it was done. + */ +static inline bool xe_gt_wait_for_reset(struct xe_gt *gt) +{ + return flush_work(>->reset.worker); +} + +/** + * xe_gt_reset - perform synchronous reset + * @gt: GT structure + * Return: + * %true if it waited for the reset to finish, + * %false if there was no scheduled reset. 
+ */ +static inline bool xe_gt_reset(struct xe_gt *gt) +{ + xe_gt_reset_async(gt); + return xe_gt_wait_for_reset(gt); +} + +/** * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the * first that matches the same reset domain as @class * @gt: GT structure diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index b6adfb9f2030..50fffc9ebf62 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -150,7 +150,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); gt->ccs_mode = num_engines; xe_gt_record_user_engines(gt); - xe_gt_reset_async(gt); + xe_gt_reset(gt); } mutex_unlock(&xe->drm.filelist_mutex); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 3e8c351a0eab..e7792858b1e4 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -132,11 +132,9 @@ static int force_reset(struct xe_gt *gt, struct drm_printer *p) static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_reset_async(gt); + xe_gt_reset(gt); xe_pm_runtime_put(gt_to_xe(gt)); - flush_work(>->reset.worker); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 6bd39b2c5003..604bdc7c8173 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -115,6 +115,20 @@ static ssize_t rpe_freq_show(struct device *dev, } static DEVICE_ATTR_RO(rpe_freq); +static ssize_t rpa_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_rpa_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); + + return sysfs_emit(buf, "%d\n", freq); +} +static DEVICE_ATTR_RO(rpa_freq); + static ssize_t rpn_freq_show(struct device *dev, 
struct device_attribute *attr, char *buf) { @@ -202,6 +216,7 @@ static const struct attribute *freq_attrs[] = { &dev_attr_act_freq.attr, &dev_attr_cur_freq.attr, &dev_attr_rp0_freq.attr, + &dev_attr_rpa_freq.attr, &dev_attr_rpe_freq.attr, &dev_attr_rpn_freq.attr, &dev_attr_min_freq.attr, diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index fd80afeef56a..ffd3ba7f6656 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -122,10 +122,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (!xe_gt_is_media_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; - for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { - if ((gt->info.engine_mask & BIT(i))) - gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | - VDN_MFXVDENC_POWERGATE_ENABLE(j)); + if (xe->info.platform != XE_DG1) { + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { + if ((gt->info.engine_mask & BIT(i))) + gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | + VDN_MFXVDENC_POWERGATE_ENABLE(j)); + } } fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 5013d674e17d..a1676b787fdc 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -371,7 +371,7 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, * @group: steering group ID * @instance: steering instance ID * - * Return: the coverted DSS id. + * Return: the converted DSS id. */ u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance) { @@ -550,9 +550,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) * Returns true if the caller should steer to the @group/@instance values * returned. 
Returns false if the caller need not perform any steering */ -static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, - struct xe_reg_mcr reg_mcr, - u8 *group, u8 *instance) +bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, + struct xe_reg_mcr reg_mcr, + u8 *group, u8 *instance) { const struct xe_reg reg = to_xe_reg(reg_mcr); const struct xe_mmio_range *implicit_ranges; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index c0cd36021c24..bc06520befab 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -26,6 +26,10 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, u32 value); +bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, + struct xe_reg_mcr reg_mcr, + u8 *group, u8 *instance); + void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance); diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h index 5dc71394372d..11da0228cea7 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -60,6 +60,21 @@ static inline void __xe_gt_printfn_info(struct drm_printer *p, struct va_format xe_gt_info(gt, "%pV", vaf); } +static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_gt *gt = p->arg; + struct drm_printer dbg; + + /* + * The original xe_gt_dbg() callsite annotations are useless here, + * redirect to the tweaked drm_dbg_printer() instead. 
+ */ + dbg = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, NULL); + dbg.origin = p->origin; + + drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf); +} + /** * xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err() * @gt: the &xe_gt pointer to use in xe_gt_err() @@ -90,4 +105,20 @@ static inline struct drm_printer xe_gt_info_printer(struct xe_gt *gt) return p; } +/** + * xe_gt_dbg_printer - Construct a &drm_printer that outputs like xe_gt_dbg() + * @gt: the &xe_gt pointer to use in xe_gt_dbg() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_gt_dbg_printer(struct xe_gt *gt) +{ + struct drm_printer p = { + .printfn = __xe_gt_printfn_dbg, + .arg = gt, + .origin = (const void *)_THIS_IP_, + }; + return p; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 192643d63d22..878e96281c03 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -207,6 +207,11 @@ static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout); } +static int pf_push_vf_cfg_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority) +{ + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY, priority); +} + static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) { return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size); @@ -1540,8 +1545,6 @@ static u64 pf_query_max_lmem(struct xe_gt *gt) #ifdef CONFIG_DRM_XE_DEBUG_SRIOV #define MAX_FAIR_LMEM SZ_128M /* XXX: make it small for the driver bringup */ -#else -#define MAX_FAIR_LMEM SZ_2G /* XXX: known issue with allocating BO over 2GiB */ #endif static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) @@ -1767,6 +1770,77 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi 
return preempt_timeout; } +static const char *sched_priority_unit(u32 priority) +{ + return priority == GUC_SCHED_PRIORITY_LOW ? "(low)" : + priority == GUC_SCHED_PRIORITY_NORMAL ? "(normal)" : + priority == GUC_SCHED_PRIORITY_HIGH ? "(high)" : + "(?)"; +} + +static int pf_provision_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + + err = pf_push_vf_cfg_sched_priority(gt, vfid, priority); + if (unlikely(err)) + return err; + + config->sched_priority = priority; + return 0; +} + +static int pf_get_sched_priority(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->sched_priority; +} + +/** + * xe_gt_sriov_pf_config_set_sched_priority() - Configure scheduling priority. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @priority: requested scheduling priority + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_sched_priority(gt, vfid, priority); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, priority, + xe_gt_sriov_pf_config_get_sched_priority(gt, vfid), + "scheduling priority", sched_priority_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_sched_priority - Get VF's scheduling priority. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: VF's (or PF's) scheduling priority. 
+ */ +u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid) +{ + u32 priority; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + priority = pf_get_sched_priority(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return priority; +} + static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config) { lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -2046,7 +2120,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { - bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); + bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid); valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; @@ -2087,7 +2161,7 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid) * * This function can only be called on PF. * - * Return: mininum size of the buffer or the number of bytes saved, + * Return: minimum size of the buffer or the number of bytes saved, * or a negative error code on failure. 
*/ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 0c55aa40a1a7..f894e9d4abba 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -44,6 +44,9 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout); +u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority); + u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, enum xe_guc_klv_threshold_index index); int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index 2d3b73d78f14..686c7b3b6d7a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -33,6 +33,8 @@ struct xe_gt_sriov_config { u32 exec_quantum; /** @preempt_timeout: preemption timeout in microseconds. */ u32 preempt_timeout; + /** @sched_priority: scheduling priority. */ + u32 sched_priority; /** @thresholds: GuC thresholds for adverse events notifications. 
*/ u32 thresholds[XE_GUC_KLV_NUM_THRESHOLDS]; }; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 05df4ab3514b..b2521dd6ec42 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -164,6 +164,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) * │ │ ├── contexts_spare * │ │ ├── exec_quantum_ms * │ │ ├── preempt_timeout_us + * │ │ ├── sched_priority * │ ├── vf1 * │ │ ├── ggtt_quota * │ │ ├── lmem_quota @@ -171,6 +172,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) * │ │ ├── contexts_quota * │ │ ├── exec_quantum_ms * │ │ ├── preempt_timeout_us + * │ │ ├── sched_priority */ #define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \ @@ -209,6 +211,7 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(sched_priority, u32, "%llu\n"); /* * /sys/kernel/debug/dri/0/ @@ -295,6 +298,8 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne &exec_quantum_fops); debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent, &preempt_timeout_fops); + debugfs_create_file_unsafe("sched_priority", 0644, parent, parent, + &sched_priority_fops); /* register all threshold attributes */ #define register_threshold_attribute(TAG, NAME, ...) 
\ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h index 0bf12d89ceb2..6af219d93c3b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h @@ -18,7 +18,7 @@ * is within a range of supported VF numbers (up to maximum number of VFs that * driver can support, including VF0 that represents the PF itself). * - * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. + * Note: Effective only on debug builds. See `Xe Asserts`_ for more information. */ #define xe_gt_sriov_pf_assert_vfid(gt, vfid) xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid)) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c index fae5be5a2a11..c00fb354705f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c @@ -135,14 +135,33 @@ static int pf_update_policy_u32(struct xe_gt *gt, u16 key, u32 *policy, u32 valu return 0; } +static void pf_bulk_reset_sched_priority(struct xe_gt *gt, u32 priority) +{ + unsigned int total_vfs = 1 + xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int n; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 0; n < total_vfs; n++) + gt->sriov.pf.vfs[n].config.sched_priority = priority; +} + static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable) { + int err; + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); - return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, - >->sriov.pf.policy.guc.sched_if_idle, - enable); + err = pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, + >->sriov.pf.policy.guc.sched_if_idle, + enable); + + if (!err) + pf_bulk_reset_sched_priority(gt, enable ? 
GUC_SCHED_PRIORITY_NORMAL : + GUC_SCHED_PRIORITY_LOW); + return err; } static int pf_reprovision_sched_if_idle(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index d3baba50f085..cca5d5732802 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -27,6 +27,7 @@ #include "xe_guc_relay.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_sriov_vf.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" @@ -223,6 +224,44 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) return 0; } +static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) +{ + u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE), + }; + int ret; + + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +/** + * xe_gt_sriov_vf_notify_resfix_done - Notify GuC about resource fixups apply completed. + * @gt: the &xe_gt struct instance linked to target GuC + * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. + */ +int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt) +{ + struct xe_guc *guc = >->uc.guc; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + err = guc_action_vf_notify_resfix_done(guc); + if (unlikely(err)) + xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n", + ERR_PTR(err)); + else + xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n"); + + return err; +} + static int guc_action_query_single_klv(struct xe_guc *guc, u32 key, u32 *value, u32 value_len) { @@ -692,6 +731,30 @@ failed: return err; } +/** + * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery, + * or just mark that a GuC is ready for it. 
+ * @gt: the &xe_gt struct instance linked to target GuC + * + * This function shall be called only by VF. + */ +void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags); + /* + * We need to be certain that if all flags were set, at least one + * thread will notice that and schedule the recovery. + */ + smp_mb__after_atomic(); + + xe_gt_sriov_info(gt, "ready for recovery after migration\n"); + xe_sriov_vf_start_migration_recovery(xe); +} + static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) { xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index e541ce57bec2..912d20814261 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -17,6 +17,8 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); +int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); +void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index c7364a5aef8f..7a6c1d808e41 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -12,7 +12,7 @@ /** * xe_gt_stats_incr - Increments the specified stats counter - * @gt: graphics tile + * @gt: GT structure * @id: xe_gt_stats_id type id that needs to be incremented * @incr: value to be incremented with * @@ -32,7 +32,7 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { /** * xe_gt_stats_print_info - Print the GT stats - * @gt: graphics tile + * @gt: GT structure * @p: drm_printer where it will be printed out. 
* * This prints out all the available GT stats. diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 91d944f6c4e4..38325ef53617 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -6,15 +6,11 @@ #ifndef _XE_GT_STATS_H_ #define _XE_GT_STATS_H_ +#include "xe_gt_stats_types.h" + struct xe_gt; struct drm_printer; -enum xe_gt_stats_id { - XE_GT_STATS_ID_TLB_INVAL, - /* must be the last entry */ - __XE_GT_STATS_NUM_IDS, -}; - #ifdef CONFIG_DEBUG_FS int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h new file mode 100644 index 000000000000..2fc055e39f27 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_STATS_TYPES_H_ +#define _XE_GT_STATS_TYPES_H_ + +enum xe_gt_stats_id { + XE_GT_STATS_ID_TLB_INVAL, + /* must be the last entry */ + __XE_GT_STATS_NUM_IDS, +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index 03b225364101..8db78d616b6f 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -8,6 +8,7 @@ #include <regs/xe_gt_regs.h> #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_gt_throttle.h" #include "xe_mmio.h" @@ -53,6 +54,7 @@ static u32 read_status(struct xe_gt *gt) { u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK; + xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status); return status; } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 3cb228c773cd..0a93831c0a02 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ 
b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -65,6 +65,14 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe __invalidation_fence_signal(xe, fence); } +void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +{ + if (WARN_ON_ONCE(!fence->gt)) + return; + + __invalidation_fence_signal(gt_to_xe(fence->gt), fence); +} + static void xe_gt_tlb_fence_timeout(struct work_struct *work) { struct xe_gt *gt = container_of(work, struct xe_gt, @@ -98,15 +106,15 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) } /** - * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state - * @gt: graphics tile + * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state + * @gt: GT structure * * Initialize GT TLB invalidation state, purely software initialization, should * be called once during driver load. * * Return: 0 on success, negative error code on error. */ -int xe_gt_tlb_invalidation_init(struct xe_gt *gt) +int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt) { gt->tlb_invalidation.seqno = 1; INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); @@ -120,7 +128,7 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset - * @gt: graphics tile + * @gt: GT structure * * Signal any pending invalidation fences, should be called during a GT reset */ @@ -236,7 +244,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, /** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC - * @gt: graphics tile + * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion * @@ -253,14 +261,23 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, 0, /* seqno, replaced in send_tlb_invalidation */ MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), }; + int ret; + + ret = send_tlb_invalidation(>->uc.guc, fence, action, + ARRAY_SIZE(action)); + /* + * -ECANCELED 
indicates the CT is stopped for a GT reset. TLB caches + * should be nuked on a GT reset so this error can be ignored. + */ + if (ret == -ECANCELED) + return 0; - return send_tlb_invalidation(>->uc.guc, fence, action, - ARRAY_SIZE(action)); + return ret; } /** * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT - * @gt: graphics tile + * @gt: GT structure * * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is * synchronous. @@ -309,7 +326,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an * address range * - * @gt: graphics tile + * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion * @start: start address @@ -395,7 +412,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, /** * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA - * @gt: graphics tile + * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion, can be NULL * @vma: VMA to invalidate diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index f430d5797af7..672acfcdf0d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -14,7 +14,8 @@ struct xe_gt; struct xe_guc; struct xe_vma; -int xe_gt_tlb_invalidation_init(struct xe_gt *gt); +int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); + void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, @@ -28,6 +29,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, bool stack); +void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence); static inline 
void xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index a287b98ee70b..6e66bf0e8b3f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -11,10 +11,10 @@ #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" #include "xe_gt_sriov_vf_types.h" -#include "xe_gt_stats.h" +#include "xe_gt_stats_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" -#include "xe_oa.h" +#include "xe_oa_types.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" #include "xe_uc_types.h" diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 7f704346a8f4..408365dfe4ee 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -44,7 +44,15 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, struct xe_bo *bo) { struct xe_device *xe = guc_to_xe(guc); - u32 addr = xe_bo_ggtt_addr(bo); + u32 addr; + + /* + * For most BOs, the address on the allocating tile is fine. However for + * some, e.g. G2G CTB, the address on a specific tile is required as it + * might be different for each tile. So, just always ask for the address + * on the target GuC. + */ + addr = __xe_bo_ggtt_addr(bo, gt_to_tile(guc_to_gt(guc))->id); /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); @@ -139,6 +147,34 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc) return flags; } +static bool needs_wa_dual_queue(struct xe_gt *gt) +{ + /* + * The DUAL_QUEUE_WA tells the GuC to not allow concurrent submissions + * on RCS and CCSes with different address spaces, which on DG2 is + * required as a WA for an HW bug. 
+ */ + if (XE_WA(gt, 22011391025)) + return true; + + /* + * On newer platforms, the HW has been updated to not allow parallel + * execution of different address spaces, so the RCS/CCS will stall the + * context switch if one of the other RCS/CCSes is busy with a different + * address space. While functionally correct, having a submission + * stalled on the HW limits the GuC ability to shuffle things around and + * can cause complications if the non-stalled submission runs for a long + * time, because the GuC doesn't know that the stalled submission isn't + * actually running and might declare it as hung. Therefore, we enable + * the DUAL_QUEUE_WA on all newer platforms on GTs that have CCS engines + * to move management back to the GuC. + */ + if (CCS_MASK(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) + return true; + + return false; +} + static u32 guc_ctl_wa_flags(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -151,7 +187,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 14014475959)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; - if (XE_WA(gt, 22011391025)) + if (needs_wa_dual_queue(gt)) flags |= GUC_WA_DUAL_QUEUE; /* @@ -244,6 +280,293 @@ static void guc_write_params(struct xe_guc *guc) xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(1 + i), guc->params[i]); } +static int guc_action_register_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev, + u32 desc_addr, u32 buff_addr, u32 size) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + u32 action[] = { + XE_GUC_ACTION_REGISTER_G2G, + FIELD_PREP(XE_G2G_REGISTER_SIZE, size / SZ_4K - 1) | + FIELD_PREP(XE_G2G_REGISTER_TYPE, type) | + FIELD_PREP(XE_G2G_REGISTER_TILE, dst_tile) | + FIELD_PREP(XE_G2G_REGISTER_DEVICE, dst_dev), + desc_addr, + buff_addr, + }; + + xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT)); + xe_assert(xe, !(size % SZ_4K)); + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static int 
guc_action_deregister_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + u32 action[] = { + XE_GUC_ACTION_DEREGISTER_G2G, + FIELD_PREP(XE_G2G_DEREGISTER_TYPE, type) | + FIELD_PREP(XE_G2G_DEREGISTER_TILE, dst_tile) | + FIELD_PREP(XE_G2G_DEREGISTER_DEVICE, dst_dev), + }; + + xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT)); + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +#define G2G_DEV(gt) (((gt)->info.type == XE_GT_TYPE_MAIN) ? 0 : 1) + +#define G2G_BUFFER_SIZE (SZ_4K) +#define G2G_DESC_SIZE (64) +#define G2G_DESC_AREA_SIZE (SZ_4K) + +/* + * Generate a unique id for each bi-directional CTB for each pair of + * near and far tiles/devices. The id can then be used as an index into + * a single allocation that is sub-divided into multiple CTBs. + * + * For example, with two devices per tile and two tiles, the table should + * look like: + * Far <tile>.<dev> + * 0.0 0.1 1.0 1.1 + * N 0.0 --/-- 00/01 02/03 04/05 + * e 0.1 01/00 --/-- 06/07 08/09 + * a 1.0 03/02 07/06 --/-- 10/11 + * r 1.1 05/04 09/08 11/10 --/-- + * + * Where each entry is Rx/Tx channel id. + * + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would + * be reading from channel #11 and writing to channel #10. Whereas, + * GuC #2 talking to GuC #3 would be read on #10 and write to #11. 
+ */ +static unsigned int g2g_slot(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev, + u32 type, u32 max_inst, bool have_dev) +{ + u32 near = near_tile, far = far_tile; + u32 idx = 0, x, y, direction; + int i; + + if (have_dev) { + near = (near << 1) | near_dev; + far = (far << 1) | far_dev; + } + + /* No need to send to one's self */ + if (far == near) + return -1; + + if (far > near) { + /* Top right table half */ + x = far; + y = near; + + /* T/R is 'forwards' direction */ + direction = type; + } else { + /* Bottom left table half */ + x = near; + y = far; + + /* B/L is 'backwards' direction */ + direction = (1 - type); + } + + /* Count the rows prior to the target */ + for (i = y; i > 0; i--) + idx += max_inst - i; + + /* Count this row up to the target */ + idx += (x - 1 - y); + + /* Slots are in Rx/Tx pairs */ + idx *= 2; + + /* Pick Rx/Tx direction */ + idx += direction; + + return idx; +} + +static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 type, bool have_dev) +{ + struct xe_gt *near_gt = guc_to_gt(near_guc); + struct xe_device *xe = gt_to_xe(near_gt); + struct xe_bo *g2g_bo; + u32 near_tile = gt_to_tile(near_gt)->id; + u32 near_dev = G2G_DEV(near_gt); + u32 far_tile = gt_to_tile(far_gt)->id; + u32 far_dev = G2G_DEV(far_gt); + u32 max = xe->info.gt_count; + u32 base, desc, buf; + int slot; + + /* G2G is not allowed between different cards */ + xe_assert(xe, xe == gt_to_xe(far_gt)); + + g2g_bo = near_guc->g2g.bo; + xe_assert(xe, g2g_bo); + + slot = g2g_slot(near_tile, near_dev, far_tile, far_dev, type, max, have_dev); + xe_assert(xe, slot >= 0); + + base = guc_bo_ggtt_addr(near_guc, g2g_bo); + desc = base + slot * G2G_DESC_SIZE; + buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; + + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); + + return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, + desc, buf, 
G2G_BUFFER_SIZE); +} + +static void guc_g2g_deregister(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type) +{ + guc_action_deregister_g2g_buffer(guc, type, far_tile, far_dev); +} + +static u32 guc_g2g_size(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + unsigned int count = xe->info.gt_count; + u32 num_channels = (count * (count - 1)) / 2; + + xe_assert(xe, num_channels * XE_G2G_TYPE_LIMIT * G2G_DESC_SIZE <= G2G_DESC_AREA_SIZE); + + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; +} + +static bool xe_guc_g2g_wanted(struct xe_device *xe) +{ + /* Can't do GuC to GuC communication if there is only one GuC */ + if (xe->info.gt_count <= 1) + return false; + + /* No current user */ + return false; +} + +static int guc_g2g_alloc(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + u32 g2g_size; + + if (guc->g2g.bo) + return 0; + + if (gt->info.id != 0) { + struct xe_gt *root_gt = xe_device_get_gt(xe, 0); + struct xe_guc *root_guc = &root_gt->uc.guc; + struct xe_bo *bo; + + bo = xe_bo_get(root_guc->g2g.bo); + if (!bo) + return -ENODEV; + + guc->g2g.bo = bo; + guc->g2g.owned = false; + return 0; + } + + g2g_size = guc_g2g_size(guc); + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_ALL | + XE_BO_FLAG_GGTT_INVALIDATE); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); + guc->g2g.bo = bo; + guc->g2g.owned = true; + + return 0; +} + +static void guc_g2g_fini(struct xe_guc *guc) +{ + if (!guc->g2g.bo) + return; + + /* Unpinning the owned object is handled by generic shutdown */ + if (!guc->g2g.owned) + xe_bo_put(guc->g2g.bo); + + guc->g2g.bo = NULL; +} + +static int guc_g2g_start(struct xe_guc *guc) +{ + struct xe_gt *far_gt, *gt = guc_to_gt(guc); + struct xe_device 
*xe = gt_to_xe(gt); + unsigned int i, j; + int t, err; + bool have_dev; + + if (!guc->g2g.bo) { + int ret; + + ret = guc_g2g_alloc(guc); + if (ret) + return ret; + } + + /* GuC interface will need extending if more GT device types are ever created. */ + xe_gt_assert(gt, (gt->info.type == XE_GT_TYPE_MAIN) || (gt->info.type == XE_GT_TYPE_MEDIA)); + + /* Channel numbering depends on whether there are multiple GTs per tile */ + have_dev = xe->info.gt_count > xe->info.tile_count; + + for_each_gt(far_gt, xe, i) { + u32 far_tile, far_dev; + + if (far_gt->info.id == gt->info.id) + continue; + + far_tile = gt_to_tile(far_gt)->id; + far_dev = G2G_DEV(far_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) { + err = guc_g2g_register(guc, far_gt, t, have_dev); + if (err) { + while (--t >= 0) + guc_g2g_deregister(guc, far_tile, far_dev, t); + goto err_deregister; + } + } + } + + return 0; + +err_deregister: + for_each_gt(far_gt, xe, j) { + u32 tile, dev; + + if (far_gt->info.id == gt->info.id) + continue; + + if (j >= i) + break; + + tile = gt_to_tile(far_gt)->id; + dev = G2G_DEV(far_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) + guc_g2g_deregister(guc, tile, dev, t); + } + + return err; +} + static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; @@ -253,6 +576,8 @@ static void guc_fini_hw(void *arg) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_uc_fini_hw(&guc_to_gt(guc)->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); + + guc_g2g_fini(guc); } /** @@ -423,7 +748,16 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) int xe_guc_post_load_init(struct xe_guc *guc) { + int ret; + xe_guc_ads_populate_post_load(&guc->ads); + + if (xe_guc_g2g_wanted(guc_to_xe(guc))) { + ret = guc_g2g_start(guc); + if (ret) + return ret; + } + guc->submission_state.enabled = true; return 0; @@ -945,7 +1279,6 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT); - xe_assert(xe, !xe_guc_ct_enabled(&guc->ct)); 
xe_assert(xe, len); xe_assert(xe, len <= VF_SW_FLAG_COUNT); xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT); @@ -1099,10 +1432,21 @@ int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val) return guc_self_cfg(guc, key, 2, val); } +static void xe_guc_sw_0_irq_handler(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + + if (IS_SRIOV_VF(gt_to_xe(gt))) + xe_gt_sriov_vf_migrated_event_handler(gt); +} + void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) { if (iir & GUC_INTR_GUC2HOST) xe_guc_ct_irq_handler(&guc->ct); + + if (iir & GUC_INTR_SW_INT_0) + xe_guc_sw_0_irq_handler(guc); } void xe_guc_sanitize(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 4e746ae98888..fab259adc380 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -29,6 +29,7 @@ #include "xe_platform_types.h" #include "xe_uc_fw.h" #include "xe_wa.h" +#include "xe_gt_mcr.h" /* Slack of a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 @@ -231,11 +232,6 @@ static size_t guc_ads_size(struct xe_guc_ads *ads) guc_ads_private_data_size(ads); } -static bool needs_wa_1607983814(struct xe_device *xe) -{ - return GRAPHICS_VERx100(xe) < 1250; -} - static size_t calculate_regset_size(struct xe_gt *gt) { struct xe_reg_sr_entry *sr_entry; @@ -250,7 +246,7 @@ static size_t calculate_regset_size(struct xe_gt *gt) count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; - if (needs_wa_1607983814(gt_to_xe(gt))) + if (XE_WA(gt, 1607983814)) count += LNCFCMOCS_REG_COUNT; return count * sizeof(struct guc_mmio_reg); @@ -701,6 +697,20 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, .flags = reg.masked ? 
GUC_REGSET_MASKED : 0, }; + if (reg.mcr) { + struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr); + u8 group, instance; + + bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg, + &group, &instance); + + if (steer) { + entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group); + entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance); + entry.flags |= GUC_REGSET_STEERING_NEEDED; + } + } + xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), &entry, sizeof(entry)); } @@ -709,7 +719,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, struct iosys_map *regset_map, struct xe_hw_engine *hwe) { - struct xe_device *xe = ads_to_xe(ads); struct xe_hw_engine *hwe_rcs_reset_domain = xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); struct xe_reg_sr_entry *entry; @@ -740,8 +749,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); } - /* Wa_1607983814 */ - if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { + if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { guc_mmio_regset_write_one(ads, regset_map, XELP_LNCFCMOCS(i), count++); diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index d63912d28246..f6d523e4c5fe 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1806,7 +1806,6 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm if (!devcore_snapshot->matched_node) return; - xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC); xe_gt_assert(gt, snapshot->hwe); capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class); @@ -1815,7 +1814,8 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm snapshot->name ? 
snapshot->name : "", snapshot->logical_instance); drm_printf(p, "\tCapture_source: %s\n", - snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual"); + devcore_snapshot->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? + "GuC" : "Manual"); drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]); drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", snapshot->forcewake.domain, snapshot->forcewake.ref); @@ -1840,29 +1840,24 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm } /** - * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job. - * @job: The job object. + * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the queue. + * @q: The exec queue object * - * Search within the capture outlist for the job, could be used for check if - * GuC capture is ready for the job. + * Search within the capture outlist for the queue, could be used for check if + * GuC capture is ready for the queue. * If found, the locked boolean of the node will be flagged. 
* * Returns: found guc-capture node ptr else NULL */ struct __guc_capture_parsed_output * -xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) +xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q) { struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - struct xe_exec_queue *q; struct xe_device *xe; u16 guc_class = GUC_LAST_ENGINE_CLASS + 1; struct xe_devcoredump_snapshot *ss; - if (!job) - return NULL; - - q = job->q; if (!q || !q->gt) return NULL; @@ -1874,7 +1869,7 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC) return ss->matched_node; - /* Find hwe for the job */ + /* Find hwe for the queue */ for_each_hw_engine(hwe, q->gt, id) { if (hwe != q->hwe) continue; @@ -1906,17 +1901,16 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) } /** - * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine - * @job: The job object + * xe_engine_snapshot_capture_for_queue - Take snapshot of associated engine + * @q: The exec queue object * * Take snapshot of associated HW Engine * * Returns: None. 
*/ void -xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) +xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q) { - struct xe_exec_queue *q = job->q; struct xe_device *xe = gt_to_xe(q->gt); struct xe_devcoredump *coredump = &xe->devcoredump; struct xe_hw_engine *hwe; @@ -1934,11 +1928,12 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) } if (!coredump->snapshot.hwe[id]) { - coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job); + coredump->snapshot.hwe[id] = + xe_hw_engine_snapshot_capture(hwe, q); } else { struct __guc_capture_parsed_output *new; - new = xe_guc_capture_get_matching_and_lock(job); + new = xe_guc_capture_get_matching_and_lock(q); if (new) { struct xe_guc *guc = &q->gt->uc.guc; @@ -1960,7 +1955,7 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) } /* - * xe_guc_capture_put_matched_nodes - Cleanup macthed nodes + * xe_guc_capture_put_matched_nodes - Cleanup matched nodes * @guc: The GuC object * * Free matched node and all nodes with the equal guc_id from diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h index 97a795d13dd1..20a078dc4b85 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.h +++ b/drivers/gpu/drm/xe/xe_guc_capture.h @@ -11,10 +11,10 @@ #include "xe_guc.h" #include "xe_guc_fwif.h" +struct xe_exec_queue; struct xe_guc; struct xe_hw_engine; struct xe_hw_engine_snapshot; -struct xe_sched_job; static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class) { @@ -50,10 +50,10 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc); const struct __guc_mmio_reg_descr_group * xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type, enum guc_capture_list_class_type capture_class, bool is_ext); -struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job); +struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q); void 
xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot); void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p); -void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job); +void xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q); void xe_guc_capture_steered_list_init(struct xe_guc *guc); void xe_guc_capture_put_matched_nodes(struct xe_guc *guc); int xe_guc_capture_init(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h index 2057125b1bfa..ca2d390ccbee 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h @@ -22,7 +22,7 @@ enum capture_register_data_type { * struct __guc_mmio_reg_descr - GuC mmio register descriptor * * xe_guc_capture module uses these structures to define a register - * (offsets, names, flags,...) that are used at the ADS regisration + * (offsets, names, flags,...) that are used at the ADS registration * time as well as during runtime processing and reporting of error- * capture states generated by GuC just prior to engine reset events. */ @@ -48,7 +48,7 @@ struct __guc_mmio_reg_descr { * * xe_guc_capture module uses these structures to maintain static * tables (per unique platform) that consists of lists of registers - * (offsets, names, flags,...) that are used at the ADS regisration + * (offsets, names, flags,...) that are used at the ADS registration * time as well as during runtime processing and reporting of error- * capture states generated by GuC just prior to engine reset events. 
*/ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8aeb1789805c..8b65c5e959cc 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -54,6 +54,7 @@ enum { CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */ + CT_DEAD_CRASH, /* 0x8000 */ }; static void ct_dead_worker_func(struct work_struct *w); @@ -469,8 +470,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) * after any existing dead state has been dumped. */ spin_lock_irq(&ct->dead.lock); - if (ct->dead.reason) + if (ct->dead.reason) { ct->dead.reason |= (1 << CT_DEAD_STATE_REARM); + queue_work(system_unbound_wq, &ct->dead.worker); + } spin_unlock_irq(&ct->dead.lock); #endif @@ -707,7 +710,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, --len; ++action; - /* Write H2G ensuring visable before descriptor update */ + /* Write H2G ensuring visible before descriptor update */ xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32)); xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32)); xe_device_wmb(xe); @@ -1017,7 +1020,6 @@ retry_same_fence: } ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); - if (!ret) { LNL_FLUSH_WORK(&ct->g2h_worker); if (g2h_fence.done) { @@ -1121,6 +1123,24 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) return 0; } +static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action) +{ + struct xe_gt *gt = ct_to_gt(ct); + + if (action == XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED) + xe_gt_err(gt, "GuC Crash dump notification\n"); + else if (action == XE_GUC_ACTION_NOTIFY_EXCEPTION) + xe_gt_err(gt, "GuC Exception notification\n"); + else + xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action); + + CT_DEAD(ct, NULL, CRASH); + + kick_reset(ct); + + return 0; +} + static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = 
ct_to_gt(ct); @@ -1295,13 +1315,17 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case GUC_ACTION_GUC2PF_ADVERSE_EVENT: ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len); break; + case XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: + case XE_GUC_ACTION_NOTIFY_EXCEPTION: + ret = guc_crash_process_msg(ct, action); + break; default: xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } if (ret) { - xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n", - action, ERR_PTR(ret)); + xe_gt_err(gt, "G2H action %#04x failed (%pe) len %u msg %*ph\n", + action, ERR_PTR(ret), hxg_len, (int)sizeof(u32) * hxg_len, hxg); CT_DEAD(ct, NULL, PROCESS_FAILED); } @@ -1359,7 +1383,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) * this function and nowhere else. Hence, they cannot be different * unless two g2h_read calls are running concurrently. Which is not * possible because it is guarded by ct->fast_lock. And yet, some - * discrete platforms are reguarly hitting this error :(. + * discrete platforms are regularly hitting this error :(. 
* * desc_head rolling backwards shouldn't cause any noticeable * problems - just a delay in GuC being allowed to proceed past that diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 08ffe59f22fa..057153f89b30 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -17,6 +17,7 @@ #define G2H_LEN_DW_TLB_INVALIDATE 3 #define GUC_ID_MAX 65535 +#define GUC_ID_UNKNOWN 0xffffffff #define GUC_CONTEXT_DISABLE 0 #define GUC_CONTEXT_ENABLE 1 diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c index 9d99fe266d97..146a6eda9e06 100644 --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c @@ -49,6 +49,8 @@ const char *xe_guc_klv_key_to_string(u16 key) return "begin_db_id"; case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY: return "begin_ctx_id"; + case GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY: + return "sched_priority"; /* VF CFG threshold keys */ #define define_threshold_key_to_string_case(TAG, NAME, ...) 
\ diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index e8b9faeaef64..df7f130fb663 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -38,6 +38,7 @@ #define FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) +#define RPA_MASK REG_GENMASK(31, 16) #define GT_PERF_STATUS XE_REG(0x1381b4) #define CAGF_MASK REG_GENMASK(19, 11) @@ -328,6 +329,19 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) freq); } +static void mtl_update_rpa_value(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(&gt->mmio, MTL_MPA_FREQUENCY); + else + reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPA_FREQUENCY); + + pc->rpa_freq = decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); +} + static void mtl_update_rpe_value(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); @@ -341,6 +355,25 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc) pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); } +static void tgl_update_rpa_value(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 reg; + + /* + * For PVC we still need to use fused RP1 as the approximation for RPe + * For other platforms than PVC we get the resolved RPe directly from + * PCODE at a different register + */ + if (xe->info.platform == XE_PVC) + reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); + else + reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); + + pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + static void tgl_update_rpe_value(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); @@ -365,10 +398,13 @@ static void pc_update_rp_values(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); struct xe_device *xe = gt_to_xe(gt); - if (GRAPHICS_VERx100(xe) >= 1270) + if (GRAPHICS_VERx100(xe) >= 1270) { + mtl_update_rpa_value(pc); mtl_update_rpe_value(pc); - else + } 
else { + tgl_update_rpa_value(pc); tgl_update_rpe_value(pc); + } /* * RPe is decided at runtime by PCODE. In the rare case where that's @@ -421,8 +457,8 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. */ - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { xe_force_wake_put(gt_to_fw(gt), fw_ref); return -ETIMEDOUT; } @@ -448,6 +484,19 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) } /** + * xe_guc_pc_get_rpa_freq - Get the RPa freq + * @pc: The GuC PC + * + * Returns: RPa freq. + */ +u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc) +{ + pc_update_rp_values(pc); + + return pc->rpa_freq; +} + +/** * xe_guc_pc_get_rpe_freq - Get the RPe freq * @pc: The GuC PC * @@ -481,10 +530,10 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { - struct xe_gt *gt = pc_to_gt(pc); - unsigned int fw_ref; int ret; + xe_device_assert_mem_access(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -492,24 +541,12 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) goto out; } - /* - * GuC SLPC plays with min freq request when GuCRC is enabled - * Block RC6 for a more reliable read. 
- */ - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - ret = -ETIMEDOUT; - goto fw; - } - ret = pc_action_query_task_state(pc); if (ret) - goto fw; + goto out; *freq = pc_get_min_freq(pc); -fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); out: mutex_unlock(&pc->freq_lock); return ret; @@ -969,8 +1006,8 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) xe_gt_assert(gt, xe_device_uc_enabled(xe)); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { xe_force_wake_put(gt_to_fw(gt), fw_ref); return -ETIMEDOUT; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index efda432fadfc..619f59cd633c 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -21,6 +21,7 @@ int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc); +u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq); diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 13810be015db..2978ac9a249b 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -17,6 +17,8 @@ struct xe_guc_pc { struct xe_bo *bo; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; + /** @rpa_freq: HW RPa frequency - The Achievable one */ + u32 rpa_freq; /** @rpe_freq: HW RPe frequency - The Efficient one */ u32 rpe_freq; /** @rpn_freq: HW RPN frequency - The Minimum one */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c 
b/drivers/gpu/drm/xe/xe_guc_submit.c index 6f4a9812b4f4..913c74d6e2ae 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -412,12 +412,11 @@ static const int xe_exec_queue_prio_to_guc[] = { static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) { struct exec_queue_policy policy; - struct xe_device *xe = guc_to_xe(guc); enum xe_exec_queue_priority prio = q->sched_props.priority; u32 timeslice_us = q->sched_props.timeslice_us; u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - xe_assert(xe, exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); __guc_exec_queue_policy_start_klv(&policy, q->guc->id); __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); @@ -451,12 +450,11 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc, struct guc_ctxt_registration_info *info) { #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) - struct xe_device *xe = guc_to_xe(guc); u32 action[MAX_MLRC_REG_SIZE]; int len = 0; int i; - xe_assert(xe, xe_exec_queue_is_parallel(q)); + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q)); action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; action[len++] = info->flags; @@ -479,7 +477,7 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc, action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); } - xe_assert(xe, len <= MAX_MLRC_REG_SIZE); + xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); #undef MAX_MLRC_REG_SIZE xe_guc_ct_send(&guc->ct, action, len, 0, 0); @@ -513,7 +511,7 @@ static void register_exec_queue(struct xe_exec_queue *q) struct xe_lrc *lrc = q->lrc[0]; struct guc_ctxt_registration_info info; - xe_assert(xe, !exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); memset(&info, 0, sizeof(info)); info.context_idx = q->guc->id; @@ -603,7 +601,7 @@ static int wq_noop_append(struct xe_exec_queue *q) if (wq_wait_for_space(q, wq_space_until_wrap(q))) 
return -ENODEV; - xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); + xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw)); parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | @@ -643,13 +641,13 @@ static void wq_item_append(struct xe_exec_queue *q) wqi[i++] = lrc->ring.tail / sizeof(u64); } - xe_assert(xe, i == wqi_size / sizeof(u32)); + xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[q->guc->wqi_tail / sizeof(u32)])); xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); q->guc->wqi_tail += wqi_size; - xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); + xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); xe_device_wmb(xe); @@ -661,7 +659,6 @@ static void wq_item_append(struct xe_exec_queue *q) static void submit_exec_queue(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_lrc *lrc = q->lrc[0]; u32 action[3]; u32 g2h_len = 0; @@ -669,7 +666,7 @@ static void submit_exec_queue(struct xe_exec_queue *q) int len = 0; bool extra_submit = false; - xe_assert(xe, exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); if (xe_exec_queue_is_parallel(q)) wq_item_append(q); @@ -716,12 +713,11 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) struct xe_sched_job *job = to_xe_sched_job(drm_job); struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct dma_fence *fence = NULL; bool lr = xe_exec_queue_is_lr(q); - xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || - exec_queue_banned(q) || exec_queue_suspended(q)); + xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || + exec_queue_banned(q) || exec_queue_suspended(q)); trace_xe_sched_job_run(job); @@ -823,7 +819,7 @@ static void xe_guc_exec_queue_trigger_cleanup(struct 
xe_exec_queue *q) */ void xe_guc_submit_wedge(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; unsigned long index; int err; @@ -833,7 +829,8 @@ void xe_guc_submit_wedge(struct xe_guc *guc) err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { - drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " + "Although device is wedged.\n"); return; } @@ -865,11 +862,10 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) container_of(w, struct xe_guc_exec_queue, lr_tdr); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; bool wedged; - xe_assert(xe, xe_exec_queue_is_lr(q)); + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); @@ -903,13 +899,19 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) !exec_queue_pending_disable(q) || xe_guc_read_stopped(guc), HZ * 5); if (!ret) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); + xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n", + q->guc->id); + xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n", + q->guc->id); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); return; } } + if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0])) + xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id); + xe_sched_submission_start(sched); } @@ -1068,13 +1070,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * do manual capture first and decide later if we need to use it */ if (!exec_queue_killed(q) && !xe->devcoredump.captured && - !xe_guc_capture_get_matching_and_lock(job)) { + 
!xe_guc_capture_get_matching_and_lock(q)) { /* take force wake before engine register manual capture */ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); - xe_engine_snapshot_capture_for_job(job); + xe_engine_snapshot_capture_for_queue(q); xe_force_wake_put(gt_to_fw(q->gt), fw_ref); } @@ -1132,7 +1134,12 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (!ret || xe_guc_read_stopped(guc)) { trigger_reset: if (!ret) - xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); + xe_gt_warn(guc_to_gt(guc), + "Schedule disable failed to respond, guc_id=%d", + q->guc->id); + xe_devcoredump(q, job, + "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", + q->guc->id, ret, xe_guc_read_stopped(guc)); set_exec_queue_extra_ref(q); xe_exec_queue_get(q); /* GT reset owns this */ set_exec_queue_banned(q); @@ -1162,7 +1169,10 @@ trigger_reset: trace_xe_sched_job_timedout(job); if (!exec_queue_killed(q)) - xe_devcoredump(job); + xe_devcoredump(q, job, + "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, q->flags); /* * Kernel jobs should never fail, nor should VM jobs if they do @@ -1216,7 +1226,7 @@ sched_enable: enable_scheduling(q); rearm: /* - * XXX: Ideally want to adjust timeout based on current exection time + * XXX: Ideally want to adjust timeout based on current execution time * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. 
*/ @@ -1277,9 +1287,8 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); + xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); if (exec_queue_registered(q)) @@ -1315,11 +1324,10 @@ static void __suspend_fence_signal(struct xe_exec_queue *q) static void suspend_fence_signal(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || - xe_guc_read_stopped(guc)); - xe_assert(xe, q->guc->suspend_pending); + xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || + xe_guc_read_stopped(guc)); + xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); __suspend_fence_signal(q); } @@ -1415,12 +1423,11 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; int err, i; - xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); + xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); ge = kzalloc(sizeof(*ge), GFP_KERNEL); if (!ge) @@ -1633,9 +1640,8 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q) struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, !q->guc->suspend_pending); + xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); xe_sched_msg_lock(sched); guc_exec_queue_try_add_msg(q, msg, RESUME); @@ -1708,7 +1714,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) ban = true; } } else if 
(xe_exec_queue_is_lr(q) && - (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) { + !xe_lrc_ring_is_idle(q->lrc[0])) { ban = true; } @@ -1747,9 +1753,8 @@ void xe_guc_submit_stop(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, xe_guc_read_stopped(guc) == 1); + xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); mutex_lock(&guc->submission_state.lock); @@ -1791,9 +1796,8 @@ int xe_guc_submit_start(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, xe_guc_read_stopped(guc) == 1); + xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); mutex_lock(&guc->submission_state.lock); atomic_dec(&guc->submission_state.stopped); @@ -1814,22 +1818,22 @@ int xe_guc_submit_start(struct xe_guc *guc) static struct xe_exec_queue * g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; if (unlikely(guc_id >= GUC_ID_MAX)) { - drm_err(&xe->drm, "Invalid guc_id %u", guc_id); + xe_gt_err(gt, "Invalid guc_id %u\n", guc_id); return NULL; } q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); if (unlikely(!q)) { - drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); + xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id); return NULL; } - xe_assert(xe, guc_id >= q->guc->id); - xe_assert(xe, guc_id < (q->guc->id + q->width)); + xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id); + xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width)); return q; } @@ -1898,15 +1902,14 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; - u32 runnable_state = msg[1]; + u32 guc_id, runnable_state; - if (unlikely(len < 
2)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 2)) return -EPROTO; - } + + guc_id = msg[0]; + runnable_state = msg[1]; q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -1940,14 +1943,13 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; + u32 guc_id; - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 1)) return -EPROTO; - } + + guc_id = msg[0]; q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -1969,14 +1971,13 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; + u32 guc_id; - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 1)) return -EPROTO; - } + + guc_id = msg[0]; q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -2016,10 +2017,8 @@ int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) { u32 status; - if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) { - xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len); + if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) return -EPROTO; - } status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) @@ -2034,13 +2033,21 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; + u32 guc_id; - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 1)) 
return -EPROTO; + + guc_id = msg[0]; + + if (guc_id == GUC_ID_UNKNOWN) { + /* + * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF + * context. In such case only PF will be notified about that fault. + */ + xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); + return 0; } q = g2h_exec_queue_lookup(guc, guc_id); @@ -2062,24 +2069,22 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); u8 guc_class, instance; u32 reason; - if (unlikely(len != 3)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len != 3)) return -EPROTO; - } guc_class = msg[0]; instance = msg[1]; reason = msg[2]; /* Unexpected failure of a hardware feature, log an actual error */ - drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", - guc_class, instance, reason); + xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", + guc_class, instance, reason); - xe_gt_reset_async(guc_to_gt(guc)); + xe_gt_reset_async(gt); return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index fa75f57bf5da..83a41ebcdc91 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -64,6 +64,15 @@ struct xe_guc { struct xe_guc_pc pc; /** @dbm: GuC Doorbell Manager */ struct xe_guc_db_mgr dbm; + + /** @g2g: GuC to GuC communication state */ + struct { + /** @g2g.bo: Storage for GuC to GuC communication channels */ + struct xe_bo *bo; + /** @g2g.owned: Is the BO owned by this GT or just mapped in */ + bool owned; + } g2g; + /** @submission_state: GuC submission state */ struct { /** @submission_state.idm: GuC context ID Manager */ @@ -79,6 +88,7 @@ struct xe_guc { /** @submission_state.fini_wq: submit fini wait queue */ wait_queue_head_t fini_wq; } submission_state; + /** 
@hwconfig: Hardware config state */ struct { /** @hwconfig.bo: buffer object of the hardware config */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 65b2e147c4b9..d765bfd3636b 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -92,7 +92,7 @@ void xe_heci_gsc_fini(struct xe_device *xe) { struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; - if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe)) + if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi) return; if (heci_gsc->adev) { @@ -177,7 +177,7 @@ void xe_heci_gsc_init(struct xe_device *xe) const struct heci_gsc_def *def; int ret; - if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe)) + if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi) return; heci_gsc->irq = -1; @@ -222,7 +222,7 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir) if ((iir & GSC_IRQ_INTF(1)) == 0) return; - if (!HAS_HECI_GSCFI(xe)) { + if (!xe->info.has_heci_gscfi) { drm_warn_once(&xe->drm, "GSC irq: not supported"); return; } @@ -242,7 +242,7 @@ void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir) if ((iir & CSC_IRQ_INTF(1)) == 0) return; - if (!HAS_HECI_CSCFI(xe)) { + if (!xe->info.has_heci_cscfi) { drm_warn_once(&xe->drm, "CSC irq: not supported"); return; } diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 2c32dc46f7d4..089834467880 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -159,7 +159,7 @@ void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) * This function allocates the storage of the userptr sg table. * It is caller's responsibility to free it calling sg_free_table. 
* - * returns: 0 for succuss; negative error no on failure + * returns: 0 for success; negative error no on failure */ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 1557acee3523..fc447751fe78 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -324,6 +324,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) { u32 ccs_mask = xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) xe_mmio_write32(&hwe->gt->mmio, RCU_MODE, @@ -332,8 +333,10 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), xe_bo_ggtt_addr(hwe->hwsp)); - xe_hw_engine_mmio_write32(hwe, RING_MODE(0), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + + if (xe_device_has_msix(gt_to_xe(hwe->gt))) + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode); xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), _MASKED_BIT_DISABLE(STOP_RING)); xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); @@ -419,7 +422,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) * Bspec: 72161 */ const u8 mocs_write_idx = gt->mocs.uc_index; - const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && + const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) && (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? 
gt->mocs.wb_index : gt->mocs.uc_index; u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | @@ -574,7 +577,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_gt_assert(gt, gt->info.engine_mask & BIT(id)); xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, XE_BO_FLAG_VRAM_IF_DGFX(tile) | @@ -773,7 +775,7 @@ static void check_gsc_availability(struct xe_gt *gt) xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_MASK, ~0); - drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); + drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n"); } } @@ -829,7 +831,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) /** * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. * @hwe: Xe HW Engine. - * @job: The job object. + * @q: The exec queue object. * * This can be printed out in a later stage like during dev_coredump * analysis. @@ -838,7 +840,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) * caller, using `xe_hw_engine_snapshot_free`. 
*/ struct xe_hw_engine_snapshot * -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job) +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q) { struct xe_hw_engine_snapshot *snapshot; struct __guc_capture_parsed_output *node; @@ -864,15 +866,14 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) return snapshot; - if (job) { + if (q) { /* If got guc capture, set source to GuC */ - node = xe_guc_capture_get_matching_and_lock(job); + node = xe_guc_capture_get_matching_and_lock(q); if (node) { struct xe_device *xe = gt_to_xe(hwe->gt); struct xe_devcoredump *coredump = &xe->devcoredump; coredump->snapshot.matched_node = node; - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC; xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node"); return snapshot; } @@ -880,7 +881,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job /* otherwise, do manual capture */ xe_engine_manual_capture(hwe, snapshot); - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL; xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot"); return snapshot; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index da0a6922a26f..6b5f9fa2a594 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -11,7 +11,7 @@ struct drm_printer; struct drm_xe_engine_class_instance; struct xe_device; -struct xe_sched_job; +struct xe_exec_queue; #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN @@ -56,7 +56,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe); u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, enum xe_engine_class engine_class); struct xe_hw_engine_snapshot * -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job); +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q); 
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot); void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 719f27ef00a5..e4191a7a2c31 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -106,7 +106,7 @@ struct xe_hw_engine_class_intf { * Contains all the hardware engine state for physical instances. */ struct xe_hw_engine { - /** @gt: graphics tile this hw engine belongs to */ + /** @gt: GT structure this hw engine belongs to */ struct xe_gt *gt; /** @name: name of this hw engine */ const char *name; @@ -165,8 +165,6 @@ enum xe_hw_engine_snapshot_source_id { struct xe_hw_engine_snapshot { /** @name: name of the hw engine */ char *name; - /** @source: Data source, either manual or GuC */ - enum xe_hw_engine_snapshot_source_id source; /** @hwe: hw engine */ struct xe_hw_engine *hwe; /** @logical_instance: logical instance of this hw engine */ diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index 364a61f4bfda..58a8d09afe5c 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -41,7 +41,7 @@ struct xe_hw_fence_irq { * to a xe_hw_fence_irq, maintains serial seqno. 
*/ struct xe_hw_fence_ctx { - /** @gt: graphics tile of hardware fence context */ + /** @gt: GT structure of hardware fence context */ struct xe_gt *gt; /** @irq: fence irq handler */ struct xe_hw_fence_irq *irq; diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index b7995ebd54ab..32f5a67a917b 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -10,6 +10,7 @@ #include <drm/drm_managed.h> #include "display/xe_display.h" +#include "regs/xe_guc_regs.h" #include "regs/xe_irq_regs.h" #include "xe_device.h" #include "xe_drv.h" @@ -29,6 +30,11 @@ #define IIR(offset) XE_REG(offset + 0x8) #define IER(offset) XE_REG(offset + 0xc) +static int xe_irq_msix_init(struct xe_device *xe); +static void xe_irq_msix_free(struct xe_device *xe); +static int xe_irq_msix_request_irqs(struct xe_device *xe); +static void xe_irq_msix_synchronize_irq(struct xe_device *xe); + static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg) { u32 val = xe_mmio_read32(mmio, reg); @@ -192,7 +198,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { gsc_mask = irqs | GSC_ER_COMPLETE; heci_mask = GSC_IRQ_INTF(1); - } else if (HAS_HECI_GSCFI(xe)) { + } else if (xe->info.has_heci_gscfi) { gsc_mask = GSC_IRQ_INTF(1); } @@ -325,7 +331,7 @@ static void gt_irq_handler(struct xe_tile *tile, if (class == XE_ENGINE_CLASS_OTHER) { /* HECI GSCFI interrupts come from outside of GT */ - if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE) + if (xe->info.has_heci_gscfi && instance == OTHER_GSC_INSTANCE) xe_heci_gsc_irq_handler(xe, intr_vec); else gt_other_irq_handler(engine_gt, instance, intr_vec); @@ -348,12 +354,8 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) unsigned long intr_dw[2]; u32 identity[32]; - spin_lock(&xe->irq.lock); - if (!xe->irq.enabled) { - spin_unlock(&xe->irq.lock); + if (!atomic_read(&xe->irq.enabled)) return IRQ_NONE; - } - spin_unlock(&xe->irq.lock); master_ctl = 
xelp_intr_disable(xe); if (!master_ctl) { @@ -417,12 +419,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) /* TODO: This really shouldn't be copied+pasted */ - spin_lock(&xe->irq.lock); - if (!xe->irq.enabled) { - spin_unlock(&xe->irq.lock); + if (!atomic_read(&xe->irq.enabled)) return IRQ_NONE; - } - spin_unlock(&xe->irq.lock); master_tile_ctl = dg1_intr_disable(xe); if (!master_tile_ctl) { @@ -459,7 +457,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) * the primary tile. */ if (id == 0) { - if (HAS_HECI_CSCFI(xe)) + if (xe->info.has_heci_cscfi) xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); @@ -508,7 +506,7 @@ static void gt_irq_reset(struct xe_tile *tile) if ((tile->media_gt && xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) || - HAS_HECI_GSCFI(tile_to_xe(tile))) { + tile_to_xe(tile)->info.has_heci_gscfi) { xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0); xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0); @@ -580,6 +578,11 @@ static void xe_irq_reset(struct xe_device *xe) if (IS_SRIOV_VF(xe)) return vf_irq_reset(xe); + if (xe_device_uses_memirq(xe)) { + for_each_tile(tile, xe, id) + xe_memirq_reset(&tile->memirq); + } + for_each_tile(tile, xe, id) { if (GRAPHICS_VERx100(xe) >= 1210) dg1_irq_reset(tile); @@ -622,6 +625,14 @@ static void xe_irq_postinstall(struct xe_device *xe) if (IS_SRIOV_VF(xe)) return vf_irq_postinstall(xe); + if (xe_device_uses_memirq(xe)) { + struct xe_tile *tile; + unsigned int id; + + for_each_tile(tile, xe, id) + xe_memirq_postinstall(&tile->memirq); + } + xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); /* @@ -644,12 +655,8 @@ static irqreturn_t vf_mem_irq_handler(int irq, void *arg) struct xe_tile *tile; unsigned int id; - spin_lock(&xe->irq.lock); - if (!xe->irq.enabled) { - spin_unlock(&xe->irq.lock); + if (!atomic_read(&xe->irq.enabled)) return IRQ_NONE; - } - 
spin_unlock(&xe->irq.lock); for_each_tile(tile, xe, id) xe_memirq_handler(&tile->memirq); @@ -668,63 +675,85 @@ static irq_handler_t xe_irq_handler(struct xe_device *xe) return xelp_irq_handler; } -static void irq_uninstall(void *arg) +static int xe_irq_msi_request_irqs(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + irq_handler_t irq_handler; + int irq, err; + + irq_handler = xe_irq_handler(xe); + if (!irq_handler) { + drm_err(&xe->drm, "No supported interrupt handler"); + return -EINVAL; + } + + irq = pci_irq_vector(pdev, 0); + err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); + if (err < 0) { + drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err); + return err; + } + + return 0; +} + +static void xe_irq_msi_free(struct xe_device *xe) { - struct xe_device *xe = arg; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int irq; - if (!xe->irq.enabled) + irq = pci_irq_vector(pdev, 0); + free_irq(irq, xe); +} + +static void irq_uninstall(void *arg) +{ + struct xe_device *xe = arg; + + if (!atomic_xchg(&xe->irq.enabled, 0)) return; - xe->irq.enabled = false; xe_irq_reset(xe); - irq = pci_irq_vector(pdev, 0); - free_irq(irq, xe); + if (xe_device_has_msix(xe)) + xe_irq_msix_free(xe); + else + xe_irq_msi_free(xe); +} + +int xe_irq_init(struct xe_device *xe) +{ + spin_lock_init(&xe->irq.lock); + + return xe_irq_msix_init(xe); } int xe_irq_install(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - unsigned int irq_flags = PCI_IRQ_MSIX; - irq_handler_t irq_handler; - int err, irq, nvec; - - irq_handler = xe_irq_handler(xe); - if (!irq_handler) { - drm_err(&xe->drm, "No supported interrupt handler"); - return -EINVAL; - } + unsigned int irq_flags = PCI_IRQ_MSI; + int nvec = 1; + int err; xe_irq_reset(xe); - nvec = pci_msix_vec_count(pdev); - if (nvec <= 0) { - if (nvec == -EINVAL) { - /* MSIX capability is not supported in the device, using MSI */ - irq_flags = PCI_IRQ_MSI; - nvec = 1; - } else { - 
drm_err(&xe->drm, "MSIX: Failed getting count\n"); - return nvec; - } + if (xe_device_has_msix(xe)) { + nvec = xe->irq.msix.nvec; + irq_flags = PCI_IRQ_MSIX; } err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags); if (err < 0) { - drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err); + drm_err(&xe->drm, "Failed to allocate IRQ vectors: %d\n", err); return err; } - irq = pci_irq_vector(pdev, 0); - err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); - if (err < 0) { - drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err); + err = xe_device_has_msix(xe) ? xe_irq_msix_request_irqs(xe) : + xe_irq_msi_request_irqs(xe); + if (err) return err; - } - xe->irq.enabled = true; + atomic_set(&xe->irq.enabled, 1); xe_irq_postinstall(xe); @@ -735,20 +764,28 @@ int xe_irq_install(struct xe_device *xe) return 0; free_irq_handler: - free_irq(irq, xe); + if (xe_device_has_msix(xe)) + xe_irq_msix_free(xe); + else + xe_irq_msi_free(xe); return err; } -void xe_irq_suspend(struct xe_device *xe) +static void xe_irq_msi_synchronize_irq(struct xe_device *xe) { - int irq = to_pci_dev(xe->drm.dev)->irq; + synchronize_irq(to_pci_dev(xe->drm.dev)->irq); +} - spin_lock_irq(&xe->irq.lock); - xe->irq.enabled = false; /* no new irqs */ - spin_unlock_irq(&xe->irq.lock); +void xe_irq_suspend(struct xe_device *xe) +{ + atomic_set(&xe->irq.enabled, 0); /* no new irqs */ - synchronize_irq(irq); /* flush irqs */ + /* flush irqs */ + if (xe_device_has_msix(xe)) + xe_irq_msix_synchronize_irq(xe); + else + xe_irq_msi_synchronize_irq(xe); xe_irq_reset(xe); /* turn irqs off */ } @@ -762,10 +799,205 @@ void xe_irq_resume(struct xe_device *xe) * 1. no irq will arrive before the postinstall * 2. display is not yet resumed */ - xe->irq.enabled = true; + atomic_set(&xe->irq.enabled, 1); xe_irq_reset(xe); xe_irq_postinstall(xe); /* turn irqs on */ for_each_gt(gt, xe, id) xe_irq_enable_hwe(gt); } + +/* MSI-X related definitions and functions below. 
*/ + +enum xe_irq_msix_static { + GUC2HOST_MSIX = 0, + DEFAULT_MSIX = XE_IRQ_DEFAULT_MSIX, + /* Must be last */ + NUM_OF_STATIC_MSIX, +}; + +static int xe_irq_msix_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int nvec = pci_msix_vec_count(pdev); + + if (nvec == -EINVAL) + return 0; /* MSI */ + + if (nvec < 0) { + drm_err(&xe->drm, "Failed getting MSI-X vectors count: %d\n", nvec); + return nvec; + } + + xe->irq.msix.nvec = nvec; + xa_init_flags(&xe->irq.msix.indexes, XA_FLAGS_ALLOC); + return 0; +} + +static irqreturn_t guc2host_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_tile *tile; + u8 id; + + if (!atomic_read(&xe->irq.enabled)) + return IRQ_NONE; + + for_each_tile(tile, xe, id) + xe_guc_irq_handler(&tile->primary_gt->uc.guc, + GUC_INTR_GUC2HOST); + + return IRQ_HANDLED; +} + +static irqreturn_t xe_irq_msix_default_hwe_handler(int irq, void *arg) +{ + unsigned int tile_id, gt_id; + struct xe_device *xe = arg; + struct xe_memirq *memirq; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_tile *tile; + struct xe_gt *gt; + + if (!atomic_read(&xe->irq.enabled)) + return IRQ_NONE; + + for_each_tile(tile, xe, tile_id) { + memirq = &tile->memirq; + if (!memirq->bo) + continue; + + for_each_gt(gt, xe, gt_id) { + if (gt->tile != tile) + continue; + + for_each_hw_engine(hwe, gt, id) + xe_memirq_hwe_handler(memirq, hwe); + } + } + + return IRQ_HANDLED; +} + +static int xe_irq_msix_alloc_vector(struct xe_device *xe, void *irq_buf, + bool dynamic_msix, u16 *msix) +{ + struct xa_limit limit; + int ret; + u32 id; + + limit = (dynamic_msix) ? 
XA_LIMIT(NUM_OF_STATIC_MSIX, xe->irq.msix.nvec - 1) : + XA_LIMIT(*msix, *msix); + ret = xa_alloc(&xe->irq.msix.indexes, &id, irq_buf, limit, GFP_KERNEL); + if (ret) + return ret; + + if (dynamic_msix) + *msix = id; + + return 0; +} + +static void xe_irq_msix_release_vector(struct xe_device *xe, u16 msix) +{ + xa_erase(&xe->irq.msix.indexes, msix); +} + +static int xe_irq_msix_request_irq_internal(struct xe_device *xe, irq_handler_t handler, + void *irq_buf, const char *name, u16 msix) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int ret, irq; + + irq = pci_irq_vector(pdev, msix); + if (irq < 0) + return irq; + + ret = request_irq(irq, handler, IRQF_SHARED, name, irq_buf); + if (ret < 0) + return ret; + + return 0; +} + +int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf, + const char *name, bool dynamic_msix, u16 *msix) +{ + int ret; + + ret = xe_irq_msix_alloc_vector(xe, irq_buf, dynamic_msix, msix); + if (ret) + return ret; + + ret = xe_irq_msix_request_irq_internal(xe, handler, irq_buf, name, *msix); + if (ret) { + drm_err(&xe->drm, "Failed to request IRQ for MSI-X %u\n", *msix); + xe_irq_msix_release_vector(xe, *msix); + return ret; + } + + return 0; +} + +void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int irq; + void *irq_buf; + + irq_buf = xa_load(&xe->irq.msix.indexes, msix); + if (!irq_buf) + return; + + irq = pci_irq_vector(pdev, msix); + if (irq < 0) { + drm_err(&xe->drm, "MSI-X %u can't be released, there is no matching IRQ\n", msix); + return; + } + + free_irq(irq, irq_buf); + xe_irq_msix_release_vector(xe, msix); +} + +int xe_irq_msix_request_irqs(struct xe_device *xe) +{ + int err; + u16 msix; + + msix = GUC2HOST_MSIX; + err = xe_irq_msix_request_irq(xe, guc2host_irq_handler, xe, + DRIVER_NAME "-guc2host", false, &msix); + if (err) + return err; + + msix = DEFAULT_MSIX; + err = xe_irq_msix_request_irq(xe, xe_irq_msix_default_hwe_handler, xe, 
+ DRIVER_NAME "-default-msix", false, &msix); + if (err) { + xe_irq_msix_free_irq(xe, GUC2HOST_MSIX); + return err; + } + + return 0; +} + +void xe_irq_msix_free(struct xe_device *xe) +{ + unsigned long msix; + u32 *dummy; + + xa_for_each(&xe->irq.msix.indexes, msix, dummy) + xe_irq_msix_free_irq(xe, msix); + xa_destroy(&xe->irq.msix.indexes); +} + +void xe_irq_msix_synchronize_irq(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + unsigned long msix; + u32 *dummy; + + xa_for_each(&xe->irq.msix.indexes, msix, dummy) + synchronize_irq(pci_irq_vector(pdev, msix)); +} diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h index 067514e13675..a28bd577ba52 100644 --- a/drivers/gpu/drm/xe/xe_irq.h +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -6,13 +6,21 @@ #ifndef _XE_IRQ_H_ #define _XE_IRQ_H_ +#include <linux/interrupt.h> + +#define XE_IRQ_DEFAULT_MSIX 1 + struct xe_device; struct xe_tile; struct xe_gt; +int xe_irq_init(struct xe_device *xe); int xe_irq_install(struct xe_device *xe); void xe_irq_suspend(struct xe_device *xe); void xe_irq_resume(struct xe_device *xe); void xe_irq_enable_hwe(struct xe_gt *gt); +int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf, + const char *name, bool dynamic_msix, u16 *msix); +void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix); #endif diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 4f64c7f4e68d..bbb9ffbf6367 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -25,6 +25,7 @@ #include "xe_map.h" #include "xe_memirq.h" #include "xe_sriov.h" +#include "xe_trace_lrc.h" #include "xe_vm.h" #include "xe_wa.h" @@ -583,6 +584,7 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) { struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq; struct xe_device *xe = gt_to_xe(hwe->gt); + u8 num_regs; if (!xe_device_uses_memirq(xe)) return; @@ -592,12 +594,18 @@ static void set_memory_based_intr(u32
*regs, struct xe_hw_engine *hwe) regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); - regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + num_regs = xe_device_has_msix(xe) ? 3 : 2; + regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) | MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe); regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe); + + if (xe_device_has_msix(xe)) { + regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr; + /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */ + } } static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) @@ -875,7 +883,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size) + struct xe_vm *vm, u32 ring_size, u16 msix_vec) { struct xe_gt *gt = hwe->gt; struct xe_tile *tile = gt_to_tile(gt); @@ -944,6 +952,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_drm_client_add_bo(vm->xef->client, lrc->bo); } + if (xe_device_has_msix(xe)) { + xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR, + xe_memirq_status_ptr(&tile->memirq, hwe)); + xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR, + xe_memirq_source_ptr(&tile->memirq, hwe)); + xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec); + } + if (xe_gt_has_indirect_ring_state(gt)) { xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, __xe_lrc_indirect_ring_ggtt_addr(lrc)); @@ -1004,6 +1020,7 @@ err_lrc_finish: * @hwe: Hardware Engine * @vm: The VM (address space) * @ring_size: LRC ring size + * @msix_vec: MSI-X interrupt vector (for platforms that support it) * * Allocate and initialize the Logical 
Ring Context (LRC). * @@ -1011,7 +1028,7 @@ err_lrc_finish: * upon failure. */ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size) + u32 ring_size, u16 msix_vec) { struct xe_lrc *lrc; int err; @@ -1020,7 +1037,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, if (!lrc) return ERR_PTR(-ENOMEM); - err = xe_lrc_init(lrc, hwe, vm, ring_size); + err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec); if (err) { kfree(lrc); return ERR_PTR(err); @@ -1060,6 +1077,14 @@ u32 xe_lrc_ring_tail(struct xe_lrc *lrc) return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; } +static u32 xe_lrc_ring_start(struct xe_lrc *lrc) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START); + else + return xe_lrc_read_ctx_reg(lrc, CTX_RING_START); +} + void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) { if (xe_lrc_has_indirect_ring_state(lrc)) @@ -1635,10 +1660,12 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) xe_vm_get(lrc->bo->vm); snapshot->context_desc = xe_lrc_ggtt_addr(lrc); + snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); snapshot->tail.internal = lrc->ring.tail; snapshot->tail.memory = xe_lrc_ring_tail(lrc); + snapshot->start = xe_lrc_ring_start(lrc); snapshot->start_seqno = xe_lrc_start_seqno(lrc); snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); @@ -1692,11 +1719,14 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer return; drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); + drm_printf(p, "\tHW Ring address: 0x%08x\n", + snapshot->ring_addr); drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", snapshot->indirect_context_desc); drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); drm_printf(p, "\tLRC Tail: (internal) %u, (memory) 
%u\n", snapshot->tail.internal, snapshot->tail.memory); + drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start); drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); @@ -1758,5 +1788,20 @@ u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + trace_xe_lrc_update_timestamp(lrc, *old_ts); + return lrc->ctx_timestamp; } + +/** + * xe_lrc_ring_is_idle() - LRC is idle + * @lrc: Pointer to the lrc. + * + * Compare LRC ring head and tail to determine if idle. + * + * Return: True is ring is idle, False otherwise + */ +bool xe_lrc_ring_is_idle(struct xe_lrc *lrc) +{ + return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc); +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 40d8f6906d3e..4206e6a8b50a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -25,8 +25,10 @@ struct xe_lrc_snapshot { unsigned long lrc_size, lrc_offset; u32 context_desc; + u32 ring_addr; u32 indirect_context_desc; u32 head; + u32 start; struct { u32 internal; u32 memory; @@ -40,7 +42,7 @@ struct xe_lrc_snapshot { #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size); + u32 ring_size, u16 msix_vec); void xe_lrc_destroy(struct kref *ref); /** @@ -78,6 +80,8 @@ u32 xe_lrc_ring_head(struct xe_lrc *lrc); u32 xe_lrc_ring_space(struct xe_lrc *lrc); void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); +bool xe_lrc_ring_is_idle(struct xe_lrc *lrc); + u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc); diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h index daf56c846d03..8a77c2423555 100644 --- a/drivers/gpu/drm/xe/xe_macros.h +++ 
b/drivers/gpu/drm/xe/xe_macros.h @@ -10,9 +10,13 @@ #define XE_WARN_ON WARN_ON -#define XE_IOCTL_DBG(xe, cond) \ - ((cond) && (drm_dbg(&(xe)->drm, \ - "Ioctl argument check failed at %s:%d: %s", \ - __FILE__, __LINE__, #cond), 1)) +#define XE_IOCTL_DBG(xe, cond) ({ \ + int cond__ = !!(cond); \ + if (cond__) \ + drm_dbg(&(xe)->drm, \ + "Ioctl argument check failed at %s:%d: %s", \ + __FILE__, __LINE__, #cond); \ + cond__; \ +}) #endif diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index f833da88150a..404fa2a456d5 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -155,13 +155,6 @@ static const char *guc_name(struct xe_guc *guc) * */ -static void __release_xe_bo(struct drm_device *drm, void *arg) -{ - struct xe_bo *bo = arg; - - xe_bo_unpin_map_no_vm(bo); -} - static inline bool hw_reports_to_instance_zero(struct xe_memirq *memirq) { /* @@ -184,14 +177,12 @@ static int memirq_alloc_pages(struct xe_memirq *memirq) BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET(0), SZ_64)); BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET(0), SZ_4K)); - /* XXX: convert to managed bo */ - bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE | - XE_BO_FLAG_NEEDS_UC | - XE_BO_FLAG_NEEDS_CPU_ACCESS); + bo = xe_managed_bo_create_pin_map(xe, tile, bo_size, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_NEEDS_UC | + XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out; @@ -215,7 +206,7 @@ static int memirq_alloc_pages(struct xe_memirq *memirq) xe_bo_ggtt_addr(bo), bo_size, XE_MEMIRQ_SOURCE_OFFSET(0), XE_MEMIRQ_STATUS_OFFSET(0)); - return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo); + return 0; out: memirq_err(memirq, "Failed to allocate memirq page (%pe)\n", ERR_PTR(err)); @@ -442,6 +433,9 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct 
iosys_map *stat if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); + + if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name)) + xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0); } /** diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 1b97d90aadda..278bc96cf593 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1506,7 +1506,7 @@ err_bb: * using the default engine for the updates, they will be performed in the * order they grab the job_mutex. If different engines are used, external * synchronization is needed for overlapping updates to maintain page-table - * consistency. Note that the meaing of "overlapping" is that the updates + * consistency. Note that the meaning of "overlapping" is that the updates * touch the same page-table, which might be a higher-level page-directory. * If no pipelining is needed, then updates may be performed by the cpu. * diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index bfc3deebdaa2..07b27114be9a 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -19,7 +19,7 @@ struct xe_modparam xe_modparam = { .probe_display = true, - .guc_log_level = 5, + .guc_log_level = 3, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, /* the rest are 0 by default */ diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 8dd55798ab31..eeb96b5f49e2 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -16,7 +16,6 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" -#include "regs/xe_lrc_layout.h" #include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_bb.h" @@ -28,7 +27,6 @@ #include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_guc_pc.h" -#include "xe_lrc.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" @@ -74,12 +72,6 @@ 
struct xe_oa_config { struct rcu_head rcu; }; -struct flex { - struct xe_reg reg; - u32 offset; - u32 value; -}; - struct xe_oa_open_param { struct xe_file *xef; u32 oa_unit_id; @@ -96,6 +88,8 @@ struct xe_oa_open_param { struct drm_xe_sync __user *syncs_user; int num_syncs; struct xe_sync_entry *syncs; + size_t oa_buffer_size; + int wait_num_reports; }; struct xe_oa_config_bo { @@ -240,11 +234,10 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 tail, hw_tail, partial_report_size, available; int report_size = stream->oa_buffer.format->size; - u32 tail, hw_tail; unsigned long flags; bool pollin; - u32 partial_report_size; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); @@ -288,8 +281,8 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) stream->oa_buffer.tail = tail; - pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, - stream->oa_buffer.head) >= report_size; + available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head); + pollin = available >= stream->wait_num_reports * report_size; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -403,11 +396,19 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { - struct xe_mmio *mmio = &stream->gt->mmio; u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; + int size_exponent = __ffs(stream->oa_buffer.bo->size); + u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; + struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; + /* + * If oa buffer size is more than 16MB (exponent greater than 24), the + * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set. 
+ */ + oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK, + size_exponent > 24 ? size_exponent - 20 : size_exponent - 17); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0); @@ -596,19 +597,38 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) return ret; } +static void xe_oa_lock_vma(struct xe_exec_queue *q) +{ + if (q->vm) { + down_read(&q->vm->lock); + xe_vm_lock(q->vm, false); + } +} + +static void xe_oa_unlock_vma(struct xe_exec_queue *q) +{ + if (q->vm) { + xe_vm_unlock(q->vm); + up_read(&q->vm->lock); + } +} + static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, struct xe_bb *bb) { + struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q; struct xe_sched_job *job; struct dma_fence *fence; int err = 0; - /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ - job = xe_bb_create_job(stream->k_exec_q, bb); + xe_oa_lock_vma(q); + + job = xe_bb_create_job(q, bb); if (IS_ERR(job)) { err = PTR_ERR(job); goto exit; } + job->ggtt = true; if (deps == XE_OA_SUBMIT_ADD_DEPS) { for (int i = 0; i < stream->num_syncs && !err; i++) @@ -623,10 +643,13 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); + xe_oa_unlock_vma(q); + return fence; err_put_job: xe_sched_job_put(job); exit: + xe_oa_unlock_vma(q); return ERR_PTR(err); } @@ -675,63 +698,19 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream) dma_fence_put(stream->last_fence); } -static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, - struct xe_bb *bb, const struct flex *flex, u32 count) -{ - u32 offset = xe_bo_ggtt_addr(lrc->bo); - - do { - bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); - bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = flex->value; - - } 
while (flex++, --count); -} - -static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, - const struct flex *flex, u32 count) +static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count) { struct dma_fence *fence; struct xe_bb *bb; int err; - bb = xe_bb_new(stream->gt, 4 * count, false); + bb = xe_bb_new(stream->gt, 2 * count + 1, false); if (IS_ERR(bb)) { err = PTR_ERR(bb); goto exit; } - xe_oa_store_flex(stream, lrc, bb, flex, count); - - fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_bb; - } - xe_bb_free(bb, fence); - dma_fence_put(fence); - - return 0; -free_bb: - xe_bb_free(bb, NULL); -exit: - return err; -} - -static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) -{ - struct dma_fence *fence; - struct xe_bb *bb; - int err; - - bb = xe_bb_new(stream->gt, 3, false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; - } - - write_cs_mi_lri(bb, reg_lri, 1); + write_cs_mi_lri(bb, reg_lri, count); fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); if (IS_ERR(fence)) { @@ -751,71 +730,55 @@ exit: static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? OA_COUNTER_RESUME : 0, }, { + OAR_OACONTROL, + oacontrol, + }, + { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? 
CTX_CTRL_OAC_CONTEXT_ENABLE : 0) }, }; - struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; - int err; - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, &reg_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); } static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? OA_COUNTER_RESUME : 0, }, { + OAC_OACONTROL, + oacontrol + }, + { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ?
CTX_CTRL_RUN_ALONE : 0), }, }; - struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; - int err; /* Set ccs select to enable programming of OAC_OACONTROL */ xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, &reg_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); } static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) @@ -901,15 +864,12 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_file_put(stream->xef); } -static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) +static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { struct xe_bo *bo; - BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); - BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); - bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, - XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, + size, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1087,6 +1047,13 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); } +static u32 oag_buf_size_select(const struct xe_oa_stream *stream) +{ + return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, + stream->oa_buffer.bo->size > SZ_16M ?
+ OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); +} + static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) { struct xe_mmio *mmio = &stream->gt->mmio; @@ -1119,6 +1086,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, _MASKED_BIT_ENABLE(oa_debug) | oag_report_ctx_switches(stream) | + oag_buf_size_select(stream) | oag_configure_mmio_trigger(stream, true)); xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? @@ -1260,6 +1228,28 @@ static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value, return 0; } +static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) { + drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value); + return -EINVAL; + } + param->oa_buffer_size = value; + return 0; +} + +static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (!value) { + drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value); + return -EINVAL; + } + param->wait_num_reports = value; + return 0; +} + static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { @@ -1280,6 +1270,8 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = { [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports, }; static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { @@ -1294,6 +1286,8 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval, [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] 
= xe_oa_set_prop_syncs_user, + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval, }; static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, @@ -1553,7 +1547,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1639,7 +1633,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { + if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1677,81 +1671,6 @@ static const struct file_operations xe_oa_fops = { .mmap = xe_oa_mmap, }; -static bool engine_supports_mi_query(struct xe_hw_engine *hwe) -{ - return hwe->class == XE_ENGINE_CLASS_RENDER || - hwe->class == XE_ENGINE_CLASS_COMPUTE; -} - -static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) -{ - u32 idx = *offset; - u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); - bool found = false; - - idx++; - for (; idx < len; idx += 2) { - if (state[idx] == reg) { - found = true; - break; - } - } - - *offset = idx; - return found; -} - -#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \ - REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM)) - -static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg) -{ - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) + - lrc->ring.size) / sizeof(u32); - u32 
offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 *state = (u32 *)lrc->bo->vmap.vaddr; - - if (drm_WARN_ON(&stream->oa->xe->drm, !state)) - return U32_MAX; - - for (; offset < len; ) { - if (IS_MI_LRI_CMD(state[offset])) { - /* - * We expect reg-value pairs in MI_LRI command, so - * MI_LRI_LEN() should be even - */ - drm_WARN_ON(&stream->oa->xe->drm, - MI_LRI_LEN(state[offset]) & 0x1); - - if (xe_oa_find_reg_in_lri(state, reg, &offset, len)) - break; - } else { - offset++; - } - } - - return offset < len ? offset : U32_MAX; -} - -static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream) -{ - struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base); - u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class]; - - /* Do this only once. Failure is stored as offset of U32_MAX */ - if (offset) - goto exit; - - offset = xe_oa_context_image_offset(stream, reg.addr); - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset; - - drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n", - stream->hwe->name, offset); -exit: - return offset && offset != U32_MAX ? 
0 : -ENODEV; -} - static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { @@ -1770,6 +1689,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; + stream->wait_num_reports = param->wait_num_reports; stream->xef = xe_file_get(param->xef); stream->num_syncs = param->num_syncs; @@ -1783,20 +1703,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, if (GRAPHICS_VER(stream->oa->xe) >= 20 && stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) stream->oa_buffer.circ_size = - XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; + param->oa_buffer_size - + param->oa_buffer_size % stream->oa_buffer.format->size; else - stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; - - if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { - /* If we don't find the context offset, just return error */ - ret = xe_oa_set_ctx_ctrl_offset(stream); - if (ret) { - drm_err(&stream->oa->xe->drm, - "xe_oa_set_ctx_ctrl_offset failed for %s\n", - stream->hwe->name); - goto exit; - } - } + stream->oa_buffer.circ_size = param->oa_buffer_size; stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); if (!stream->oa_config) { @@ -1828,7 +1738,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, goto err_fw_put; } - ret = xe_oa_alloc_oa_buffer(stream); + ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size); if (ret) goto err_fw_put; @@ -2066,8 +1976,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) return -ENOENT; - if (param.exec_q->width > 1) - drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) + return -EOPNOTSUPP; } /* @@ -2125,6 +2035,17 @@ int xe_oa_stream_open_ioctl(struct 
drm_device *dev, u64 data, struct drm_file *f drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); } + if (!param.oa_buffer_size) + param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; + + if (!param.wait_num_reports) + param.wait_num_reports = 1; + if (param.wait_num_reports > param.oa_buffer_size / f->size) { + drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports); + ret = -EINVAL; + goto err_exec_q; + } + ret = xe_oa_parse_syncs(oa, &param); if (ret) goto err_exec_q; @@ -2242,6 +2163,7 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index fea9d981e414..52e33c37d5ee 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -15,7 +15,7 @@ #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" -#define XE_OA_BUFFER_SIZE SZ_16M +#define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M enum xe_oa_report_header { HDR_32_BIT = 0, @@ -138,9 +138,6 @@ struct xe_oa { /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ struct idr metrics_idr; - /** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */ - u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX]; - /** @oa_formats: tracks all OA formats across platforms */ const struct xe_oa_format *oa_formats; @@ -218,6 +215,9 @@ struct xe_oa_stream { /** @pollin: Whether there is data available to read */ bool pollin; + /** @wait_num_reports: Number of reports to wait for before signalling
pollin */ + int wait_num_reports; + /** @periodic: Whether periodic sampling is currently enabled */ bool periodic; diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index 8ec1b84cbb9e..57cf01efc07f 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -56,7 +56,7 @@ int xe_observation_ioctl(struct drm_device *dev, void *data, struct drm_file *fi } } -static struct ctl_table observation_ctl_table[] = { +static const struct ctl_table observation_ctl_table[] = { { .procname = "observation_paranoid", .data = &xe_observation_paranoid, diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6b7f77425c7f..39be74848e44 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -490,7 +490,7 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, * least basic xe_gt and xe_guc initialization. * * Since to obtain the value of GMDID_MEDIA we need to use the - * media GuC, temporarly tweak the gt type. + * media GuC, temporarily tweak the gt type. */ xe_gt_assert(gt, gt->info.type == XE_GT_TYPE_UNINITIALIZED); @@ -781,7 +781,7 @@ static void xe_pci_remove(struct pci_dev *pdev) * error injectable functions is proper handling of the error code by the * caller for recovery, which is always the case here. The second * requirement is that no state is changed before the first error return. - * It is not strictly fullfilled for all initialization functions using the + * It is not strictly fulfilled for all initialization functions using the * ALLOW_ERROR_INJECTION() macro but this is acceptable because for those * error cases at probe time, the error code is simply propagated up by the * caller. 
Therefore there is no consequence on those specific callers when diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index d95d9835de42..9333ce776a6e 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -217,7 +217,7 @@ out: * * It returns 0 on success, and -ERROR number on failure, -EINVAL if max * frequency is higher then the minimal, and other errors directly translated - * from the PCODE Error returs: + * from the PCODE Error returns: * - -ENXIO: "Illegal Command" * - -ETIMEDOUT: "Timed out" * - -EINVAL: "Illegal Data" diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 40f7c844ed44..c9cc0c091dfd 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -7,6 +7,7 @@ #include <linux/fault-inject.h> #include <linux/pm_runtime.h> +#include <linux/suspend.h> #include <drm/drm_managed.h> #include <drm/ttm/ttm_placement.h> @@ -390,7 +391,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify - * also checks and delets bo entry from user fault list. + * also checks and deletes bo entry from user fault list. 
*/ mutex_lock(&xe->mem_access.vram_userfault.lock); list_for_each_entry_safe(bo, on, @@ -414,8 +415,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_irq_suspend(xe); - if (xe->d3cold.allowed) - xe_display_pm_suspend_late(xe); + xe_display_pm_runtime_suspend_late(xe); + out: if (err) xe_display_pm_runtime_resume(xe); @@ -607,7 +608,8 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe) struct device *dev = xe->drm.dev; return dev->power.runtime_status == RPM_SUSPENDING || - dev->power.runtime_status == RPM_RESUMING; + dev->power.runtime_status == RPM_RESUMING || + pm_suspend_target_state != PM_SUSPEND_ON; #else return false; #endif @@ -738,9 +740,6 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) xe->d3cold.allowed = false; mutex_unlock(&xe->d3cold.lock); - - drm_dbg(&xe->drm, - "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); } /** diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index f27f579f4d85..1ddcc7e79a93 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -136,6 +136,7 @@ err_kfree: xe_pt_free(pt); return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO); /** * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero @@ -275,7 +276,7 @@ struct xe_pt_stage_bind_walk { /* Also input, but is updated during the walk*/ /** @curs: The DMA address cursor. 
*/ struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ + /** @va_curs_start: The Virtual address corresponding to @curs->start */ u64 va_curs_start; /* Output */ @@ -1333,8 +1334,7 @@ static void invalidation_fence_cb(struct dma_fence *fence, queue_work(system_wq, &ifence->work); } else { ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); + xe_gt_tlb_invalidation_fence_signal(&ifence->base); } dma_fence_put(ifence->fence); } @@ -1851,6 +1851,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) return 0; } +ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO); static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -2131,6 +2132,7 @@ kill_vm_tile1: return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO); /** * xe_pt_update_ops_fini() - Finish PT update operations diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 170ae72d1a7b..c059639613f7 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -23,6 +23,7 @@ #include "xe_guc_hwconfig.h" #include "xe_macros.h" #include "xe_mmio.h" +#include "xe_oa.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" @@ -670,7 +671,9 @@ static int query_oa_units(struct xe_device *xe, du->oa_unit_id = u->oa_unit_id; du->oa_unit_type = u->type; du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); - du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; + du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | + DRM_XE_OA_CAPS_OA_BUFFER_SIZE | + DRM_XE_OA_CAPS_WAIT_NUM_REPORTS; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index e1a0e27cda14..9475e3f74958 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -24,49 +24,29 @@ 
#include "xe_hw_engine_types.h" #include "xe_macros.h" #include "xe_mmio.h" -#include "xe_reg_whitelist.h" #include "xe_rtp_types.h" -#define XE_REG_SR_GROW_STEP_DEFAULT 16 - static void reg_sr_fini(struct drm_device *drm, void *arg) { struct xe_reg_sr *sr = arg; + struct xe_reg_sr_entry *entry; + unsigned long reg; + + xa_for_each(&sr->xa, reg, entry) + kfree(entry); xa_destroy(&sr->xa); - kfree(sr->pool.arr); - memset(&sr->pool, 0, sizeof(sr->pool)); } int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) { xa_init(&sr->xa); - memset(&sr->pool, 0, sizeof(sr->pool)); - sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT; sr->name = name; return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); } EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init); -static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) -{ - if (sr->pool.used == sr->pool.allocated) { - struct xe_reg_sr_entry *arr; - - arr = krealloc_array(sr->pool.arr, - ALIGN(sr->pool.allocated + 1, sr->pool.grow_step), - sizeof(*arr), GFP_KERNEL); - if (!arr) - return NULL; - - sr->pool.arr = arr; - sr->pool.allocated += sr->pool.grow_step; - } - - return &sr->pool.arr[sr->pool.used++]; -} - static bool compatible_entries(const struct xe_reg_sr_entry *e1, const struct xe_reg_sr_entry *e2) { @@ -112,7 +92,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, return 0; } - pentry = alloc_entry(sr); + pentry = kmalloc(sizeof(*pentry), GFP_KERNEL); if (!pentry) { ret = -ENOMEM; goto fail; @@ -211,58 +191,6 @@ err_force_wake: xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n"); } -void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) -{ - struct xe_reg_sr *sr = &hwe->reg_whitelist; - struct xe_gt *gt = hwe->gt; - struct xe_device *xe = gt_to_xe(gt); - struct xe_reg_sr_entry *entry; - struct drm_printer p; - u32 mmio_base = hwe->mmio_base; - unsigned long reg; - unsigned int slot = 0; - unsigned int fw_ref; - - if (xa_empty(&sr->xa)) - return; - - drm_dbg(&xe->drm, "Whitelisting %s registers\n", 
sr->name); - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) - goto err_force_wake; - - p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL); - xa_for_each(&sr->xa, reg, entry) { - if (slot == RING_MAX_NONPRIV_SLOTS) { - xe_gt_err(gt, - "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n", - hwe->name, RING_MAX_NONPRIV_SLOTS); - break; - } - - xe_reg_whitelist_print_entry(&p, 0, reg, entry); - xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), - reg | entry->set_bits); - slot++; - } - - /* And clear the rest just in case of garbage */ - for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) { - u32 addr = RING_NOPID(mmio_base).addr; - - xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr); - } - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return; - -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n"); -} - /** * xe_reg_sr_dump - print all save/restore entries * @sr: Save/restore entries diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h index ad48a52b824a..ebe11f237fa2 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr_types.h +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -20,12 +20,6 @@ struct xe_reg_sr_entry { }; struct xe_reg_sr { - struct { - struct xe_reg_sr_entry *arr; - unsigned int used; - unsigned int allocated; - unsigned int grow_step; - } pool; struct xarray xa; const char *name; diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 3996934974fa..edab5d4e3ba5 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -10,7 +10,9 @@ #include "regs/xe_oa_regs.h" #include "regs/xe_regs.h" #include "xe_gt_types.h" +#include "xe_gt_printk.h" #include "xe_platform_types.h" +#include "xe_reg_sr.h" #include "xe_rtp.h" #include "xe_step.h" @@ -89,6 +91,40 @@ static 
const struct xe_rtp_entry_sr register_whitelist[] = { {} }; +static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) +{ + struct xe_reg_sr *sr = &hwe->reg_whitelist; + struct xe_reg_sr_entry *entry; + struct drm_printer p; + unsigned long reg; + unsigned int slot; + + xe_gt_dbg(hwe->gt, "Add %s whitelist to engine\n", sr->name); + p = xe_gt_dbg_printer(hwe->gt); + + slot = 0; + xa_for_each(&sr->xa, reg, entry) { + struct xe_reg_sr_entry hwe_entry = { + .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot), + .set_bits = entry->reg.addr | entry->set_bits, + .clr_bits = ~0u, + .read_mask = entry->read_mask, + }; + + if (slot == RING_MAX_NONPRIV_SLOTS) { + xe_gt_err(hwe->gt, + "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n", + hwe->name, RING_MAX_NONPRIV_SLOTS); + break; + } + + xe_reg_whitelist_print_entry(&p, 0, reg, entry); + xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt); + + slot++; + } +} + /** * xe_reg_whitelist_process_engine - process table of registers to whitelist * @hwe: engine instance to process whitelist for @@ -102,6 +138,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist); + whitelist_apply_to_hwe(hwe); } /** diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 0be4f489d3e1..9f327f27c072 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -221,7 +221,10 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, static u32 get_ppgtt_flag(struct xe_sched_job *job) { - return job->q->vm ? 
BIT(8) : 0; + if (job->q->vm && !job->ggtt) + return BIT(8); + + return 0; } static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index b13d4d62f0b1..7a1c78fdfc92 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -340,3 +340,8 @@ bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, return dss >= dss_per_gslice; } +bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return !IS_SRIOV_VF(gt_to_xe(gt)); +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 827d932b6908..38b9f13bba5e 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -131,7 +131,7 @@ struct xe_reg_sr; * @ver_end__: Last graphics IP version to match * * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. - * inclusive on boths sides + * inclusive on both sides * * Refer to XE_RTP_RULES() for expected usage. */ @@ -169,7 +169,7 @@ struct xe_reg_sr; * @ver_end__: Last media IP version to match * * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. - * inclusive on boths sides + * inclusive on both sides * * Refer to XE_RTP_RULES() for expected usage. */ @@ -476,4 +476,15 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, const struct xe_hw_engine *hwe); +/* + * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device + * + * @gt: GT structure + * @hwe: Engine instance + * + * Returns: true if device is not VF, false otherwise. 
+ */ +bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index f13f333f00be..d942b20a9f29 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -56,6 +56,8 @@ struct xe_sched_job { u32 migrate_flush_flags; /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ bool ring_ops_flush_tlb; + /** @ggtt: mapped in ggtt. */ + bool ggtt; /** @ptrs: per instance pointers. */ struct xe_job_ptrs ptrs[]; }; diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index ef10782af656..04e2f539ccd9 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -14,6 +14,7 @@ #include "xe_mmio.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_vf.h" /** * xe_sriov_mode_to_string - Convert enum value to string. @@ -114,6 +115,9 @@ int xe_sriov_init(struct xe_device *xe) return err; } + if (IS_SRIOV_VF(xe)) + xe_sriov_vf_init_early(xe); + xe_assert(xe, !xe->sriov.wq); xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0); if (!xe->sriov.wq) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h index 7d156ba82479..dd1df950b021 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -20,7 +20,7 @@ * is within a range of supported VF numbers (up to maximum number of VFs that * driver can support, including VF0 that represents the PF itself). * - * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. + * Note: Effective only on debug builds. See `Xe Asserts`_ for more information. 
*/ #define xe_sriov_pf_assert_vfid(xe, vfid) \ xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe)) diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index c7b7ad4af5c8..ca94382a721e 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -9,6 +9,7 @@ #include <linux/build_bug.h> #include <linux/mutex.h> #include <linux/types.h> +#include <linux/workqueue_types.h> /** * VFID - Virtual Function Identifier @@ -56,4 +57,20 @@ struct xe_device_pf { struct mutex master_lock; }; +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. + */ +struct xe_device_vf { + /** @migration: VF Migration state data */ + struct { + /** @migration.worker: VF migration recovery worker */ + struct work_struct worker; + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ + unsigned long gt_flags; + } migration; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c new file mode 100644 index 000000000000..c1275e64aa9c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_vf.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" +#include "xe_sriov_vf.h" + +/** + * DOC: VF restore procedure in PF KMD and VF KMD + * + * Restoring previously saved state of a VF is one of core features of + * SR-IOV. All major VM Management applications allow saving and restoring + * the VM state, and doing that to a VM which uses SRIOV VF as one of + * the accessible devices requires support from KMD on both PF and VF side. 
+ * VMM initiates all required operations through VFIO module, which then + * translates them into PF KMD calls. This description will focus on these + * calls, leaving out the module which initiates these steps (VFIO). + * + * In order to start the restore procedure, GuC needs to keep the VF in + * proper state. The PF driver can ensure GuC set it to VF_READY state + * by provisioning the VF, which in turn can be done after Function Level + * Reset of said VF (or after it was freshly created - in that case FLR + * is not needed). The FLR procedure ends with GuC sending message + * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC. + * After the provisioning is completed, the VF needs to be paused, and + * at that point the actual restore can begin. + * + * During VF Restore, state of several resources is restored. These may + * include local memory content (system memory is restored by VMM itself), + * values of MMIO registers, stateless compression metadata and others. + * The final resource which also needs restoring is state of the VF + * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE` + * message is used, with reference to the state blob to be consumed by + * GuC. + * + * Next, when VFIO is asked to set the VM into running state, the PF driver + * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this + * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the + * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform + * the VF KMD within the VM that it was migrated. + * + * As soon as Virtual GPU of the VM starts, the VF driver within receives + * the MIGRATED interrupt and schedules post-migration recovery worker. + * That worker queries GuC for new provisioning (using MMIO communication), + * and applies fixups to any non-virtualized resources used by the VF. 
+ * + * When the VF driver is ready to continue operation on the newly connected + * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to + * enter the long awaited `VF_RUNNING` state, and therefore start handling + * CTB messages and scheduling workloads from the VF:: + * + * PF GuC VF + * [ ] | | + * [ ] PF2GUC_VF_CONTROL(pause) | | + * [ ]---------------------------> [ ] | + * [ ] [ ] GuC sets new VF state to | + * [ ] [ ]------- VF_READY_PAUSED | + * [ ] [ ] | | + * [ ] [ ] <----- | + * [ ] success [ ] | + * [ ] <---------------------------[ ] | + * [ ] | | + * [ ] PF loads resources from the | | + * [ ]------- saved image supplied | | + * [ ] | | | + * [ ] <----- | | + * [ ] | | + * [ ] GUC_PF_OPCODE_VF_RESTORE | | + * [ ]---------------------------> [ ] | + * [ ] [ ] GuC loads contexts and CTB | + * [ ] [ ]------- state from image | + * [ ] [ ] | | + * [ ] [ ] <----- | + * [ ] [ ] | + * [ ] [ ] GuC sets new VF state to | + * [ ] [ ]------- VF_RESFIX_PAUSED | + * [ ] [ ] | | + * [ ] success [ ] <----- | + * [ ] <---------------------------[ ] | + * [ ] | | + * [ ] GUC_PF_TRIGGER_VF_RESUME | | + * [ ]---------------------------> [ ] | + * [ ] [ ] GuC sets new VF state to | + * [ ] [ ]------- VF_RESFIX_BLOCKED | + * [ ] [ ] | | + * [ ] [ ] <----- | + * [ ] [ ] | + * [ ] [ ] GUC_INTR_SW_INT_0 | + * [ ] success [ ]---------------------------> [ ] + * [ ] <---------------------------[ ] [ ] + * | | VF2GUC_QUERY_SINGLE_KLV [ ] + * | [ ] <---------------------------[ ] + * | [ ] [ ] + * | [ ] new VF provisioning [ ] + * | [ ]---------------------------> [ ] + * | | [ ] + * | | VF driver applies post [ ] + * | | migration fixups -------[ ] + * | | | [ ] + * | | -----> [ ] + * | | [ ] + * | | VF2GUC_NOTIFY_RESFIX_DONE [ ] + * | [ ] <---------------------------[ ] + * | [ ] [ ] + * | [ ] GuC sets new VF state to [ ] + * | [ ]------- VF_RUNNING [ ] + * | [ ] | [ ] + * | [ ] <----- [ ] + * | [ ] success [ ] + * | [ ]---------------------------> [ ] + * | | | 
+ * | | | + */ + +static void migration_worker_func(struct work_struct *w); + +/** + * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. + * @xe: the &xe_device to initialize + */ +void xe_sriov_vf_init_early(struct xe_device *xe) +{ + INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); +} + +/** + * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. + * @xe: the &xe_device struct instance + * + * After migration, we need to re-query all VF configuration to make sure + * they match previous provisioning. Note that most of VF provisioning + * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. + * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. + */ +static int vf_post_migration_requery_guc(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int err, ret = 0; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_vf_query_config(gt); + ret = ret ?: err; + } + + return ret; +} + +/* + * vf_post_migration_imminent - Check if post-restore recovery is coming. + * @xe: the &xe_device struct instance + * + * Return: True if migration recovery worker will soon be running. Any worker currently + * executing does not affect the result. + */ +static bool vf_post_migration_imminent(struct xe_device *xe) +{ + return xe->sriov.vf.migration.gt_flags != 0 || + work_pending(&xe->sriov.vf.migration.worker); +} + +/* + * Notify all GuCs about resource fixups apply finished. 
+ */ +static void vf_post_migration_notify_resfix_done(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt(gt, xe, id) { + if (vf_post_migration_imminent(xe)) + goto skip; + xe_gt_sriov_vf_notify_resfix_done(gt); + } + return; + +skip: + drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); +} + +static void vf_post_migration_recovery(struct xe_device *xe) +{ + int err; + + drm_dbg(&xe->drm, "migration recovery in progress\n"); + xe_pm_runtime_get(xe); + err = vf_post_migration_requery_guc(xe); + if (vf_post_migration_imminent(xe)) + goto defer; + if (unlikely(err)) + goto fail; + + /* FIXME: add the recovery steps */ + vf_post_migration_notify_resfix_done(xe); + xe_pm_runtime_put(xe); + drm_notice(&xe->drm, "migration recovery ended\n"); + return; +defer: + xe_pm_runtime_put(xe); + drm_dbg(&xe->drm, "migration recovery deferred\n"); + return; +fail: + xe_pm_runtime_put(xe); + drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); + xe_device_declare_wedged(xe); +} + +static void migration_worker_func(struct work_struct *w) +{ + struct xe_device *xe = container_of(w, struct xe_device, + sriov.vf.migration.worker); + + vf_post_migration_recovery(xe); +} + +static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt(gt, xe, id) { + if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { + xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); + return false; + } + } + return true; +} + +/** + * xe_sriov_vf_start_migration_recovery - Start VF migration recovery. + * @xe: the &xe_device to start recovery on + * + * This function shall be called only by VF. 
+ */ +void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) +{ + bool started; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + if (!vf_ready_to_recovery_on_all_gts(xe)) + return; + + WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); + /* Ensure other threads see that no flags are set now. */ + smp_mb(); + + started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); + drm_info(&xe->drm, "VF migration recovery %s\n", started ? + "scheduled" : "already in progress"); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h new file mode 100644 index 000000000000..7b8622cff2b7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_H_ +#define _XE_SRIOV_VF_H_ + +struct xe_device; + +void xe_sriov_vf_init_early(struct xe_device *xe); +void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 91130ad8999c..d5281de04d54 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -211,6 +211,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, __string(dev, __dev_name_eq(job->q)) __field(u32, seqno) __field(u32, lrc_seqno) + __field(u8, gt_id) __field(u16, guc_id) __field(u32, guc_state) __field(u32, flags) @@ -223,6 +224,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, __assign_str(dev); __entry->seqno = xe_sched_job_seqno(job); __entry->lrc_seqno = xe_sched_job_lrc_seqno(job); + __entry->gt_id = job->q->gt->info.id; __entry->guc_id = job->q->guc->id; __entry->guc_state = atomic_read(&job->q->guc->state); @@ -232,9 +234,9 @@ DECLARE_EVENT_CLASS(xe_sched_job, __entry->batch_addr = (u64)job->ptrs[0].batch_addr; ), - TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, gt=%u, guc_id=%d, 
batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", __get_str(dev), __entry->fence, __entry->seqno, - __entry->lrc_seqno, __entry->guc_id, + __entry->lrc_seqno, __entry->gt_id, __entry->guc_id, __entry->batch_addr, __entry->guc_state, __entry->flags, __entry->error) ); @@ -282,6 +284,7 @@ DECLARE_EVENT_CLASS(xe_sched_msg, __string(dev, __dev_name_eq(((struct xe_exec_queue *)msg->private_data))) __field(u32, opcode) __field(u16, guc_id) + __field(u8, gt_id) ), TP_fast_assign( @@ -289,9 +292,11 @@ DECLARE_EVENT_CLASS(xe_sched_msg, __entry->opcode = msg->opcode; __entry->guc_id = ((struct xe_exec_queue *)msg->private_data)->guc->id; + __entry->gt_id = + ((struct xe_exec_queue *)msg->private_data)->gt->info.id; ), - TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev), __entry->guc_id, + TP_printk("dev=%s, gt=%u guc_id=%d, opcode=%u", __get_str(dev), __entry->gt_id, __entry->guc_id, __entry->opcode) ); diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index 30a3cfbaaa09..ea50fee50c7d 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -48,6 +48,11 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, TP_ARGS(bo) ); +DEFINE_EVENT(xe_bo, xe_bo_validate, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + TRACE_EVENT(xe_bo_move, TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, bool move_lacks_source), @@ -55,8 +60,8 @@ TRACE_EVENT(xe_bo_move, TP_STRUCT__entry( __field(struct xe_bo *, bo) __field(size_t, size) - __field(u32, new_placement) - __field(u32, old_placement) + __string(new_placement_name, xe_mem_type_to_name[new_placement]) + __string(old_placement_name, xe_mem_type_to_name[old_placement]) __string(device_id, __dev_name_bo(bo)) __field(bool, move_lacks_source) ), @@ -64,15 +69,15 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; __entry->size = bo->size; - __entry->new_placement = new_placement; - __entry->old_placement = old_placement; + 
__assign_str(new_placement_name); + __assign_str(old_placement_name); __assign_str(device_id); __entry->move_lacks_source = move_lacks_source; ), TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, - xe_mem_type_to_name[__entry->old_placement], - xe_mem_type_to_name[__entry->new_placement], __get_str(device_id)) + __get_str(old_placement_name), + __get_str(new_placement_name), __get_str(device_id)) ); DECLARE_EVENT_CLASS(xe_vma, diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.c b/drivers/gpu/drm/xe/xe_trace_lrc.c new file mode 100644 index 000000000000..ab9b7e2970bc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_lrc.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace_lrc.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h new file mode 100644 index 000000000000..5c669a0b2180 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_lrc.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2024 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_LRC_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_LRC_H_ + +#include <linux/tracepoint.h> +#include <linux/types.h> + +#include "xe_gt_types.h" +#include "xe_lrc.h" +#include "xe_lrc_types.h" + +#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev) + +TRACE_EVENT(xe_lrc_update_timestamp, + TP_PROTO(struct xe_lrc *lrc, uint32_t old), + TP_ARGS(lrc, old), + TP_STRUCT__entry( + __field(struct xe_lrc *, lrc) + __field(u32, old) + __field(u32, new) + __string(name, lrc->fence_ctx.name) + __string(device_id, __dev_name_lrc(lrc)) + ), + + TP_fast_assign( + __entry->lrc = lrc; + __entry->old = old; + __entry->new = lrc->ctx_timestamp; + __assign_str(name); + 
__assign_str(device_id); + ), + TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s", + __entry->lrc, __get_str(name), + __entry->old, __entry->new, + __get_str(device_id)) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE xe_trace_lrc +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 423b261ea743..f4a16e5fa770 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -5,6 +5,7 @@ */ #include <drm/drm_managed.h> +#include <drm/drm_drv.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_range_manager.h> @@ -52,7 +53,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); struct xe_ttm_vram_mgr_resource *vres; struct drm_buddy *mm = &mgr->mm; - u64 size, remaining_size, min_page_size; + u64 size, min_page_size; unsigned long lpfn; int err; @@ -98,17 +99,6 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, goto error_fini; } - if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */ - err = -EINVAL; - goto error_fini; - } - - if (WARN_ON((size > SZ_2G && - (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) { - err = -EINVAL; - goto error_fini; - } - if (WARN_ON(!IS_ALIGNED(size, min_page_size))) { err = -EINVAL; goto error_fini; @@ -116,12 +106,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, mutex_lock(&mgr->lock); if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) { - mutex_unlock(&mgr->lock); err = -ENOSPC; - goto error_fini; + goto error_unlock; } - if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn && + if (place->fpfn + (size >> PAGE_SHIFT) != lpfn && place->flags & TTM_PL_FLAG_CONTIGUOUS) { size = roundup_pow_of_two(size); min_page_size = size; @@ -129,25 
+118,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn); } - remaining_size = size; - do { - /* - * Limit maximum size to 2GiB due to SG table limitations. - * FIXME: Should maybe be handled as part of sg construction. - */ - u64 alloc_size = min_t(u64, remaining_size, SZ_2G); - - err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, - (u64)lpfn << PAGE_SHIFT, - alloc_size, - min_page_size, - &vres->blocks, - vres->flags); - if (err) - goto error_free_blocks; - - remaining_size -= alloc_size; - } while (remaining_size); + err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, + (u64)lpfn << PAGE_SHIFT, size, + min_page_size, &vres->blocks, vres->flags); + if (err) + goto error_unlock; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) @@ -194,9 +169,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, *res = &vres->base; return 0; - -error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks, 0); +error_unlock: mutex_unlock(&mgr->lock); error_fini: ttm_resource_fini(man, &vres->base); @@ -339,6 +312,13 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, struct ttm_resource_manager *man = &mgr->manager; int err; + if (mem_type != XE_PL_STOLEN) { + const char *name = mem_type == XE_PL_VRAM0 ? "vram0" : "vram1"; + man->cg = drmm_cgroup_register_region(&xe->drm, name, size); + if (IS_ERR(man->cg)) + return PTR_ERR(man->cg); + } + man->func = &xe_ttm_vram_mgr_func; mgr->mem_type = mem_type; mutex_init(&mgr->lock); @@ -393,7 +373,8 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, xe_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; - xe_res_next(&cursor, cursor.size); + /* Limit maximum size to 2GiB due to SG table limitations. 
*/ + xe_res_next(&cursor, min_t(u64, cursor.size, SZ_2G)); } r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL); @@ -413,7 +394,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, xe_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { phys_addr_t phys = cursor.start + tile->mem.vram.io_start; - size_t size = cursor.size; + size_t size = min_t(u64, cursor.size, SZ_2G); dma_addr_t addr; addr = dma_map_resource(dev, phys, size, dir, @@ -426,7 +407,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, sg_dma_address(sg) = addr; sg_dma_len(sg) = size; - xe_res_next(&cursor, cursor.size); + xe_res_next(&cursor, size); } return 0; diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 0d8caa0e7354..ad3b35a0e6eb 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -92,7 +92,7 @@ struct xe_uc_fw { const enum xe_uc_fw_status status; /** * @__status: private firmware load status - only to be used - * by firmware laoding code + * by firmware loading code */ enum xe_uc_fw_status __status; }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 00ea57c2f4b9..690330352d4c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -732,13 +732,14 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) vops->pt_update_ops[i].ops = kmalloc_array(vops->pt_update_ops[i].num_ops, sizeof(*vops->pt_update_ops[i].ops), - GFP_KERNEL); + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!vops->pt_update_ops[i].ops) return array_of_binds ? 
-ENOBUFS : -ENOMEM; } return 0; } +ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); static void xe_vma_ops_fini(struct xe_vma_ops *vops) { @@ -1023,7 +1024,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) /* * Since userptr pages are not pinned, we can't remove - * the notifer until we're sure the GPU is not accessing + * the notifier until we're sure the GPU is not accessing * them anymore */ mmu_interval_notifier_remove(&userptr->notifier); @@ -1351,6 +1352,7 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, return 0; } +ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); static void xe_vm_free_scratch(struct xe_vm *vm) { @@ -1977,6 +1979,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, return ops; } +ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, u16 pat_index, unsigned int flags) @@ -2104,7 +2107,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) } } - /* Adjust for partial unbind after removin VMA from VM */ + /* Adjust for partial unbind after removing VMA from VM */ if (!err) { op->base.remap.unmap->va->va.addr = op->remap.start; op->base.remap.unmap->va->va.range = op->remap.range; @@ -2356,13 +2359,15 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, bool validate) { struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); int err = 0; if (bo) { if (!bo->vm) err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) - err = xe_bo_validate(bo, xe_vma_vm(vma), true); + err = xe_bo_validate(bo, vm, + !xe_vm_in_preempt_fence_mode(vm)); } return err; @@ -2696,6 +2701,7 @@ unlock: drm_exec_fini(&exec); return err; } +ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); #define SUPPORTED_FLAGS_STUB \ (DRM_XE_VM_BIND_FLAG_READONLY | \ @@ -2732,7 +2738,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, *bind_ops = 
kvmalloc_array(args->num_binds, sizeof(struct drm_xe_vm_bind_op), - GFP_KERNEL | __GFP_ACCOUNT); + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!*bind_ops) return args->num_binds > 1 ? -ENOBUFS : -ENOMEM; @@ -2972,14 +2979,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (args->num_binds) { bos = kvcalloc(args->num_binds, sizeof(*bos), - GFP_KERNEL | __GFP_ACCOUNT); + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!bos) { err = -ENOMEM; goto release_vm_lock; } ops = kvcalloc(args->num_binds, sizeof(*ops), - GFP_KERNEL | __GFP_ACCOUNT); + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!ops) { err = -ENOMEM; goto release_vm_lock; @@ -3302,7 +3311,6 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) for (int i = 0; i < snap->num_snaps; i++) { struct xe_bo *bo = snap->snap[i].bo; - struct iosys_map src; int err; if (IS_ERR(snap->snap[i].data)) @@ -3315,16 +3323,8 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) } if (bo) { - xe_bo_lock(bo, false); - err = ttm_bo_vmap(&bo->ttm, &src); - if (!err) { - xe_map_memcpy_from(xe_bo_device(bo), - snap->snap[i].data, - &src, snap->snap[i].bo_ofs, - snap->snap[i].len); - ttm_bo_vunmap(&bo->ttm, &src); - } - xe_bo_unlock(bo); + err = xe_bo_read(bo, snap->snap[i].bo_ofs, + snap->snap[i].data, snap->snap[i].len); } else { void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index 4d33f310b653..078786958403 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -64,8 +64,8 @@ * update page level 2 PDE[1] to page level 3b phys address (GPU) * * bind BO2 0x1ff000-0x201000 - * update page level 3a PTE[511] to BO2 phys addres (GPU) - * update page level 3b PTE[0] to BO2 phys addres + 0x1000 (GPU) + * update page level 3a PTE[511] to BO2 phys address (GPU) + * update 
page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU) * * GPU bypass * ~~~~~~~~~~ @@ -192,7 +192,7 @@ * * If a VM is in fault mode (TODO: link to fault mode), new bind operations that * create mappings are by default deferred to the page fault handler (first - * use). This behavior can be overriden by setting the flag + * use). This behavior can be overridden by setting the flag * DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to create the mapping * immediately. * @@ -209,7 +209,7 @@ * * Since this is core kernel managed memory the kernel can move this memory * whenever it wants. We register an invalidation MMU notifier to alert XE when - * a user poiter is about to move. The invalidation notifier needs to block + * a user pointer is about to move. The invalidation notifier needs to block * until all pending users (jobs or compute mode engines) of the userptr are * idle to ensure no faults. This is done by waiting on all of VM's dma-resv slots. * @@ -252,7 +252,7 @@ * Rebind worker * ------------- * - * The rebind worker is very similar to an exec. It is resposible for rebinding + * The rebind worker is very similar to an exec. It is responsible for rebinding * evicted BOs or userptrs, waiting on those operations, installing new preempt * fences, and finally resuming executing of engines in the VM. * @@ -317,11 +317,11 @@ * are not allowed, only long running workloads and ULLS are enabled on a faulting * VM. * - * Defered VM binds + * Deferred VM binds * ---------------- * * By default, on a faulting VM binds just allocate the VMA and the actual - * updating of the page tables is defered to the page fault handler. This + * updating of the page tables is deferred to the page fault handler. This * behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE in * the VM bind which will then do the bind immediately. * @@ -500,18 +500,18 @@ * Slot waiting * ------------ * - * 1. The exection of all jobs from kernel ops shall wait on all slots + * 1. 
The execution of all jobs from kernel ops shall wait on all slots * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if * kernel op is operating on external or private BO) * - * 2. In non-compute mode, the exection of all jobs from rebinds in execs shall + * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM * (depends on if the rebind is operating on an external or private BO) * - * 3. In non-compute mode, the exection of all jobs from execs shall wait on the + * 3. In non-compute mode, the execution of all jobs from execs shall wait on the * last rebind job * - * 4. In compute mode, the exection of all jobs from rebinds in the rebind + * 4. In compute mode, the execution of all jobs from rebinds in the rebind * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO * or VM (depends on if rebind is operating on external or private BO) * diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c new file mode 100644 index 000000000000..b378848d3b7b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright © 2024 Intel Corporation */ +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/cleanup.h> +#include <linux/errno.h> +#include <linux/intel_vsec.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/types.h> + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_drv.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" +#include "xe_pm.h" +#include "xe_vsec.h" + +#include "regs/xe_pmt.h" + +/* PMT GUID value for BMG devices. 
NOTE: this is NOT a PCI id */ +#define BMG_DEVICE_ID 0xE2F8 + +static struct intel_vsec_header bmg_telemetry = { + .length = 0x10, + .id = VSEC_ID_TELEMETRY, + .num_entries = 2, + .entry_size = 4, + .tbir = 0, + .offset = BMG_DISCOVERY_OFFSET, +}; + +static struct intel_vsec_header bmg_punit_crashlog = { + .length = 0x10, + .id = VSEC_ID_CRASHLOG, + .num_entries = 1, + .entry_size = 4, + .tbir = 0, + .offset = BMG_DISCOVERY_OFFSET + 0x60, +}; + +static struct intel_vsec_header bmg_oobmsm_crashlog = { + .length = 0x10, + .id = VSEC_ID_CRASHLOG, + .num_entries = 1, + .entry_size = 4, + .tbir = 0, + .offset = BMG_DISCOVERY_OFFSET + 0x78, +}; + +static struct intel_vsec_header *bmg_capabilities[] = { + &bmg_telemetry, + &bmg_punit_crashlog, + &bmg_oobmsm_crashlog, + NULL +}; + +enum xe_vsec { + XE_VSEC_UNKNOWN = 0, + XE_VSEC_BMG, +}; + +static struct intel_vsec_platform_info xe_vsec_info[] = { + [XE_VSEC_BMG] = { + .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_CRASHLOG, + .headers = bmg_capabilities, + }, + { } +}; + +/* + * The GUID will have the following bits to decode: + * [0:3] - {Telemetry space iteration number (0,1,..)} + * [4:7] - Segment (SEGMENT_INDEPENDENT-0, Client-1, Server-2) + * [8:11] - SOC_SKU + * [12:27] – Device ID – changes for each down bin SKU’s + * [28:29] - Capability Type (Crashlog-0, Telemetry Aggregator-1, Watcher-2) + * [30:31] - Record-ID (0-PUNIT, 1-OOBMSM_0, 2-OOBMSM_1) + */ +#define GUID_TELEM_ITERATION GENMASK(3, 0) +#define GUID_SEGMENT GENMASK(7, 4) +#define GUID_SOC_SKU GENMASK(11, 8) +#define GUID_DEVICE_ID GENMASK(27, 12) +#define GUID_CAP_TYPE GENMASK(29, 28) +#define GUID_RECORD_ID GENMASK(31, 30) + +#define PUNIT_TELEMETRY_OFFSET 0x0200 +#define PUNIT_WATCHER_OFFSET 0x14A0 +#define OOBMSM_0_WATCHER_OFFSET 0x18D8 +#define OOBMSM_1_TELEMETRY_OFFSET 0x1000 + +enum record_id { + PUNIT, + OOBMSM_0, + OOBMSM_1, +}; + +enum capability { + CRASHLOG, + TELEMETRY, + WATCHER, +}; + +static int xe_guid_decode(u32 guid, int *index, u32 *offset) +{ 
+ u32 record_id = FIELD_GET(GUID_RECORD_ID, guid); + u32 cap_type = FIELD_GET(GUID_CAP_TYPE, guid); + u32 device_id = FIELD_GET(GUID_DEVICE_ID, guid); + + if (device_id != BMG_DEVICE_ID) + return -ENODEV; + + if (cap_type > WATCHER) + return -EINVAL; + + *offset = 0; + + if (cap_type == CRASHLOG) { + *index = record_id == PUNIT ? 2 : 4; + return 0; + } + + switch (record_id) { + case PUNIT: + *index = 0; + if (cap_type == TELEMETRY) + *offset = PUNIT_TELEMETRY_OFFSET; + else + *offset = PUNIT_WATCHER_OFFSET; + break; + + case OOBMSM_0: + *index = 1; + if (cap_type == WATCHER) + *offset = OOBMSM_0_WATCHER_OFFSET; + break; + + case OOBMSM_1: + *index = 1; + if (cap_type == TELEMETRY) + *offset = OOBMSM_1_TELEMETRY_OFFSET; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, + u32 count) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET; + u32 mem_region; + u32 offset; + int ret; + + ret = xe_guid_decode(guid, &mem_region, &offset); + if (ret) + return ret; + + telem_addr += offset + user_offset; + + guard(mutex)(&xe->pmt.lock); + + /* indicate that we are not at an appropriate power level */ + if (!xe_pm_runtime_get_if_active(xe)) + return -ENODATA; + + /* set SoC re-mapper index register based on GUID memory region */ + xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1, SG_REMAP_BITS, + REG_FIELD_PREP(SG_REMAP_BITS, mem_region)); + + memcpy_fromio(data, telem_addr, count); + xe_pm_runtime_put(xe); + + return count; +} + +static struct pmt_callbacks xe_pmt_cb = { + .read_telem = xe_pmt_telem_read, +}; + +static const int vsec_platforms[] = { + [XE_BATTLEMAGE] = XE_VSEC_BMG, +}; + +static enum xe_vsec get_platform_info(struct xe_device *xe) +{ + if (xe->info.platform > XE_BATTLEMAGE) + return XE_VSEC_UNKNOWN; + + return vsec_platforms[xe->info.platform]; +} + +/** + * xe_vsec_init - Initialize 
resources and add intel_vsec auxiliary + * interface + * @xe: valid xe instance + */ +void xe_vsec_init(struct xe_device *xe) +{ + struct intel_vsec_platform_info *info; + struct device *dev = xe->drm.dev; + struct pci_dev *pdev = to_pci_dev(dev); + enum xe_vsec platform; + + platform = get_platform_info(xe); + if (platform == XE_VSEC_UNKNOWN) + return; + + info = &xe_vsec_info[platform]; + if (!info->headers) + return; + + switch (platform) { + case XE_VSEC_BMG: + info->priv_data = &xe_pmt_cb; + break; + default: + break; + } + + /* + * Register a VSEC. Cleanup is handled using device managed + * resources. + */ + intel_vsec_register(pdev, info); +} +MODULE_IMPORT_NS("INTEL_VSEC"); diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h new file mode 100644 index 000000000000..5777c53faec2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vsec.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright © 2024 Intel Corporation */ + +#ifndef _XE_VSEC_H_ +#define _XE_VSEC_H_ + +struct xe_device; + +void xe_vsec_init(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 02cf647f86d8..570fe0376402 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -607,6 +607,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, + { XE_RTP_NAME("16024792527"), + XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(FIELD_SET(SAMPLER_MODE, SMP_WAIT_FETCH_MERGING_COUNTER, + SMP_FORCE_128B_OVERFETCH)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index bcd04464b85e..40438c3d9b72 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1,3 +1,4 @@ +1607983814 GRAPHICS_VERSION_RANGE(1200, 1210) 22012773006 GRAPHICS_VERSION_RANGE(1200, 
1250) 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) PLATFORM(DG2) @@ -33,7 +34,7 @@ GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) - MEDIA_VERSION(3000), MEDIA_STEP(A0, B0) + MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) 22019338487_display PLATFORM(LUNARLAKE) 16023588340 GRAPHICS_VERSION(2001) 14019789679 GRAPHICS_VERSION(1255) diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 0b63fd48ea92..979f6d3239ba 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -1564,7 +1564,7 @@ static void zynqmp_dp_bridge_atomic_enable(struct drm_bridge *bridge, pm_runtime_get_sync(dp->dev); - mutex_lock(&dp->lock); + guard(mutex)(&dp->lock); zynqmp_dp_disp_enable(dp, old_bridge_state); /* @@ -1624,7 +1624,6 @@ static void zynqmp_dp_bridge_atomic_enable(struct drm_bridge *bridge, zynqmp_dp_write(dp, ZYNQMP_DP_SOFTWARE_RESET, ZYNQMP_DP_SOFTWARE_RESET_ALL); zynqmp_dp_write(dp, ZYNQMP_DP_MAIN_STREAM_ENABLE, 1); - mutex_unlock(&dp->lock); } static void zynqmp_dp_bridge_atomic_disable(struct drm_bridge *bridge, |