Diffstat (limited to 'drivers/gpu')
456 files changed, 11874 insertions, 5257 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 6f1cf235073d..fbef3f471bd0 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -99,6 +99,7 @@ config DRM_KUNIT_TEST
 config DRM_KMS_HELPER
     tristate
     depends on DRM
+    select FB_CORE if DRM_FBDEV_EMULATION
     help
       CRTC helpers for KMS drivers.
 
@@ -293,6 +294,8 @@ config DRM_TTM_HELPER
     tristate
     depends on DRM
     select DRM_TTM
+    select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+    select FB_CORE if DRM_FBDEV_EMULATION
     select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
     help
       Helpers for ttm-based gem objects
 
@@ -300,6 +303,8 @@ config DRM_GEM_DMA_HELPER
     tristate
     depends on DRM
+    select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+    select FB_CORE if DRM_FBDEV_EMULATION
     select FB_DMAMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
     help
       Choose this if you need the GEM DMA helper functions
 
@@ -307,6 +312,8 @@ config DRM_GEM_SHMEM_HELPER
     tristate
     depends on DRM && MMU
+    select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+    select FB_CORE if DRM_FBDEV_EMULATION
     select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
     help
       Choose this if you need the GEM shmem helper functions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 2e5732dfd425..2c1b38c5cfc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -715,8 +715,9 @@ err:
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
 {
     enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
-    if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
-        ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) {
+    if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+         ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) ||
+        (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) {
         pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
         amdgpu_gfx_off_ctrl(adev, idle);
     } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index cc66ebb7bae1..441568163e20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1131,6 +1131,9 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
     uint32_t low, high;
     uint64_t queue_addr = 0;
 
+    if (!amdgpu_gpu_recovery)
+        return 0;
+
     kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
     amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
 
@@ -1179,6 +1182,9 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
     uint32_t low, high, pipe_reset_data = 0;
     uint64_t queue_addr = 0;
 
+    if (!amdgpu_gpu_recovery)
+        return 0;
+
     kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
     amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5df21529b3b1..5cc5f59e3018 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1105,7 +1105,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
      * We can't use gang submit with reserved VMIDs when the VM changes
      * can't be invalidated by more than one engine at the same time.
      */
-    if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) {
+    if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
         for (i = 0; i < p->gang_size; ++i) {
             struct drm_sched_entity *entity = p->entities[i];
             struct drm_gpu_scheduler *sched = entity->rq->sched;
@@ -1189,7 +1189,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
         if (!bo)
             continue;
 
-        amdgpu_vm_bo_invalidate(adev, bo, false);
+        amdgpu_vm_bo_invalidate(bo, false);
     }
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 36053b3d48b3..d100bb7a137c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2365,8 +2365,8 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
         break;
     }
 
-    DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
-             ip_block_version->funcs->name);
+    dev_info(adev->dev, "detected ip block number %d <%s>\n",
+             adev->num_ip_blocks, ip_block_version->funcs->name);
 
     adev->ip_blocks[adev->num_ip_blocks].adev = adev;
 
@@ -6158,6 +6158,44 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
 }
 
 /**
+ * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * AMD dGPU which may be a virtual upstream bridge.
+ */
+static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
+                                        enum pci_bus_speed *speed,
+                                        enum pcie_link_width *width)
+{
+    struct pci_dev *parent = adev->pdev;
+
+    if (!speed || !width)
+        return;
+
+    parent = pci_upstream_bridge(parent);
+    if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
+        /* use the upstream/downstream switches internal to dGPU */
+        *speed = pcie_get_speed_cap(parent);
+        *width = pcie_get_width_cap(parent);
+        while ((parent = pci_upstream_bridge(parent))) {
+            if (parent->vendor == PCI_VENDOR_ID_ATI) {
+                /* use the upstream/downstream switches internal to dGPU */
+                *speed = pcie_get_speed_cap(parent);
+                *width = pcie_get_width_cap(parent);
+            }
+        }
+    } else {
+        /* use the device itself */
+        *speed = pcie_get_speed_cap(adev->pdev);
+        *width = pcie_get_width_cap(adev->pdev);
+    }
+}
+
+/**
  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
  *
  * @adev: amdgpu_device pointer
@@ -6168,9 +6206,8 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
  */
 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 {
-    struct pci_dev *pdev;
     enum pci_bus_speed speed_cap, platform_speed_cap;
-    enum pcie_link_width platform_link_width;
+    enum pcie_link_width platform_link_width, link_width;
 
     if (amdgpu_pcie_gen_cap)
         adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -6192,11 +6229,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
     amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
                                     &platform_link_width);
+    amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
 
     if (adev->pm.pcie_gen_mask == 0) {
         /* asic caps */
-        pdev = adev->pdev;
-        speed_cap = pcie_get_speed_cap(pdev);
         if (speed_cap == PCI_SPEED_UNKNOWN) {
             adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
                                        CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
@@ -6252,51 +6288,103 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
         }
     }
     if (adev->pm.pcie_mlw_mask == 0) {
+        /* asic caps */
+        if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+            adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
+        } else {
+            switch (link_width) {
+            case PCIE_LNK_X32:
+                adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+                break;
+            case PCIE_LNK_X16:
+                adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+                break;
+            case PCIE_LNK_X12:
+                adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+                break;
+            case PCIE_LNK_X8:
+                adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+                break;
+            case PCIE_LNK_X4:
+                adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+                break;
+            case PCIE_LNK_X2:
+                adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+                break;
+            case PCIE_LNK_X1:
+                adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
+                break;
+            default:
+                break;
+            }
+        }
+        /* platform caps */
         if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
             adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
         } else {
             switch (platform_link_width) {
             case PCIE_LNK_X32:
-                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                 break;
             case PCIE_LNK_X16:
-                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                 break;
             case PCIE_LNK_X12:
-                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                 break;
             case PCIE_LNK_X8:
-                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                 break;
             case PCIE_LNK_X4:
-                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                 break;
             case PCIE_LNK_X2:
-                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+                adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                 break;
             case PCIE_LNK_X1:
-                adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+                adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
                 break;
             default:
                 break;
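An aside on the bridge walk in amdgpu_device_gpu_bandwidth() above: the loop keeps overwriting *speed and *width every time it finds another AMD-owned bridge on the way up to the root port, so the values that survive are those of the topmost AMD bridge, i.e. the dGPU's own (possibly virtual) upstream port rather than one of its internal downstream switches. A standalone sketch of that last-match-wins walk, using a hypothetical fake_bridge struct in place of struct pci_dev and pci_upstream_bridge():

  #include <stdio.h>

  /* Hypothetical stand-in for struct pci_dev; 0x1002 == PCI_VENDOR_ID_ATI. */
  struct fake_bridge {
      unsigned short vendor;
      int speed, width;
      struct fake_bridge *upstream;
  };

  static void gpu_bandwidth(struct fake_bridge *dev, int *speed, int *width)
  {
      struct fake_bridge *parent = dev->upstream;

      if (parent && parent->vendor == 0x1002) {
          /* keep overwriting: the topmost AMD bridge wins */
          for (; parent; parent = parent->upstream) {
              if (parent->vendor == 0x1002) {
                  *speed = parent->speed;
                  *width = parent->width;
              }
          }
      } else {
          /* no AMD bridge above us: use the device itself */
          *speed = dev->speed;
          *width = dev->width;
      }
  }

  int main(void)
  {
      struct fake_bridge root = { 0x8086, 3, 16, NULL };  /* platform bridge */
      struct fake_bridge usp  = { 0x1002, 4, 16, &root }; /* dGPU upstream port */
      struct fake_bridge dsp  = { 0x1002, 4, 8,  &usp };  /* dGPU downstream port */
      struct fake_bridge gpu  = { 0x1002, 4, 8,  &dsp };  /* GPU endpoint */
      int speed = 0, width = 0;

      gpu_bandwidth(&gpu, &speed, &width);
      printf("gen %d x%d\n", speed, width); /* gen 4 x16, from the upstream port */
      return 0;
  }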
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 8e81a83d37d8..9f627caedc3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -36,6 +36,7 @@
 #include "amdgpu_gem.h"
 #include "amdgpu_dma_buf.h"
 #include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
 #include <drm/amdgpu_drm.h>
 #include <drm/ttm/ttm_tt.h>
 #include <linux/dma-buf.h>
@@ -60,6 +61,8 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
     if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
         attach->peer2peer = false;
 
+    amdgpu_vm_bo_update_shared(bo);
+
     return 0;
 }
 
@@ -345,7 +348,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
     /* FIXME: This should be after the "if", but needs a fix to make sure
      * DMABuf imports are initialized in the right VM list.
      */
-    amdgpu_vm_bo_invalidate(adev, bo, false);
+    amdgpu_vm_bo_invalidate(bo, false);
 
     if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
         return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 492b09d84571..817116e53d44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -280,7 +280,7 @@ module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
 /**
  * DOC: gttsize (int)
  * Restrict the size of GTT domain (for userspace use) in MiB for testing.
- * The default is -1 (Use 1/2 RAM, minimum value is 3GB).
+ * The default is -1 (Use value specified by TTM).
  */
 MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
 module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
 
@@ -399,7 +399,7 @@ module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
  * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
  */
 MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
-module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
+module_param_named_unsafe(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
 
 /**
  * DOC: bapm (int)
@@ -457,7 +457,7 @@ module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
  * Enable experimental hw support (1 = enable). The default is 0 (disabled).
  */
 MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
-module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
+module_param_named_unsafe(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
 
 /**
  * DOC: dc (int)
@@ -568,14 +568,14 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
  * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
  */
 MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
-module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+module_param_named_unsafe(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
 
 /**
  * DOC: emu_mode (int)
  * Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
  */
 MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
-module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+module_param_named_unsafe(emu_mode, amdgpu_emu_mode, int, 0444);
 
 /**
  * DOC: ras_enable (int)
@@ -730,7 +730,7 @@ module_param_named(noretry, amdgpu_noretry, int, 0644);
  */
 MODULE_PARM_DESC(force_asic_type,
                  "A non-negative value used to specify the asic type for all supported GPUs");
-module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
+module_param_named_unsafe(force_asic_type, amdgpu_force_asic_type, int, 0444);
 
 /**
  * DOC: use_xgmi_p2p (int)
@@ -749,7 +749,7 @@ module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444);
  * assigns queues to HQDs.
  */
 int sched_policy = KFD_SCHED_POLICY_HWS;
-module_param(sched_policy, int, 0444);
+module_param_unsafe(sched_policy, int, 0444);
 MODULE_PARM_DESC(sched_policy,
     "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
 
@@ -799,7 +799,7 @@ MODULE_PARM_DESC(send_sigterm,
  * Setting 1 enables halt on hang.
  */
 int halt_if_hws_hang;
-module_param(halt_if_hws_hang, int, 0644);
+module_param_unsafe(halt_if_hws_hang, int, 0644);
 MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
 
 /**
@@ -808,7 +808,7 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
  * check says. Default value: false (rely on MEC2 firmware version check).
  */
 bool hws_gws_support;
-module_param(hws_gws_support, bool, 0444);
+module_param_unsafe(hws_gws_support, bool, 0444);
 MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
 
 /**
@@ -841,7 +841,7 @@ MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = defa
  */
 int amdgpu_no_queue_eviction_on_vm_fault;
 MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
-module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
+module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
 #endif
 
 /**
@@ -849,7 +849,7 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm
  */
 int amdgpu_mtype_local;
 MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
-module_param_named(mtype_local, amdgpu_mtype_local, int, 0444);
+module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444);
 
 /**
  * DOC: pcie_p2p (bool)
@@ -953,7 +953,7 @@ module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
  * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
  */
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
-module_param_named(reset_method, amdgpu_reset_method, int, 0644);
+module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644);
 
 /**
  * DOC: bad_page_threshold (int) Bad page threshold specifies the
@@ -1049,7 +1049,7 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
  * - 0x4: Disable GPU soft recovery, always do a full reset
  */
 MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
-module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
+module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
 
 /**
  * DOC: agp (int)
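A note on the module_param*_unsafe() conversions above: the _unsafe variants register the parameter exactly like their plain counterparts, but taint the kernel (TAINT_USER) when the parameter is actually set, so bug reports carry a marker that debugging or experimental knobs were in play. A minimal sketch of the pattern for a hypothetical out-of-tree module (demo_debug_mask is an invented name; this only illustrates usage, not the amdgpu code):

  #include <linux/module.h>
  #include <linux/moduleparam.h>

  /* Setting this on the module command line taints the running kernel. */
  static int demo_debug_mask;
  module_param_named_unsafe(debug_mask, demo_debug_mask, int, 0444);
  MODULE_PARM_DESC(debug_mask, "debug options for the demo module (taints kernel when set)");

  static int __init demo_init(void)
  {
      pr_info("demo: debug_mask=0x%x\n", demo_debug_mask);
      return 0;
  }

  static void __exit demo_exit(void)
  {
  }

  module_init(demo_init);
  module_exit(demo_exit);
  MODULE_LICENSE("GPL");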
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index df2cf5c33925..91d638098889 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
     struct amdgpu_fpriv *fpriv = file->driver_priv;
     struct amdgpu_vm *vm = &fpriv->vm;
 
-    struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST + 1] = { };
+    struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
     ktime_t usage[AMDGPU_HW_IP_NUM];
     const char *pl_name[] = {
         [TTM_PL_VRAM] = "vram",
@@ -72,15 +72,8 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
         [AMDGPU_PL_DOORBELL] = "doorbell",
     };
     unsigned int hw_ip, i;
-    int ret;
-
-    ret = amdgpu_bo_reserve(vm->root.bo, false);
-    if (ret)
-        return;
-
-    amdgpu_vm_get_memory(vm, stats, ARRAY_SIZE(stats));
-    amdgpu_bo_unreserve(vm->root.bo);
 
+    amdgpu_vm_get_memory(vm, stats);
     amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
 
     /*
@@ -114,9 +107,11 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
     drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
                stats[TTM_PL_VRAM].evicted/1024UL);
     drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
-               stats[TTM_PL_VRAM].requested/1024UL);
+               (stats[TTM_PL_VRAM].drm.shared +
+                stats[TTM_PL_VRAM].drm.private) / 1024UL);
     drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
-               stats[TTM_PL_TT].requested/1024UL);
+               (stats[TTM_PL_TT].drm.shared +
+                stats[TTM_PL_TT].drm.private) / 1024UL);
 
     for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
         if (!usage[hw_ip])
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
index 2d4b67175b55..328a1b963548 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -122,6 +122,10 @@ static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev)
     if (adev->flags & AMD_IS_APU)
         return 0;
 
+    if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) ||
+        amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3))
+        return 0;
+
     if (adev->asic_type >= CHIP_SIENNA_CICHLID)
         return 1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 1a5df8b94661..69429df09477 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -42,6 +42,7 @@
 #include "amdgpu_dma_buf.h"
 #include "amdgpu_hmm.h"
 #include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
 
 static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
 {
@@ -87,10 +88,8 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 {
     struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
 
-    if (aobj) {
-        amdgpu_hmm_unregister(aobj);
-        ttm_bo_put(&aobj->tbo);
-    }
+    amdgpu_hmm_unregister(aobj);
+    ttm_bo_put(&aobj->tbo);
 }
 
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
@@ -179,6 +178,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
     if (r)
         return r;
 
+    amdgpu_vm_bo_update_shared(abo);
     bo_va = amdgpu_vm_bo_find(vm, abo);
     if (!bo_va)
         bo_va = amdgpu_vm_bo_add(adev, vm, abo);
@@ -252,6 +252,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
         goto out_unlock;
 
     amdgpu_vm_bo_del(adev, bo_va);
+    amdgpu_vm_bo_update_shared(bo);
     if (!amdgpu_vm_ready(vm))
         goto out_unlock;
 
@@ -839,7 +840,6 @@ error:
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *filp)
 {
-    struct amdgpu_device *adev = drm_to_adev(dev);
     struct drm_amdgpu_gem_op *args = data;
     struct drm_gem_object *gobj;
     struct amdgpu_vm_bo_base *base;
@@ -899,7 +899,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
             robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
         if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
-            amdgpu_vm_bo_invalidate(adev, robj, true);
+            amdgpu_vm_bo_invalidate(robj, true);
 
         amdgpu_bo_unreserve(robj);
         break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 6d5d81f0dc4e..784b03abb3a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -2054,6 +2054,7 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
 {
     struct amdgpu_device *adev = ring->adev;
     u32 idx;
+    bool sched_work = false;
 
     if (!adev->gfx.enable_cleaner_shader)
         return;
@@ -2072,9 +2073,12 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
     mutex_lock(&adev->enforce_isolation_mutex);
     if (adev->enforce_isolation[idx]) {
         if (adev->kfd.init_complete)
-            amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
+            sched_work = true;
     }
     mutex_unlock(&adev->enforce_isolation_mutex);
+
+    if (sched_work)
+        amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
 }
 
 /**
@@ -2090,6 +2094,7 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
 {
     struct amdgpu_device *adev = ring->adev;
     u32 idx;
+    bool sched_work = false;
 
     if (!adev->gfx.enable_cleaner_shader)
         return;
@@ -2105,9 +2110,12 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
     mutex_lock(&adev->enforce_isolation_mutex);
     if (adev->enforce_isolation[idx]) {
         if (adev->kfd.init_complete)
-            amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
+            sched_work = true;
     }
     mutex_unlock(&adev->enforce_isolation_mutex);
+
+    if (sched_work)
+        amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index d751995dc131..2ea98ec60220 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -191,8 +191,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
     need_ctx_switch = ring->current_ctx != fence_ctx;
     if (ring->funcs->emit_pipeline_sync && job &&
         ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
-         (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
-         amdgpu_vm_need_pipeline_sync(ring, job))) {
+         need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
+
         need_pipe_sync = true;
 
         if (tmp)
@@ -297,7 +297,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
         amdgpu_ring_patch_cond_exec(ring, cond_exec);
 
     ring->current_ctx = fence_ctx;
-    if (vm && ring->funcs->emit_switch_buffer)
+    if (job && ring->funcs->emit_switch_buffer)
         amdgpu_ring_emit_switch_buffer(ring);
 
     if (ring->funcs->emit_wave_limit &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 016a6f6c4267..98528ee94c15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -846,7 +846,7 @@ out:
     case AMDGPU_INFO_DEV_INFO: {
         struct drm_amdgpu_info_device *dev_info;
         uint64_t vm_size;
-        uint32_t pcie_gen_mask;
+        uint32_t pcie_gen_mask, pcie_width_mask;
 
         dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
         if (!dev_info)
@@ -934,15 +934,18 @@ out:
         dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;
 
         /* Combine the chip gen mask with the platform (CPU/mobo) mask. */
-        pcie_gen_mask = adev->pm.pcie_gen_mask & (adev->pm.pcie_gen_mask >> 16);
+        pcie_gen_mask = adev->pm.pcie_gen_mask &
+            (adev->pm.pcie_gen_mask >> CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT);
+        pcie_width_mask = adev->pm.pcie_mlw_mask &
+            (adev->pm.pcie_mlw_mask >> CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT);
         dev_info->pcie_gen = fls(pcie_gen_mask);
         dev_info->pcie_num_lanes =
-            adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
-            adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
-            adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
-            adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
-            adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
-            adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
+            pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
+            pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
+            pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
+            pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
+            pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
+            pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
 
         dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size;
         dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp;
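For background on the mask arithmetic in the amdgpu_kms.c hunk above: in the CAIL definitions the ASIC capability bits (CAIL_ASIC_PCIE_LINK_*) occupy the low 16 bits and the platform capability bits (CAIL_PCIE_LINK_*) occupy the high 16 bits, with the two halves CAIL_PCIE_LINK_*_SUPPORT_SHIFT (16) apart. mask & (mask >> 16) therefore lands the intersection of both halves in the low, ASIC-numbered bit positions, which is why the new ternary chain tests the CAIL_ASIC_* constants. A standalone sketch with illustrative bit values:

  #include <stdio.h>

  #define SUPPORT_SHIFT 16 /* matches CAIL_PCIE_LINK_*_SUPPORT_SHIFT */

  int main(void)
  {
      /* asic caps (gen1..gen5) in the low half, platform caps (gen1..gen4) high */
      unsigned int asic_caps     = 0x001f;
      unsigned int platform_caps = 0x000f << SUPPORT_SHIFT;
      unsigned int pcie_gen_mask = asic_caps | platform_caps;

      /* what both the chip and the board can do, in asic bit positions */
      unsigned int usable = pcie_gen_mask & (pcie_gen_mask >> SUPPORT_SHIFT);

      printf("usable gen mask: 0x%04x\n", usable & 0xffff); /* 0x000f -> gen4 */
      return 0;
  }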
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index c6f93cbd6739..2df2444ee892 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -40,7 +40,7 @@
 #define AMDGPU_MES_VERSION_MASK 0x00000fff
 #define AMDGPU_MES_API_VERSION_MASK 0x00fff000
 #define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000
-#define AMDGPU_MES_MSCRATCH_SIZE 0x8000
+#define AMDGPU_MES_MSCRATCH_SIZE 0x40000
 
 enum amdgpu_mes_priority_level {
     AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 4f057996ef35..96f4b8904e9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1251,7 +1251,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
                            bool evict,
                            struct ttm_resource *new_mem)
 {
-    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
     struct ttm_resource *old_mem = bo->resource;
     struct amdgpu_bo *abo;
 
@@ -1259,7 +1258,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
         return;
 
     abo = ttm_to_amdgpu_bo(bo);
-    amdgpu_vm_bo_invalidate(adev, abo, evict);
+    amdgpu_vm_bo_move(abo, new_mem, evict);
 
     amdgpu_bo_kunmap(abo);
 
@@ -1272,75 +1271,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
                  old_mem ? old_mem->mem_type : -1);
 }
 
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
-                          struct amdgpu_mem_stats *stats,
-                          unsigned int sz)
-{
-    const unsigned int domain_to_pl[] = {
-        [ilog2(AMDGPU_GEM_DOMAIN_CPU)]      = TTM_PL_SYSTEM,
-        [ilog2(AMDGPU_GEM_DOMAIN_GTT)]      = TTM_PL_TT,
-        [ilog2(AMDGPU_GEM_DOMAIN_VRAM)]     = TTM_PL_VRAM,
-        [ilog2(AMDGPU_GEM_DOMAIN_GDS)]      = AMDGPU_PL_GDS,
-        [ilog2(AMDGPU_GEM_DOMAIN_GWS)]      = AMDGPU_PL_GWS,
-        [ilog2(AMDGPU_GEM_DOMAIN_OA)]       = AMDGPU_PL_OA,
-        [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL,
-    };
-    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-    struct ttm_resource *res = bo->tbo.resource;
-    struct drm_gem_object *obj = &bo->tbo.base;
-    uint64_t size = amdgpu_bo_size(bo);
-    unsigned int type;
-
-    if (!res) {
-        /*
-         * If no backing store use one of the preferred domain for basic
-         * stats. We take the MSB since that should give a reasonable
-         * view.
-         */
-        BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT ||
-                     TTM_PL_VRAM < TTM_PL_SYSTEM);
-        type = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK);
-        if (!type)
-            return;
-        type--;
-        if (drm_WARN_ON_ONCE(&adev->ddev,
-                             type >= ARRAY_SIZE(domain_to_pl)))
-            return;
-        type = domain_to_pl[type];
-    } else {
-        type = res->mem_type;
-    }
-
-    if (drm_WARN_ON_ONCE(&adev->ddev, type >= sz))
-        return;
-
-    /* DRM stats common fields: */
-
-    if (drm_gem_object_is_shared_for_memory_stats(obj))
-        stats[type].drm.shared += size;
-    else
-        stats[type].drm.private += size;
-
-    if (res) {
-        stats[type].drm.resident += size;
-
-        if (!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_BOOKKEEP))
-            stats[type].drm.active += size;
-        else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
-            stats[type].drm.purgeable += size;
-    }
-
-    /* amdgpu specific stats: */
-
-    if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
-        stats[TTM_PL_VRAM].requested += size;
-        if (type != TTM_PL_VRAM)
-            stats[TTM_PL_VRAM].evicted += size;
-    } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
-        stats[TTM_PL_TT].requested += size;
-    }
-}
-
 /**
  * amdgpu_bo_release_notify - notification about a BO being released
  * @bo: pointer to a buffer object
@@ -1556,6 +1486,45 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
 }
 
 /**
+ * amdgpu_bo_mem_stats_placement - bo placement for memory accounting
+ * @bo: the buffer object we should look at
+ *
+ * A BO can have multiple preferred placements; to avoid double counting we
+ * want to file it under a single placement for memory stats.
+ * Luckily, if we take the highest set bit in preferred_domains the result is
+ * quite sensible.
+ *
+ * Returns:
+ * Which of the placements should the BO be accounted under.
+ */
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
+{
+    uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
+
+    if (!domain)
+        return TTM_PL_SYSTEM;
+
+    switch (rounddown_pow_of_two(domain)) {
+    case AMDGPU_GEM_DOMAIN_CPU:
+        return TTM_PL_SYSTEM;
+    case AMDGPU_GEM_DOMAIN_GTT:
+        return TTM_PL_TT;
+    case AMDGPU_GEM_DOMAIN_VRAM:
+        return TTM_PL_VRAM;
+    case AMDGPU_GEM_DOMAIN_GDS:
+        return AMDGPU_PL_GDS;
+    case AMDGPU_GEM_DOMAIN_GWS:
+        return AMDGPU_PL_GWS;
+    case AMDGPU_GEM_DOMAIN_OA:
+        return AMDGPU_PL_OA;
+    case AMDGPU_GEM_DOMAIN_DOORBELL:
+        return AMDGPU_PL_DOORBELL;
+    default:
+        return TTM_PL_SYSTEM;
+    }
+}
+
+/**
  * amdgpu_bo_get_preferred_domain - get preferred domain
  * @adev: amdgpu device object
  * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index ab3fe7b42da7..375448627f7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -305,9 +305,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
 int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
 u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
-                          struct amdgpu_mem_stats *stats,
-                          unsigned int size);
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);
 uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
                                         uint32_t domain);
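The new amdgpu_bo_mem_stats_placement() relies on the GEM domain bits being ordered CPU (0x1) < GTT (0x2) < VRAM (0x4), so rounding the preferred-domain mask down to a power of two picks the "highest" placement when several are allowed. A standalone sketch of that selection (domain values inlined as assumptions from amdgpu_drm.h; highest_bit() stands in for the kernel's rounddown_pow_of_two()):

  #include <stdio.h>

  /* Domain bits as in amdgpu_drm.h. */
  #define DOMAIN_CPU  0x1
  #define DOMAIN_GTT  0x2
  #define DOMAIN_VRAM 0x4

  /* Clear lower bits until only the highest set bit remains. */
  static unsigned int highest_bit(unsigned int v)
  {
      while (v & (v - 1))
          v &= v - 1;
      return v;
  }

  static const char *stats_placement(unsigned int preferred)
  {
      switch (highest_bit(preferred)) {
      case DOMAIN_VRAM: return "vram";
      case DOMAIN_GTT:  return "gtt";
      case DOMAIN_CPU:  return "cpu";
      default:          return "cpu"; /* no/unknown preference -> system */
      }
  }

  int main(void)
  {
      /* A BO that may live in VRAM, GTT or CPU is accounted under VRAM only. */
      printf("%s\n", stats_placement(DOMAIN_VRAM | DOMAIN_GTT | DOMAIN_CPU));
      printf("%s\n", stats_placement(DOMAIN_GTT | DOMAIN_CPU));
      return 0;
  }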
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 3c1b08441a6f..babe94ade247 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3020,10 +3020,7 @@ static int psp_hw_init(struct amdgpu_ip_block *ip_block)
     struct amdgpu_device *adev = ip_block->adev;
 
     mutex_lock(&adev->firmware.mutex);
-    /*
-     * This sequence is just used on hw_init only once, no need on
-     * resume.
-     */
+
     ret = amdgpu_ucode_init_bo(adev);
     if (ret)
         goto failed;
@@ -3148,6 +3145,10 @@ static int psp_resume(struct amdgpu_ip_block *ip_block)
 
     mutex_lock(&adev->firmware.mutex);
 
+    ret = amdgpu_ucode_init_bo(adev);
+    if (ret)
+        goto failed;
+
     ret = psp_hw_start(psp);
     if (ret)
         goto failed;
@@ -3891,10 +3892,12 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
 {
     struct drm_device *ddev = dev_get_drvdata(dev);
     struct amdgpu_device *adev = drm_to_adev(ddev);
+    struct amdgpu_ip_block *ip_block;
     uint32_t fw_ver;
     int ret;
 
-    if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
+    ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+    if (!ip_block || !ip_block->status.late_initialized) {
         dev_info(adev->dev, "PSP block is not ready yet.\n");
         return -EBUSY;
     }
@@ -3923,8 +3926,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
     struct amdgpu_bo *fw_buf_bo = NULL;
     uint64_t fw_pri_mc_addr;
     void *fw_pri_cpu_addr;
+    struct amdgpu_ip_block *ip_block;
 
-    if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
+    ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+    if (!ip_block || !ip_block->status.late_initialized) {
         dev_err(adev->dev, "PSP block is not ready yet.");
         return -EBUSY;
     }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 01c947066a2e..f0924aa3f4e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2832,8 +2832,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 
     mutex_lock(&con->recovery_lock);
     data = con->eh_data;
-    if (!data)
+    if (!data) {
+        /* Returning 0 as the absence of eh_data is acceptable */
         goto free;
+    }
 
     for (i = 0; i < pages; i++) {
         if (from_rom &&
@@ -2845,26 +2847,34 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
                      * one row
                      */
                     if (amdgpu_umc_pages_in_a_row(adev, &err_data,
-                            bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+                                                  bps[i].retired_page <<
+                                                  AMDGPU_GPU_PAGE_SHIFT)) {
+                        ret = -EINVAL;
                         goto free;
-                    else
+                    } else {
                         find_pages_per_pa = true;
+                    }
                 } else {
                     /* unsupported cases */
+                    ret = -EOPNOTSUPP;
                     goto free;
                 }
             }
         } else {
             if (amdgpu_umc_pages_in_a_row(adev, &err_data,
-                    bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+                                          bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
+                ret = -EINVAL;
                 goto free;
+            }
         }
     } else {
         if (from_rom && !find_pages_per_pa) {
             if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) {
                 /* bad page in any NPS mode in eeprom */
-                if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data))
+                if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) {
+                    ret = -EINVAL;
                     goto free;
+                }
             } else {
                 /* legacy bad page in eeprom, generated only in
                  * NPS1 mode
@@ -2881,6 +2891,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
                 /* non-nps1 mode, old RAS TA
                  * can't support it
                  */
+                ret = -EOPNOTSUPP;
                 goto free;
             }
         }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 632295bf3875..174badca27e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -362,13 +362,13 @@ static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
     if (!adev)
         return -ENODEV;
 
-    mask = (1 << adev->sdma.num_instances) - 1;
+    mask = BIT_ULL(adev->sdma.num_instances) - 1;
     if ((val & mask) == 0)
         return -EINVAL;
 
     for (i = 0; i < adev->sdma.num_instances; ++i) {
         ring = &adev->sdma.instance[i].ring;
-        if (val & (1 << i))
+        if (val & BIT_ULL(i))
             ring->sched.ready = true;
         else
             ring->sched.ready = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 2db58b5812a8..5f60736051d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -107,6 +107,7 @@ struct amdgpu_sdma {
     struct amdgpu_irq_src doorbell_invalid_irq;
     struct amdgpu_irq_src pool_timeout_irq;
     struct amdgpu_irq_src srbm_write_irq;
+    struct amdgpu_irq_src ctxt_empty_irq;
 
     int num_instances;
     uint32_t sdma_mask;
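The BIT_ULL() conversion in the sdma debugfs hunk above is not cosmetic: (1 << n) - 1 is evaluated in 32-bit int, so the shift is undefined behaviour once n reaches 32, and bits of the 64-bit val above bit 31 could never be selected; BIT_ULL() shifts 1ULL instead. Current parts have far fewer SDMA instances, but the 64-bit math keeps the mask well-defined regardless. A standalone sketch (BIT_ULL inlined here as an assumption matching include/linux/bits.h):

  #include <stdio.h>

  #define BIT_ULL(nr) (1ULL << (nr)) /* as in include/linux/bits.h */

  int main(void)
  {
      int num_instances = 32;

      /* (1 << 32) on a 32-bit int would be undefined behaviour:
       *     unsigned int bad = (1 << num_instances) - 1;
       */
      unsigned long long mask = BIT_ULL(num_instances) - 1;
      unsigned long long val = 1ULL << 31;

      printf("mask = 0x%llx\n", mask); /* 0xffffffff */
      printf("instance 31 selected: %d\n", (val & BIT_ULL(31)) != 0);
      return 0;
  }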
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2d85f1ec3041..ff286940ab43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2066,6 +2066,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
     ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
     ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
     ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
+    ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
     ttm_device_fini(&adev->mman.bdev);
     adev->mman.initialized = false;
     DRM_INFO("amdgpu: ttm finalized\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 2852a6064c9a..461fb8090ae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -26,15 +26,15 @@
 
 #include <linux/dma-direction.h>
 #include <drm/gpu_scheduler.h>
+#include <drm/ttm/ttm_placement.h>
 #include "amdgpu_vram_mgr.h"
-#include "amdgpu.h"
 
 #define AMDGPU_PL_GDS      (TTM_PL_PRIV + 0)
 #define AMDGPU_PL_GWS      (TTM_PL_PRIV + 1)
 #define AMDGPU_PL_OA       (TTM_PL_PRIV + 2)
 #define AMDGPU_PL_PREEMPT  (TTM_PL_PRIV + 3)
 #define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
-#define __AMDGPU_PL_LAST   (TTM_PL_PRIV + 4)
+#define __AMDGPU_PL_NUM    (TTM_PL_PRIV + 5)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c9c48b782ec1..5c07777d3239 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -36,6 +36,7 @@
 #include <drm/ttm/ttm_tt.h>
 #include <drm/drm_exec.h>
 #include "amdgpu.h"
+#include "amdgpu_vm.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gmc.h"
@@ -311,6 +312,111 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
 }
 
 /**
+ * amdgpu_vm_update_shared - helper to update shared memory stat
+ * @base: base structure for tracking BO usage in a VM
+ *
+ * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats needs to be
+ * called as well.
+ */
+static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
+{
+    struct amdgpu_vm *vm = base->vm;
+    struct amdgpu_bo *bo = base->bo;
+    uint64_t size = amdgpu_bo_size(bo);
+    uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+    bool shared;
+
+    spin_lock(&vm->status_lock);
+    shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+    if (base->shared != shared) {
+        base->shared = shared;
+        if (shared) {
+            vm->stats[bo_memtype].drm.shared += size;
+            vm->stats[bo_memtype].drm.private -= size;
+        } else {
+            vm->stats[bo_memtype].drm.shared -= size;
+            vm->stats[bo_memtype].drm.private += size;
+        }
+    }
+    spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
+ * @bo: amdgpu buffer object
+ *
+ * Update the per-VM stats for all the VMs this BO belongs to, moving the
+ * accounting from private to shared or vice versa as needed.
+ */
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
+{
+    struct amdgpu_vm_bo_base *base;
+
+    for (base = bo->vm_bo; base; base = base->next)
+        amdgpu_vm_update_shared(base);
+}
+
+/**
+ * amdgpu_vm_update_stats_locked - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ *       be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Caller needs to hold the vm status_lock. Useful when multiple updates need
+ * to happen at the same time.
+ */
+static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
+                                          struct ttm_resource *res, int sign)
+{
+    struct amdgpu_vm *vm = base->vm;
+    struct amdgpu_bo *bo = base->bo;
+    int64_t size = sign * amdgpu_bo_size(bo);
+    uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+
+    /* For drm-total- and drm-shared-, BO are accounted by their preferred
+     * placement, see also amdgpu_bo_mem_stats_placement.
+     */
+    if (base->shared)
+        vm->stats[bo_memtype].drm.shared += size;
+    else
+        vm->stats[bo_memtype].drm.private += size;
+
+    if (res && res->mem_type < __AMDGPU_PL_NUM) {
+        uint32_t res_memtype = res->mem_type;
+
+        vm->stats[res_memtype].drm.resident += size;
+        /* BO only count as purgeable if it is resident,
+         * since otherwise there's nothing to purge.
+         */
+        if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
+            vm->stats[res_memtype].drm.purgeable += size;
+        if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+            vm->stats[bo_memtype].evicted += size;
+    }
+}
+
+/**
+ * amdgpu_vm_update_stats - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ *       be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Updates the basic memory stat when a bo is added/deleted/moved.
+ */
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+                            struct ttm_resource *res, int sign)
+{
+    struct amdgpu_vm *vm = base->vm;
+
+    spin_lock(&vm->status_lock);
+    amdgpu_vm_update_stats_locked(base, res, sign);
+    spin_unlock(&vm->status_lock);
+}
+
+/**
  * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
  *
  * @base: base structure for tracking BO usage in a VM
@@ -333,6 +439,11 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
     base->next = bo->vm_bo;
     bo->vm_bo = base;
 
+    spin_lock(&vm->status_lock);
+    base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+    amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
+    spin_unlock(&vm->status_lock);
+
     if (!amdgpu_vm_is_bo_always_valid(vm, bo))
         return;
 
@@ -1083,53 +1194,11 @@ error_free:
     return r;
 }
 
-static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
-                                    struct amdgpu_mem_stats *stats,
-                                    unsigned int size)
-{
-    struct amdgpu_vm *vm = bo_va->base.vm;
-    struct amdgpu_bo *bo = bo_va->base.bo;
-
-    if (!bo)
-        return;
-
-    /*
-     * For now ignore BOs which are currently locked and potentially
-     * changing their location.
-     */
-    if (!amdgpu_vm_is_bo_always_valid(vm, bo) &&
-        !dma_resv_trylock(bo->tbo.base.resv))
-        return;
-
-    amdgpu_bo_get_memory(bo, stats, size);
-    if (!amdgpu_vm_is_bo_always_valid(vm, bo))
-        dma_resv_unlock(bo->tbo.base.resv);
-}
-
 void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
-                          struct amdgpu_mem_stats *stats,
-                          unsigned int size)
+                          struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
 {
-    struct amdgpu_bo_va *bo_va, *tmp;
-
     spin_lock(&vm->status_lock);
-    list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
-        amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-    list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
-        amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-    list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
-        amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-    list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
-        amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-    list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
-        amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-    list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
-        amdgpu_vm_bo_get_memory(bo_va, stats, size);
+    memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
     spin_unlock(&vm->status_lock);
 }
 
@@ -2075,6 +2144,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
             if (*base != &bo_va->base)
                 continue;
 
+            amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
             *base = bo_va->base.next;
             break;
         }
@@ -2143,14 +2213,12 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 /**
  * amdgpu_vm_bo_invalidate - mark the bo as invalid
  *
- * @adev: amdgpu_device pointer
  * @bo: amdgpu buffer object
  * @evicted: is the BO evicted
  *
  * Mark @bo as invalid.
  */
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-                             struct amdgpu_bo *bo, bool evicted)
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
 {
     struct amdgpu_vm_bo_base *bo_base;
 
@@ -2176,6 +2244,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 }
 
 /**
+ * amdgpu_vm_bo_move - handle BO move
+ *
+ * @bo: amdgpu buffer object
+ * @new_mem: the new placement of the BO move
+ * @evicted: is the BO evicted
+ *
+ * Update the memory stats for the new placement and mark @bo as invalid.
+ */
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+                       bool evicted)
+{
+    struct amdgpu_vm_bo_base *bo_base;
+
+    for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+        struct amdgpu_vm *vm = bo_base->vm;
+
+        spin_lock(&vm->status_lock);
+        amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
+        amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
+        spin_unlock(&vm->status_lock);
+    }
+
+    amdgpu_vm_bo_invalidate(bo, evicted);
+}
+
+/**
  * amdgpu_vm_get_block_size - calculate VM page table size as power of two
  *
  * @vm_size: VM size
@@ -2594,6 +2688,16 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
     vm->is_compute_context = false;
 }
 
+static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
+{
+    for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
+        if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
+              vm->stats[i].evicted == 0))
+            return false;
+    }
+    return true;
+}
+
 /**
  * amdgpu_vm_fini - tear down a vm instance
  *
@@ -2617,7 +2721,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
     root = amdgpu_bo_ref(vm->root.bo);
     amdgpu_bo_reserve(root, true);
-    amdgpu_vm_put_task_info(vm->task_info);
     amdgpu_vm_set_pasid(adev, vm, 0);
     dma_fence_wait(vm->last_unlocked, false);
     dma_fence_put(vm->last_unlocked);
@@ -2666,6 +2769,16 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
     }
 
     ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+
+    if (!amdgpu_vm_stats_is_zero(vm)) {
+        struct amdgpu_task_info *ti = vm->task_info;
+
+        dev_warn(adev->dev,
+                 "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
+                 ti->process_name, ti->pid, ti->task_name, ti->tgid);
+    }
+
+    amdgpu_vm_put_task_info(vm->task_info);
 }
 
 /**
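The shared/private bookkeeping added to amdgpu_vm.c above moves a BO's size between the drm.shared and drm.private counters of one placement instead of recomputing totals, so drm-total- (shared + private) stays invariant across an export or import. A standalone sketch of that flip, with a simplified struct standing in for struct drm_memory_stats:

  #include <stdio.h>
  #include <stdbool.h>
  #include <stdint.h>

  /* Simplified stand-in for struct drm_memory_stats. */
  struct mem_stats {
      uint64_t shared;
      uint64_t private_;
  };

  static void update_shared(struct mem_stats *s, uint64_t size,
                            bool *tracked_shared, bool now_shared)
  {
      if (*tracked_shared == now_shared)
          return; /* nothing changed */
      *tracked_shared = now_shared;
      if (now_shared) {
          s->shared += size;
          s->private_ -= size;
      } else {
          s->shared -= size;
          s->private_ += size;
      }
  }

  int main(void)
  {
      struct mem_stats vram = { .shared = 0, .private_ = 1 << 20 };
      bool bo_shared = false;

      update_shared(&vram, 1 << 20, &bo_shared, true); /* BO gets exported */
      printf("shared=%llu private=%llu total=%llu\n",
             (unsigned long long)vram.shared,
             (unsigned long long)vram.private_,
             (unsigned long long)(vram.shared + vram.private_));
      return 0;
  }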
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 5d119ac26c4f..a3e128e373bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -35,6 +35,7 @@
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
 #include "amdgpu_ids.h"
+#include "amdgpu_ttm.h"
 
 struct drm_exec;
 
@@ -202,9 +203,13 @@ struct amdgpu_vm_bo_base {
     /* protected by bo being reserved */
     struct amdgpu_vm_bo_base *next;
 
-    /* protected by spinlock */
+    /* protected by vm status_lock */
     struct list_head vm_status;
 
+    /* if the bo is counted as shared in mem stats
+     * protected by vm status_lock */
+    bool shared;
+
     /* protected by the BO being reserved */
     bool moved;
 };
@@ -324,10 +329,7 @@ struct amdgpu_vm_fault_info {
 struct amdgpu_mem_stats {
     struct drm_memory_stats drm;
 
-    /* buffers that requested this placement */
-    uint64_t requested;
-    /* buffers that requested this placement
-     * but are currently evicted */
+    /* buffers that requested this placement but are currently evicted */
     uint64_t evicted;
 };
 
@@ -345,6 +347,9 @@ struct amdgpu_vm {
     /* Lock to protect vm_bo add/del/move on all lists of vm */
     spinlock_t status_lock;
 
+    /* Memory statistics for this vm, protected by status_lock */
+    struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+
     /* Per-VM and PT BOs that need a validation */
     struct list_head evicted;
 
@@ -524,8 +529,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                         struct amdgpu_bo_va *bo_va,
                         bool clear);
 bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-                             struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+                            struct ttm_resource *new_res, int sign);
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+                       bool evicted);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
                                        struct amdgpu_bo *bo);
@@ -576,8 +585,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
                                 struct amdgpu_vm *vm);
 void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
-                          struct amdgpu_mem_stats *stats,
-                          unsigned int size);
+                          struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]);
 int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                        struct amdgpu_bo_vm *vmbo, bool immediate);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index f78a0434a48f..b0bf21682115 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -537,6 +537,7 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
     if (!entry->bo)
         return;
 
+    amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1);
     entry->bo->vm_bo = NULL;
     ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 7d26a962f811..ff5e52025266 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -567,7 +567,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
         else
             remaining_size -= size;
     }
-    mutex_unlock(&mgr->lock);
 
     if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
         struct drm_buddy_block *dcc_block;
@@ -584,6 +583,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
                                  (u64)vres->base.size,
                                  &vres->blocks);
     }
+    mutex_unlock(&mgr->lock);
 
     vres->base.start = 0;
     size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index e209b5e101df..23b6f7a4aa4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -427,7 +427,7 @@ void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
         return;
 
     sched = entity->entity.rq->sched;
-    if (sched->ready) {
+    if (drm_sched_wqueue_ready(sched)) {
         ring = to_amdgpu_ring(entity->entity.rq->sched);
         atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
     }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 003522c2d902..5ba263fe5512 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -45,6 +45,7 @@
 #include "clearstate_gfx10.h"
 #include "v10_structs.h"
 #include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
 #include "nbio_v2_3.h"
 
 /*
@@ -4738,6 +4739,23 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
         break;
     }
     switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+    case IP_VERSION(10, 3, 0):
+    case IP_VERSION(10, 3, 2):
+    case IP_VERSION(10, 3, 4):
+    case IP_VERSION(10, 3, 5):
+        adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+        adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+        if (adev->gfx.me_fw_version >= 64 &&
+            adev->gfx.pfp_fw_version >= 100 &&
+            adev->gfx.mec_fw_version >= 122) {
+            adev->gfx.enable_cleaner_shader = true;
+            r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+            if (r) {
+                adev->gfx.enable_cleaner_shader = false;
+                dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+            }
+        }
+        break;
     default:
         adev->gfx.enable_cleaner_shader = false;
         break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
new file mode 100644
index 000000000000..663c2572d440
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_10_3_0 */
+static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
+    0xb0804004, 0xbf8a0000,
+    0xbe8203b8, 0xbefc0380,
+    0x7e008480, 0x7e028480,
+    0x7e048480, 0x7e068480,
+    0x7e088480, 0x7e0a8480,
+    0x7e0c8480, 0x7e0e8480,
+    0xbefc0302, 0x80828802,
+    0xbf84fff5, 0xbe8203ff,
+    0x80000000, 0x87020002,
+    0xbf840012, 0xbefe03c1,
+    0xbeff03c1, 0xd7650001,
+    0x0001007f, 0xd7660001,
+    0x0002027e, 0x16020288,
+    0xbe8203bf, 0xbefc03c1,
+    0xd9382000, 0x00020201,
+    0xd9386040, 0x00040401,
+    0xd70f6a01, 0x000202ff,
+    0x00000400, 0x80828102,
+    0xbf84fff7, 0xbefc03ff,
+    0x00000068, 0xbe803080,
+    0xbe813080, 0xbe823080,
+    0xbe833080, 0x80fc847c,
+    0xbf84fffa, 0xbeea0480,
+    0xbeec0480, 0xbeee0480,
+    0xbef00480, 0xbef20480,
+    0xbef40480, 0xbef60480,
+    0xbef80480, 0xbefa0480,
+    0xbf810000, 0xbf9f0000,
+    0xbf9f0000, 0xbf9f0000,
+    0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
new file mode 100644
index 000000000000..0e1c246166c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 192-dword cleaner shader.
+// To turn this shader program on for compilation, change this to main and the lower shader main to main_1
+
+// GFX10.3 : Clear SGPRs, VGPRs and LDS
+//   Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+//   Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+//   Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+//   It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+//   Each wave clears SGPRs 0 - 107
+//   Each wave clears VGPRs 0 - 63
+//   The first wave of the workgroup clears its 64KB of LDS
+//   The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+//   before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+  asic(GFX10)
+  type(CS)
+  wave_size(32)
+// Note: original source code from SQ team
+
+//
+//   Create 32 waves in a threadgroup (CS waves)
+//   Each allocates 64 VGPRs
+//   The workgroup allocates all of LDS (64kbytes)
+//
+//   Takes about 2500 clocks to run.
+//   (theoretical fastest = 1024clks vgpr + 640lds = 1664 clks)
+//
+  S_BARRIER
+  s_mov_b32     s2, 0x00000038  // Loop 64/8=8 times  (loop unrolled for performance)
+  s_mov_b32     m0, 0
+  //
+  // CLEAR VGPRs
+  //
+label_0005:
+  v_movreld_b32     v0, 0
+  v_movreld_b32     v1, 0
+  v_movreld_b32     v2, 0
+  v_movreld_b32     v3, 0
+  v_movreld_b32     v4, 0
+  v_movreld_b32     v5, 0
+  v_movreld_b32     v6, 0
+  v_movreld_b32     v7, 0
+  s_mov_b32         m0, s2
+  s_sub_u32     s2, s2, 8
+  s_cbranch_scc0  label_0005
+  //
+  s_mov_b32     s2, 0x80000000  // Bit31 is first_wave
+  s_and_b32     s2, s2, s0      // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+  s_cbranch_scc0  label_0023    // Clean LDS if it's the first wave of ThreadGroup/WorkGroup
+  // CLEAR LDS
+  //
+  s_mov_b32 exec_lo, 0xffffffff
+  s_mov_b32 exec_hi, 0xffffffff
+  v_mbcnt_lo_u32_b32  v1, exec_hi, 0   // Set V1 to thread-ID (0..63)
+  v_mbcnt_hi_u32_b32  v1, exec_lo, v1  // Set V1 to thread-ID (0..63)
+  v_mul_u32_u24  v1, 0x00000008, v1    // * 8, so each thread is a double-dword address (8byte)
+  s_mov_b32     s2, 0x0000003f         // 64 loop iterations
+  s_mov_b32     m0, 0xffffffff
+  // Clear all of LDS space
+  // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+  ds_write2_b64  v1, v[2:3], v[2:3]  offset1:32
+  ds_write2_b64  v1, v[4:5], v[4:5]  offset0:64  offset1:96
+  v_add_co_u32   v1, vcc, 0x00000400, v1
+  s_sub_u32      s2, s2, 1
+  s_cbranch_scc0  label_001F
+
+  //
+  // CLEAR SGPRs
+  //
+label_0023:
+  s_mov_b32     m0, 0x00000068  // Loop 108/4=27 times  (loop unrolled for performance)
+label_sgpr_loop:
+  s_movreld_b32     s0, 0
+  s_movreld_b32     s1, 0
+  s_movreld_b32     s2, 0
+  s_movreld_b32     s3, 0
+  s_sub_u32         m0, m0, 4
+  s_cbranch_scc0  label_sgpr_loop
+
+  //clear vcc
+  s_mov_b32 flat_scratch_lo, 0   //clear flat scratch lo SGPR
+  s_mov_b32 flat_scratch_hi, 0   //clear flat scratch hi SGPR
+  s_mov_b64 vcc, 0               //clear vcc
+  s_mov_b64 ttmp0, 0             //Clear ttmp0 and ttmp1
+  s_mov_b64 ttmp2, 0             //Clear ttmp2 and ttmp3
+  s_mov_b64 ttmp4, 0             //Clear ttmp4 and ttmp5
+  s_mov_b64 ttmp6, 0             //Clear ttmp6 and ttmp7
+  s_mov_b64 ttmp8, 0             //Clear ttmp8 and ttmp9
+  s_mov_b64 ttmp10, 0            //Clear ttmp10 and ttmp11
+  s_mov_b64 ttmp12, 0            //Clear ttmp12 and ttmp13
+  s_mov_b64 ttmp14, 0            //Clear ttmp14 and ttmp15
+
+  s_endpgm
+
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index fe5d7cd814f7..56c06b72a70a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1891,6 +1891,7 @@ static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 
 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
 {
+    u32 rb_bitmap_per_sa;
     u32 rb_bitmap_width_per_sa;
     u32 max_sa;
     u32 active_sa_bitmap;
@@ -1908,9 +1909,11 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
              adev->gfx.config.max_sh_per_se;
     rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
                              adev->gfx.config.max_sh_per_se;
+    rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
     for (i = 0; i < max_sa; i++) {
         if (active_sa_bitmap & (1 << i))
-            active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+            active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
     }
 
     active_rb_bitmap &= global_active_rb_bitmap;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 40c47a2dd060..2523221a2519 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -1352,6 +1352,14 @@
     }
 
     switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+    case IP_VERSION(12, 0, 0):
+    case IP_VERSION(12, 0, 1):
+        if (adev->gfx.me_fw_version >= 2480 &&
+            adev->gfx.pfp_fw_version >= 2530 &&
+            adev->gfx.mec_fw_version >= 2680 &&
+            adev->mes.fw_version[0] >= 100)
+            adev->gfx.enable_cleaner_shader = true;
+        break;
     default:
         adev->gfx.enable_cleaner_shader = false;
         break;
@@ -1442,11 +1450,19 @@
         }
     }
 
-    /* TODO: Add queue reset mask when FW fully supports it */
     adev->gfx.gfx_supported_reset =
         amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
     adev->gfx.compute_supported_reset =
         amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+    switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+    case IP_VERSION(12, 0, 0):
+    case IP_VERSION(12, 0, 1):
+        if ((adev->gfx.me_fw_version >= 2660) &&
+            (adev->gfx.mec_fw_version >= 2920)) {
+            adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+            adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+        }
+    }
 
     if (!adev->enable_mes_kiq) {
         r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
@@ -1615,6 +1631,7 @@ static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 
 static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
 {
+    u32 rb_bitmap_per_sa;
     u32 rb_bitmap_width_per_sa;
     u32 max_sa;
     u32 active_sa_bitmap;
@@ -1632,12 +1649,14 @@ static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
              adev->gfx.config.max_sh_per_se;
     rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
                              adev->gfx.config.max_sh_per_se;
+    rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
     for (i = 0; i < max_sa; i++) {
         if (active_sa_bitmap & (1 << i))
-            active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+            active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
     }
 
-    active_rb_bitmap
|= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } @@ -4002,17 +4021,6 @@ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade if (def != data) WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); - - data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); - data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - - /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ - if (adev->sdma.num_instances > 1) { - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); - } } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index af73f85527b7..6a025438f9d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5639,8 +5639,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t temp, data; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { @@ -5734,8 +5732,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5745,8 +5741,6 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; @@ -5827,12 +5821,12 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev } gfx_v8_0_wait_for_rlc_serdes(adev); - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + if (enable) { /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) * === MGCG + MGLS + TS(CG/LS) === @@ -5846,6 +5840,8 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); gfx_v8_0_update_medium_grain_clock_gating(adev, enable); } + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4b5006dc3d34..fa572b40989e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4964,8 +4964,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t data, def; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - RLC_CGTT_MGCG_OVERRIDE */ @@ -5030,8 +5028,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, @@ -5042,8 +5038,6 @@ static void 
gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, if (!adev->gfx.num_gfx_rings) return; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - /* Enable 3D CGCG/CGLS */ if (enable) { /* write cmd to clear cgcg/cgls ov */ @@ -5085,8 +5079,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5094,8 +5086,6 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev { uint32_t def, data; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ @@ -5137,13 +5127,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable) { /* CGCG/CGLS should be enabled after MGCG/MGLS * === MGCG + MGLS === @@ -5163,6 +5152,7 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, /* === MGCG + MGLS === */ gfx_v9_0_update_medium_grain_clock_gating(adev, enable); } + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index a5bdcaf7a081..2ba185875baa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -579,11 +579,16 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, { int err; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sjt_mec.bin", chip_name); - else + + if (err) + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mec.bin", chip_name); + } else err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 5b537806b4da..901e924e69ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -756,7 +756,8 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + + pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); } if (enforce_isolation) @@ -983,29 +984,50 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) uint32_t pipe, data = 0; if (enable) { - data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); - WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); - mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { soc21_grbm_select(adev, 3, pipe, 0, 0); + if (amdgpu_mes_log_enable) { + u32 log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE; + /* In case uni mes is not enabled, only 
program for pipe 0 */ + if (adev->mes.event_log_size >= (pipe + 1) * log_size) { + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO, + lower_32_bits(adev->mes.event_log_gpu_addr + + pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE)); + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI, + upper_32_bits(adev->mes.event_log_gpu_addr + + pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE)); + dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n", + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI), + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO)); + } + } + + data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); + if (pipe == 0) + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + else + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); + WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START, lower_32_bits(ucode_addr)); WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI, upper_32_bits(ucode_addr)); + + /* unhalt MES and activate one pipe each loop */ + data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); + if (pipe) + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); + dev_info(adev->dev, "program CP_MES_CNTL : 0x%x\n", data); + + WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); + } soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - /* unhalt MES and activate pipe0 */ - data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); - WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); - if (amdgpu_emu_mode) msleep(100); else if (adev->enable_uni_mes) @@ -1479,8 +1501,9 @@ static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; adev->mes.enable_legacy_queue_map = true; - adev->mes.event_log_size = adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE) : AMDGPU_MES_LOG_BUFFER_SIZE; - + adev->mes.event_log_size = adev->enable_uni_mes ? 
+ (AMDGPU_MAX_MES_PIPES * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE)) : + (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); r = amdgpu_mes_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 56507ae919b0..5e0066cd6c51 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1406,6 +1406,12 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) &adev->sdma.srbm_write_irq); if (r) return r; + + r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_CTXEMPTY, + &adev->sdma.ctxt_empty_irq); + if (r) + return r; } for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1602,7 +1608,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) int i, r; u32 inst_mask; - if ((adev->flags & AMD_IS_APU) || amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) return -EINVAL; /* stop queue */ @@ -1814,6 +1820,16 @@ static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v4_4_2_process_ctxt_empty_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* There is nothing useful to be done here, only kept for debug */ + dev_dbg_ratelimited(adev->dev, "SDMA context empty interrupt"); + sdma_v4_4_2_print_iv_entry(adev, entry); + return 0; +} + static void sdma_v4_4_2_inst_update_medium_grain_light_sleep( struct amdgpu_device *adev, bool enable, uint32_t inst_mask) { @@ -2096,6 +2112,10 @@ static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = { .process = sdma_v4_4_2_process_srbm_write_irq, }; +static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ctxt_empty_irq_funcs = { + .process = sdma_v4_4_2_process_ctxt_empty_irq, +}; + static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) { adev->sdma.trap_irq.num_types = adev->sdma.num_instances; @@ -2104,6 +2124,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances; adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances; adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances; + adev->sdma.ctxt_empty_irq.num_types = adev->sdma.num_instances; adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs; @@ -2112,6 +2133,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs; adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs; adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs; + adev->sdma.ctxt_empty_irq.funcs = &sdma_v4_4_2_ctxt_empty_irq_funcs; } /** diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 388b44ed5928..984f0e705078 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -274,7 +274,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf820258, + 0xbf820001, 0xbf820259, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, @@ -390,141 +390,98 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xbefe007c, 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 
0x876f6f7a, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0xb8fb1605, 0x807b817b, - 0x8e7b847b, 0x8e76827b, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747074, - 0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a7b7c, 0xbf85ffe7, - 0xbef40172, 0xbef00080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, - 0xbef600ff, 0x01000000, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf85004d, 0xbe840080, - 0xd2890000, 0x00000900, - 0x80048104, 0xd2890001, - 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, - 0x80048104, 0xd2890003, - 0x00000900, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, + 0xbefc007e, 0xbf108080, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fb1605, + 0x807b817b, 0x8e7b847b, + 0x8e76827b, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747074, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 0xbf8cc07f, 0xc06b013a, + 0x00000010, 0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a7b7c, + 0xbf85ffe7, 0xbef40172, + 0xbef00080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf85004d, 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000902, + 0xd2890000, 0x00000901, 0x80048104, 0xd2890001, - 0x00000902, 0x80048104, - 0xd2890002, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, 0x80048104, 0xd2890003, - 0x00000902, 0x80048104, + 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000903, 0x80048104, - 0xd2890001, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, 0x80048104, 0xd2890002, - 0x00000903, 0x80048104, - 0xd2890003, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbf820008, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb4306, 0x867bc17b, - 0xbf840063, 0xbf8a0000, - 0x867aff6f, 0x04000000, - 0xbf84005f, 0x8e7b867b, - 0x8e7b827b, 0xbef6007b, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, - 0xd2890000, 0x00000900, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, 0x80048104, 0xd2890001, - 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, + 
0x00000903, 0x80048104, + 0xd2890002, 0x00000903, 0x80048104, 0xd2890003, - 0x00000900, 0x80048104, + 0x00000903, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, - 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb2a05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xbf820008, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8fb4306, + 0x867bc17b, 0xbf840063, + 0xbf8a0000, 0x867aff6f, + 0x04000000, 0xbf84005f, + 0x8e7b867b, 0x8e7b827b, + 0xbef6007b, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -544,137 +501,181 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, + 0x807bff7b, 0x00001000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850051, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 
0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xbf8200c7, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x04000000, - 0xbf84001e, 0xbefe00c1, - 0xbeff00c1, 0xb8ef4306, - 0x866fc16f, 0xbf840019, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, - 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbefe00c1, - 0xbeff00c1, 0xbef600ff, - 0x01000000, 0xb8ef2a05, - 0x806f816f, 0x8e6f826f, - 0x806fff6f, 0x00008000, - 0xbef80080, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xbf8c0f70, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200c7, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf840019, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, + 0xbefe00c1, 0xbeff00c1, 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82a05, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xbf8c0f70, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, 0x80786e78, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xc0211bfa, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 
0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xc0211bfa, 0x00000078, + 0x80788478, 0xc0211b3a, 0x00000078, 0x80788478, - 0xc0211b3a, 0x00000078, - 0x80788478, 0xc0211b7a, + 0xc0211b7a, 0x00000078, + 0x80788478, 0xc0211c3a, 0x00000078, 0x80788478, - 0xc0211c3a, 0x00000078, - 0x80788478, 0xc0211c7a, + 0xc0211c7a, 0x00000078, + 0x80788478, 0xc0211eba, 0x00000078, 0x80788478, - 0xc0211eba, 0x00000078, - 0x80788478, 0xc0211efa, + 0xc0211efa, 0x00000078, + 0x80788478, 0xc0211a3a, 0x00000078, 0x80788478, - 0xc0211a3a, 0x00000078, - 0x80788478, 0xc0211a7a, + 0xc0211a7a, 0x00000078, + 0x80788478, 0xc0211cfa, 0x00000078, 0x80788478, - 0xc0211cfa, 0x00000078, - 0x80788478, 0xbf8cc07f, - 0xbefc006f, 0xbefe0070, - 0xbeff0071, 0x866f7bff, - 0x000003ff, 0xb96f4803, - 0x866f7bff, 0xfffff800, - 0x8f6f8b6f, 0xb96fa2c3, - 0xb973f801, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xbf8cc07f, 0xbefc006f, + 0xbefe0070, 0xbeff0071, + 0x866f7bff, 0x000003ff, + 0xb96f4803, 0x866f7bff, + 0xfffff800, 0x8f6f8b6f, + 0xb96fa2c3, 0xb973f801, + 0xb8ee2a05, 0x806e816e, + 0x8e6e8a6e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_nv1x_hex[] = { @@ -1302,7 +1303,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { }; static const uint32_t cwsr_trap_arcturus_hex[] = { - 0xbf820001, 0xbf8202d4, + 0xbf820001, 0xbf8202d5, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, @@ -1419,99 +1420,37 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xbefe007c, 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 
0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840064, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf840060, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xbefc007e, 0xbf108080, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, + 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -1530,31 +1469,50 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8fb2a05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xb8fb4306, 0x867bc17b, + 
0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -1574,215 +1532,259 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, + 0x807bff7b, 0x00001000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850051, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xbefc0080, - 0xbf11017c, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850059, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xbf9c0000, + 0xbefc0080, 0xbf11017c, + 0x867aff78, 0x00400000, + 0xbf850003, 
0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850059, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xbe840080, + 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, + 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffa9, 0xbf9c0000, - 0xbf820016, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffeb, - 0xbf9c0000, 0xbf8200e3, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x04000000, - 0xbf84001f, 0xbefe00c1, - 0xbeff00c1, 0xb8ef4306, - 0x866fc16f, 0xbf84001a, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x8078ff78, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x781d0000, - 0xe0510100, 0x781d0000, - 0x807cff7c, 0x00000200, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85fff6, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffa9, + 0xbf9c0000, 0xbf820016, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffeb, 0xbf9c0000, + 0xbf8200e3, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x04000000, 0xbf84001f, 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf84001a, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xb8ef2a05, 0x806f816f, - 0x8e6f826f, 0x806fff6f, - 0x00008000, 0xbef80080, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefc0084, - 0xbf11087c, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 0xe0524300, - 0x781d0300, 0xbf8c0f70, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xbefc0080, - 0xbf11087c, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 
0xe0524300, - 0x781d0300, 0xbf8c0f70, - 0xd3d94000, 0x18000100, - 0xd3d94001, 0x18000101, - 0xd3d94002, 0x18000102, - 0xd3d94003, 0x18000103, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffea, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xbf8c0f70, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbefe00c1, + 0xbeff00c1, 0xbef600ff, + 0x01000000, 0xb8ef2a05, + 0x806f816f, 0x8e6f826f, + 0x806fff6f, 0x00008000, + 0xbef80080, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe0070, 0xbeff0071, - 0x866f7bff, 0x000003ff, - 0xb96f4803, 0x866f7bff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2a05, 0x806e816e, - 0x8e6e8a6e, 0x8e6e816e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 
0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_aldebaran_hex[] = { - 0xbf820001, 0xbf8202df, + 0xbf820001, 0xbf8202e0, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, @@ -1899,99 +1901,37 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xbefe007c, 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840064, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf840060, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xbefc007e, 0xbf108080, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 
0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, + 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -2010,31 +1950,50 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8fb2b05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xb8fb4306, 0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -2054,51 +2013,31 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 
0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xb8fb2985, - 0x807b817b, 0x8e7b837b, - 0xb8fa2b05, 0x807a817a, - 0x8e7a827a, 0x80fb7a7b, - 0x867b7b7b, 0xbf84007a, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2b05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, 0x807bff7b, 0x00001000, - 0xbefc0080, 0xbf11017c, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850059, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xbe840080, + 0xbf850051, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -2137,139 +2076,203 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0x807c847c, - 0xbf0a7b7c, 0xbf85ffa9, - 0xbf9c0000, 0xbf820016, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0xe0724000, 0x701d0000, 0xe0724100, 0x701d0100, 0xe0724200, 0x701d0200, 0xe0724300, 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, - 0xbf85ffeb, 0xbf9c0000, - 0xbf8200ee, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x866eff7f, - 0x04000000, 0xbf84001f, + 0xbf85ffef, 0xbf9c0000, + 0xb8fb2985, 0x807b817b, + 0x8e7b837b, 0xb8fa2b05, + 0x807a817a, 0x8e7a827a, + 0x80fb7a7b, 0x867b7b7b, + 0xbf84007a, 0x807bff7b, + 0x00001000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf8200ee, + 
0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x04000000, + 0xbf84001f, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf84001a, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, 0xbefe00c1, 0xbeff00c1, - 0xb8ef4306, 0x866fc16f, - 0xbf84001a, 0x8e6f866f, - 0x8e6f826f, 0xbef6006f, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbefe00c1, - 0xbeff00c1, 0xbef600ff, - 0x01000000, 0xb8ef2b05, - 0x806f816f, 0x8e6f826f, - 0x806fff6f, 0x00008000, - 0xbef80080, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xb8ef2985, 0x806f816f, - 0x8e6f836f, 0xb8f92b05, - 0x80798179, 0x8e798279, - 0x80ef796f, 0x866f6f6f, - 0xbf84001a, 0x806fff6f, - 0x00008000, 0xbefc0080, + 0xb8ef2b05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, 0x781d0300, 0xbf8c0f70, - 0xd3d94000, 0x18000100, - 0xd3d94001, 0x18000101, - 0xd3d94002, 0x18000102, - 0xd3d94003, 0x18000103, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffea, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xbf8c0f70, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbf85ffee, 0xb8ef2985, + 0x806f816f, 0x8e6f836f, + 0xb8f92b05, 0x80798179, + 0x8e798279, 0x80ef796f, + 0x866f6f6f, 0xbf84001a, + 0x806fff6f, 0x00008000, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 
0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe0070, 0xbeff0071, - 0x866f7bff, 0x000003ff, - 0xb96f4803, 0x866f7bff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2985, 0x806e816e, - 0x8e6e8a6e, 0x8e6e816e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2985, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx10_hex[] = { @@ -3151,7 +3154,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { }; static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { - 0xbf820001, 0xbf8202db, + 0xbf820001, 0xbf8202dc, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, @@ -3266,99 +3269,37 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0xbefe007c, 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, 
- 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840064, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf840060, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xbefc007e, 0xbf108080, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, + 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -3377,31 +3318,50 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8fb2b05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 
0x807bff7b, - 0x00001000, 0x867aff78, + 0xb8fb4306, 0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -3421,51 +3381,31 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xb8fb2985, - 0x807b817b, 0x8e7b837b, - 0xb8fa2b05, 0x807a817a, - 0x8e7a827a, 0x80fb7a7b, - 0x867b7b7b, 0xbf84007a, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2b05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, 0x807bff7b, 0x00001000, - 0xbefc0080, 0xbf11017c, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850059, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xbe840080, + 0xbf850051, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -3504,139 +3444,203 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0x807c847c, - 0xbf0a7b7c, 0xbf85ffa9, - 0xbf9c0000, 0xbf820016, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0xe0724000, 0x701d0000, 0xe0724100, 0x701d0100, 0xe0724200, 0x701d0200, 0xe0724300, 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, - 0xbf85ffeb, 0xbf9c0000, - 0xbf8200ee, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x866eff7f, - 0x04000000, 0xbf84001f, + 0xbf85ffef, 0xbf9c0000, + 0xb8fb2985, 0x807b817b, + 0x8e7b837b, 0xb8fa2b05, + 0x807a817a, 0x8e7a827a, + 0x80fb7a7b, 0x867b7b7b, + 0xbf84007a, 0x807bff7b, + 0x00001000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 
0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf8200ee, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x04000000, + 0xbf84001f, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf84001a, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, 0xbefe00c1, 0xbeff00c1, - 0xb8ef4306, 0x866fc16f, - 0xbf84001a, 0x8e6f866f, - 0x8e6f826f, 0xbef6006f, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbefe00c1, - 0xbeff00c1, 0xbef600ff, - 0x01000000, 0xb8ef2b05, - 0x806f816f, 0x8e6f826f, - 0x806fff6f, 0x00008000, - 0xbef80080, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xb8ef2985, 0x806f816f, - 0x8e6f836f, 0xb8f92b05, - 0x80798179, 0x8e798279, - 0x80ef796f, 0x866f6f6f, - 0xbf84001a, 0x806fff6f, - 0x00008000, 0xbefc0080, + 0xb8ef2b05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, 0x781d0300, 0xbf8c0f70, - 0xd3d94000, 0x18000100, - 0xd3d94001, 0x18000101, - 0xd3d94002, 0x18000102, - 0xd3d94003, 0x18000103, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffea, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 
0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xbf8c0f70, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbf85ffee, 0xb8ef2985, + 0x806f816f, 0x8e6f836f, + 0xb8f92b05, 0x80798179, + 0x8e798279, 0x80ef796f, + 0x866f6f6f, 0xbf84001a, + 0x806fff6f, 0x00008000, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe0070, 0xbeff0071, - 0x866f7bff, 0x000003ff, - 0xb96f4803, 0x866f7bff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2985, 0x806e816e, - 0x8e6e8a6e, 0x8e6e816e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b79, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2985, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b79, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 
0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx12_hex[] = { @@ -4124,27 +4128,25 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { }; static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { - 0xbf820001, 0xbf8202d8, + 0xbf820001, 0xbf8202ca, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840008, 0xbf0d986d, - 0xbf85001f, 0x866eff7b, - 0x00000400, 0xbf850061, - 0xbf8e0010, 0xb8fbf803, - 0xbf82fffa, 0x866eff7b, - 0x03800900, 0xbf850015, - 0x866eff7b, 0x000071ff, - 0xbf840008, 0x866fff7b, - 0x00007080, 0xbf840001, - 0xbeee1a87, 0xb8eff801, - 0x8e6e8c6e, 0x866e6f6e, - 0xbf85000a, 0xbf0d986d, - 0xbf850003, 0x866eff6d, - 0x00ff0000, 0xbf850005, - 0xbf0d986d, 0xbf850004, + 0xbf840009, 0x866eff6d, + 0x00ff0000, 0xbf85001a, 0x866eff7b, 0x00000400, - 0xbf850046, 0xbeed1a9d, + 0xbf850051, 0xbf8e0010, + 0xb8fbf803, 0xbf82fffa, + 0x866eff7b, 0x03c00900, + 0xbf850011, 0x866eff7b, + 0x000071ff, 0xbf840008, + 0x866fff7b, 0x00007080, + 0xbf840001, 0xbeee1a87, + 0xb8eff801, 0x8e6e8c6e, + 0x866e6f6e, 0xbf850006, + 0x866eff6d, 0x00ff0000, + 0xbf850003, 0x866eff7b, + 0x00000400, 0xbf85003a, 0xb8faf807, 0x867aff7a, 0x001f8000, 0x8e7a8b7a, 0x8979ff79, 0xfc000000, @@ -4153,100 +4155,95 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { 0xb8fbf813, 0x8efa887a, 0xbf0d8f7b, 0xbf840002, 0x877bff7b, 0xffff0000, - 0xc0031cfd, 0x00000010, - 0xc0071bbd, 0x00000000, + 0xc0031bbd, 0x00000010, + 0xbf8cc07f, 0x8e6e976e, + 0x8979ff79, 0x00800000, + 0x87796e79, 0xc0071bbd, + 0x00000000, 0xbf8cc07f, 0xc0071ebd, 0x00000008, - 0xbf8cc07f, 0x8e739773, - 0x8979ff79, 0x01800000, - 0x87797379, 0xbf0d986d, - 0xbf840009, 0xbf0d9879, - 0xbf850007, 0x896dff6d, - 0x01ff0000, 0xba7f0583, - 0x00000000, 0xbf0d9d6d, - 0xbeed189d, 0xbf840012, - 0xbef91898, 0xbeed189d, - 0x86ee6e6e, 0xbf840001, - 0xbe801d6e, 0x866eff6d, - 0x01ff0000, 0xbf850005, - 0x8778ff78, 0x00002000, - 0x80ec886c, 0x82ed806d, - 0xbf820005, 0x866eff6d, - 0x01000000, 0xbf850002, - 0x806c846c, 0x826d806d, - 0x866dff6d, 0x0000ffff, - 0x8f7a8b79, 0x867aff7a, - 0x001f8000, 0xb97af807, - 0x86fe7e7e, 0x86ea6a6a, - 0x8f6e8378, 0xb96ee0c2, - 0xbf800002, 0xb9780002, - 0xbe801f6c, 0x866dff6d, - 0x0000ffff, 0xbefa0080, - 0xb97a0283, 0xb8faf807, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0x866eff6d, 0x01ff0000, + 0xbf850005, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x866eff6d, 0x01000000, + 0xbf850002, 0x806c846c, + 0x826d806d, 0x866dff6d, + 0x0000ffff, 0x8f7a8b79, 0x867aff7a, 0x001f8000, - 0x8e7a8b7a, 0x8979ff79, - 0xfc000000, 0x87797a79, - 0xba7ff807, 0x00000000, - 0xbeee007e, 0xbeef007f, - 0xbefe0180, 0xbf900004, - 0x877a8478, 0xb97af802, - 0xbf8e0002, 0xbf88fffe, - 0xb8fa2985, 0x807a817a, - 0x8e7a8a7a, 0x8e7a817a, - 0xb8fb1605, 0x807b817b, - 0x8e7b867b, 0x807a7b7a, - 0x807a7e7a, 0x827b807f, - 0x867bff7b, 0x0000ffff, - 0xc04b1c3d, 0x00000050, - 0xbf8cc07f, 0xc04b1d3d, - 0x00000060, 0xbf8cc07f, - 0xc0431e7d, 0x00000074, - 0xbf8cc07f, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0xbef1007c, - 0xbef00080, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b3a, + 0xb97af807, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e8378, + 0xb96ee0c2, 0xbf800002, + 0xb9780002, 0xbe801f6c, + 0x866dff6d, 0x0000ffff, 
+ 0xbefa0080, 0xb97a0283, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8979ff79, 0xfc000000, + 0x87797a79, 0xba7ff807, + 0x00000000, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2985, + 0x807a817a, 0x8e7a8a7a, + 0x8e7a817a, 0xb8fb1605, + 0x807b817b, 0x8e7b867b, + 0x807a7b7a, 0x807a7e7a, + 0x827b807f, 0x867bff7b, + 0x0000ffff, 0xc04b1c3d, + 0x00000050, 0xbf8cc07f, + 0xc04b1d3d, 0x00000060, + 0xbf8cc07f, 0xc0431e7d, + 0x00000074, 0xbf8cc07f, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0xbef1007c, 0xbef00080, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611b7a, 0x0000007c, + 0xc0611b3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bba, + 0xbefc0070, 0xc0611b7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611bfa, 0x0000007c, + 0xc0611bba, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611e3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8fbf803, 0xbefe007c, - 0xbefc0070, 0xc0611efa, + 0xbefc0070, 0xc0611bfa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611a3a, 0x0000007c, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8f1f801, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, + 0xbefc0070, 0xc0611a3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbf108080, 0x867aff7f, 0x04000000, 0xbeef0080, 0x876f6f7a, 0xb8f02985, 0x80708170, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 44772eec9ef4..96fbb16ceb21 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -34,41 +34,24 @@ * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3 * sp3 gfx11.sp3 -hex gfx11.hex * - * gfx12: - * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o gfx12.sp3 - * sp3 gfx12.sp3 -hex gfx12.hex */ #define CHIP_NAVI10 26 #define CHIP_SIENNA_CICHLID 30 #define CHIP_PLUM_BONITO 36 -#define CHIP_GFX12 37 #define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID) #define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID) #define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO) #define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO) -#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12) +#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO) #define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised -#if ASIC_FAMILY < CHIP_GFX12 #define S_COHERENCE glc:1 #define V_COHERENCE slc:1 glc:1 #define S_WAITCNT_0 s_waitcnt 0 -#else -#define 
S_COHERENCE scope:SCOPE_SYS -#define V_COHERENCE scope:SCOPE_SYS -#define S_WAITCNT_0 s_wait_idle - -#define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO -#define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI -#define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC -#define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC -#define HW_REG_MODE HW_REG_WAVE_MODE -#endif -#if ASIC_FAMILY < CHIP_GFX12 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 @@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000 var S_SAVE_PC_HI_HT_MASK = 0x01000000 -#else -var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 -var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 -var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 -var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 -var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 -var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 -var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 -var SQ_WAVE_STATUS_WAVE64_SIZE = 1 -var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 -var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV -var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK -var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK -var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 -#endif var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 @@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 #endif -#if ASIC_FAMILY < CHIP_GFX12 var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 @@ -161,39 +128,6 @@ var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT var S_TRAPSTS_HWREG = HW_REG_TRAPSTS var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT -#else -var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF -var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 -var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 -var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 -var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 -var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 -var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 -var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 -var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 -var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 -var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 -var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 -var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 -var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 - -var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV -var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK -var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT -var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ - SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK -var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT -var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT -var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT -var S_TRAPSTS_RESTORE_PART_3_SHIFT = 
SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT -var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT -var BARRIER_STATE_SIGNAL_OFFSET = 16 -var BARRIER_STATE_VALID_OFFSET = 0 -#endif // bits [31:24] unused by SPI debug data var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 @@ -305,11 +239,7 @@ L_TRAP_NO_BARRIER: L_HALTED: // Host trap may occur while wave is halted. -#if ASIC_FAMILY < CHIP_GFX12 s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK -#else - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK -#endif s_cbranch_scc1 L_FETCH_2ND_TRAP L_CHECK_SAVE: @@ -336,7 +266,6 @@ L_NOT_HALTED: // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. // Maskable exceptions only cause the wave to enter the trap handler if // their respective bit in mode.excp_en is set. -#if ASIC_FAMILY < CHIP_GFX12 s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK s_cbranch_scc0 L_CHECK_TRAP_ID @@ -349,17 +278,6 @@ L_NOT_ADDR_WATCH: s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT s_and_b32 ttmp2, ttmp2, ttmp3 s_cbranch_scc1 L_FETCH_2ND_TRAP -#else - s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) - s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK - s_cbranch_scc0 L_NOT_ADDR_WATCH - s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK - -L_NOT_ADDR_WATCH: - s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL) - s_and_b32 ttmp2, ttmp3, ttmp2 - s_cbranch_scc1 L_FETCH_2ND_TRAP -#endif L_CHECK_TRAP_ID: // Check trap_id != 0 @@ -369,13 +287,8 @@ L_CHECK_TRAP_ID: #if SINGLE_STEP_MISSED_WORKAROUND // Prioritize single step exception over context save. // Second-level trap will halt wave and RFE, re-entering for SAVECTX. -#if ASIC_FAMILY < CHIP_GFX12 s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK -#else - // WAVE_TRAP_CTRL is already in ttmp3. - s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK -#endif s_cbranch_scc1 L_FETCH_2ND_TRAP #endif @@ -425,12 +338,7 @@ L_NO_NEXT_TRAP: s_cbranch_scc1 L_TRAP_CASE // Host trap will not cause trap re-entry. -#if ASIC_FAMILY < CHIP_GFX12 s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK -#else - s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK -#endif s_cbranch_scc1 L_EXIT_TRAP s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK @@ -457,16 +365,7 @@ L_EXIT_TRAP: s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 -#if ASIC_FAMILY < CHIP_GFX12 s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status -#else - // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. - // Only restore fields which the trap handler changes. 
- s_lshr_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_SCC_SHIFT - s_setreg_b32 hwreg(S_STATUS_HWREG, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ - SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status -#endif - s_rfe_b64 [ttmp0, ttmp1] L_SAVE: @@ -478,14 +377,6 @@ L_SAVE: s_endpgm L_HAVE_VGPRS: #endif -#if ASIC_FAMILY >= CHIP_GFX12 - s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) - s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT - s_cbranch_scc0 L_HAVE_VGPRS - s_endpgm -L_HAVE_VGPRS: -#endif - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] s_mov_b32 s_save_tmp, 0 s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit @@ -671,19 +562,6 @@ L_SAVE_HWREG: s_mov_b32 m0, 0x0 //Next lane of v2 to write to #endif -#if ASIC_FAMILY >= CHIP_GFX12 - // Ensure no further changes to barrier or LDS state. - // STATE_PRIV.BARRIER_COMPLETE may change up to this point. - s_barrier_signal -2 - s_barrier_wait -2 - - // Re-read final state of BARRIER_COMPLETE field for save. - s_getreg_b32 s_save_tmp, hwreg(S_STATUS_HWREG) - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK - s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK - s_or_b32 s_save_status, s_save_status, s_save_tmp -#endif - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK @@ -707,21 +585,6 @@ L_SAVE_HWREG: s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI) write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) -#if ASIC_FAMILY >= CHIP_GFX12 - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) - - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) - - s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) - write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) - - s_get_barrier_state s_save_tmp, -1 - s_wait_kmcnt (0) - write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) -#endif - #if NO_SQC_STORE // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. s_mov_b32 exec_lo, 0xFFFF @@ -814,9 +677,7 @@ L_SAVE_LDS_NORMAL: s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE -#if ASIC_FAMILY < CHIP_GFX12 s_barrier //LDS is used? wait for other waves in the same TG -#endif s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK s_cbranch_scc0 L_SAVE_LDS_DONE @@ -1081,11 +942,6 @@ L_RESTORE: s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC -#if ASIC_FAMILY >= CHIP_GFX12 - // Save s_restore_spi_init_hi for later use. - s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi -#endif - //determine it is wave32 or wave64 get_wave_size2(s_restore_size) @@ -1320,9 +1176,7 @@ L_RESTORE_SGPR: // s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception. // Clear DEBUG_EN before and restore MODE after the barrier. 
s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0 -#if ASIC_FAMILY < CHIP_GFX12 s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG -#endif /* restore HW registers */ L_RESTORE_HWREG: @@ -1334,11 +1188,6 @@ L_RESTORE_HWREG: s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes -#if ASIC_FAMILY >= CHIP_GFX12 - // Restore s_restore_spi_init_hi before the saved value gets clobbered. - s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save -#endif - read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) @@ -1358,44 +1207,6 @@ L_RESTORE_HWREG: s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch -#if ASIC_FAMILY >= CHIP_GFX12 - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) - S_WAITCNT_0 - s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp - - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) - S_WAITCNT_0 - s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp - - // Only the first wave needs to restore the workgroup barrier. - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE - - // Skip over WAVE_STATUS, since there is no state to restore from it - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 - - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) - S_WAITCNT_0 - - s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE - - // extract the saved signal count from s_restore_tmp - s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET - - // We need to call s_barrier_signal repeatedly to restore the signal - // count of the work group barrier. The member count is already - // initialized with the number of waves in the work group. -L_BARRIER_RESTORE_LOOP: - s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE - s_barrier_signal -1 - s_add_i32 s_restore_tmp, s_restore_tmp, -1 - s_branch L_BARRIER_RESTORE_LOOP - -L_SKIP_BARRIER_RESTORE: -#endif - s_mov_b32 m0, s_restore_m0 s_mov_b32 exec_lo, s_restore_exec_lo s_mov_b32 exec_hi, s_restore_exec_hi @@ -1453,13 +1264,6 @@ L_RETURN_WITHOUT_PRIV: s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu -#if ASIC_FAMILY >= CHIP_GFX12 - // Make barrier and LDS state visible to all waves in the group. - // STATE_PRIV.BARRIER_COMPLETE may change after this point. 
- s_barrier_signal -2 - s_barrier_wait -2 -#endif - s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution L_END_PGM: @@ -1598,11 +1402,7 @@ function get_hwreg_size_bytes end function get_wave_size2(s_reg) -#if ASIC_FAMILY < CHIP_GFX12 s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) -#else - s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) -#endif s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE end diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm new file mode 100644 index 000000000000..1740e98c6719 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm @@ -0,0 +1,1126 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* To compile this assembly code: + * + * gfx12: + * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3 + * sp3 gfx12.sp3 -hex gfx12.hex + */ + +#define CHIP_GFX12 37 + +#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised + +var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 +var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 +var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 +var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 +var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 +var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 +var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 +var SQ_WAVE_STATUS_WAVE64_SIZE = 1 +var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 +var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK +var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 + +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 +var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 +var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 + +var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF +var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 +var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 +var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 +var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 +var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 +var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 +var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 +var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 +var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 +var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 +var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 +var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 +var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 + +var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ + SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT +var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT +var BARRIER_STATE_SIGNAL_OFFSET = 16 +var BARRIER_STATE_VALID_OFFSET = 0 + +var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 +var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 + +// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] +// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000 +var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31 + +var s_sgpr_save_num = 108 + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi +var s_save_pc_lo = ttmp0 +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_state_priv = ttmp12 +var s_save_excp_flag_priv = 
ttmp15
+var s_save_xnack_mask = s_save_excp_flag_priv
+var s_wave_size = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+var s_save_mem_offset = ttmp4
+var s_save_alloc_size = s_save_excp_flag_priv
+var s_save_tmp = ttmp14
+var s_save_m0 = ttmp5
+var s_save_ttmps_lo = s_save_tmp
+var s_save_ttmps_hi = s_save_excp_flag_priv
+
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+var S_WAVE_SIZE = 25
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi
+var s_restore_mem_offset = ttmp12
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp2
+var s_restore_mem_offset_save = s_restore_tmp
+var s_restore_m0 = s_restore_alloc_size
+var s_restore_mode = ttmp7
+var s_restore_flat_scratch = s_restore_tmp
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = ttmp4
+var s_restore_exec_hi = ttmp5
+var s_restore_state_priv = ttmp14
+var s_restore_excp_flag_priv = ttmp15
+var s_restore_xnack_mask = ttmp13
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+var s_restore_size = ttmp6
+var s_restore_ttmps_lo = s_restore_tmp
+var s_restore_ttmps_hi = s_restore_alloc_size
+var s_restore_spi_init_hi_save = s_restore_exec_hi
+
+shader main
+ asic(DEFAULT)
+ type(CS)
+ wave_size(32)
+
+ s_branch L_SKIP_RESTORE //NOT a restore; might be a regular trap or save
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE
+
+L_SKIP_RESTORE:
+ s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATE_PRIV since we will change SCC
+
+ // Clear SYS_PRIO: do not save with elevated priority.
+ // Clear POISON_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK
+
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+
+ s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
+
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_SAVE:
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
+ s_sleep 0x10
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let the second-level handler deal with non-SAVECTX exceptions or traps.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction
+ // and xnack_error exceptions always cause the wave to enter the trap
+ // handler.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in EXCP_FLAG_USER.
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective enable bit in TRAP_CTRL is set.
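+ // (Address watch exceptions are reported in EXCP_FLAG_PRIV, so a pending
+ // watch is first merged into the pending set at the position of
+ // TRAP_CTRL's ADDR_WATCH enable bit; the AND with TRAP_CTRL below is then
+ // non-zero exactly when an enabled maskable exception is pending.)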
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ s_and_b32 ttmp2, ttmp3, ttmp2
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+#if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ // WAVE_TRAP_CTRL is already in ttmp3.
+ s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+#endif
+
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+L_FETCH_2ND_TRAP:
+ // Read second-level TBA/TMA from first-level TMA and jump if available.
+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+ // ttmp12 holds SQ_WAVE_STATE_PRIV
+ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+ s_wait_idle
+ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag
+ s_wait_idle
+ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp2
+
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA
+ s_wait_idle
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA
+ s_wait_idle
+
+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set
+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+ // If not caused by a trap then halt the wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_TRAP_CASE
+
+ // Host trap will not cause trap re-entry.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Advance past the trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+
+ // Restore SQ_WAVE_STATUS.
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
+ // Only restore fields which the trap handler changes.
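+ // (The shift below aligns the saved value with the SCC..POISON_ERR bit
+ // window, and the sized s_setreg_b32 writes only that window, leaving
+ // BARRIER_COMPLETE untouched.)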
+ s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
+ SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
+
+ s_rfe_b64 [ttmp0, ttmp1]
+
+L_SAVE:
+ // If VGPRs have been deallocated then terminate the wavefront.
+ // It has no remaining program to run and cannot save without VGPRs.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+ s_cbranch_scc0 L_HAVE_VGPRS
+ s_endpgm
+L_HAVE_VGPRS:
+
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_mov_b32 s_save_tmp, 0
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+ s_wait_idle
+
+ // Save first_wave flag so we can clear high bits of save address.
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+ // Trap temporaries must be saved via VGPR but all VGPRs are in use.
+ // There is no ttmp space to hold the resource constant for VGPR save.
+ // Save v0 by itself since it requires only two SGPRs.
+ s_mov_b32 s_save_ttmps_lo, exec_lo
+ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS
+ v_mov_b32 v0, 0x0
+ s_mov_b32 exec_lo, s_save_ttmps_lo
+ s_mov_b32 exec_hi, s_save_ttmps_hi
+
+ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_wave_size2(s_save_ttmps_hi)
+ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ get_svgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
+
+ v_writelane_b32 v0, ttmp4, 0x4
+ v_writelane_b32 v0, ttmp5, 0x5
+ v_writelane_b32 v0, ttmp6, 0x6
+ v_writelane_b32 v0, ttmp7, 0x7
+ v_writelane_b32 v0, ttmp8, 0x8
+ v_writelane_b32 v0, ttmp9, 0x9
+ v_writelane_b32 v0, ttmp10, 0xA
+ v_writelane_b32 v0, ttmp11, 0xB
+ v_writelane_b32 v0, ttmp13, 0xD
+ v_writelane_b32 v0, exec_lo, 0xE
+ v_writelane_b32 v0, exec_hi, 0xF
+
+ s_mov_b32 exec_lo, 0x3FFF
+ s_mov_b32 exec_hi, 0x0
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS
+ v_readlane_b32 ttmp14, v0, 0xE
+ v_readlane_b32 ttmp15, v0, 0xF
+ s_mov_b32 exec_lo, ttmp14
+ s_mov_b32 exec_hi, ttmp15
+
+ /* setup Resource Constants */
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily initialized
+ 
s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC + + s_mov_b32 s_save_m0, m0 + + /* global mem offset */ + s_mov_b32 s_save_mem_offset, 0x0 + get_wave_size2(s_wave_size) + + /* save first 4 VGPRs, needed for SGPR save */ + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI + s_mov_b32 exec_hi, 0x00000000 + s_branch L_SAVE_4VGPR_WAVE32 +L_ENABLE_SAVE_4VGPR_EXEC_HI: + s_mov_b32 exec_hi, 0xFFFFFFFF + s_branch L_SAVE_4VGPR_WAVE64 +L_SAVE_4VGPR_WAVE32: + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR Allocated in 4-GPR granularity + + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3 + s_branch L_SAVE_HWREG + +L_SAVE_4VGPR_WAVE64: + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR Allocated in 4-GPR granularity + + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3 + + /* save HW registers */ + +L_SAVE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) + get_svgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes() + + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource + v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource + v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store + s_mov_b32 m0, 0x0 //Next lane of v2 to write to + + // Ensure no further changes to barrier or LDS state. + // STATE_PRIV.BARRIER_COMPLETE may change up to this point. + s_barrier_signal -2 + s_barrier_wait -2 + + // Re-read final state of BARRIER_COMPLETE field for save. 
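+ // (Mechanically: re-read STATE_PRIV, isolate BARRIER_COMPLETE, clear it
+ // in the saved copy, then OR in the fresh value so the saved image
+ // reflects the post-barrier state.)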
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV)
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp
+
+ write_hwreg_to_v2(s_save_m0)
+ write_hwreg_to_v2(s_save_pc_lo)
+ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ write_hwreg_to_v2(s_save_tmp)
+ write_hwreg_to_v2(s_save_exec_lo)
+ write_hwreg_to_v2(s_save_exec_hi)
+ write_hwreg_to_v2(s_save_state_priv)
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ write_hwreg_to_v2(s_save_tmp)
+
+ write_hwreg_to_v2(s_save_xnack_mask)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_MODE)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ write_hwreg_to_v2(s_save_tmp)
+
+ s_get_barrier_state s_save_tmp, -1
+ s_wait_kmcnt (0)
+ write_hwreg_to_v2(s_save_tmp)
+
+ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+ s_mov_b32 exec_lo, 0xFFFF
+ s_mov_b32 exec_hi, 0x0
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+
+ /* save SGPRs */
+ // Save SGPRs before the LDS save, so that s0 to s4 can be used during the LDS save...
+
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
+L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ write_16sgpr_to_v2(s0)
+
+ s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled?
+ s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
+ s_mov_b32 ttmp13, 0x0
+ v_mov_b32 v2, 0x0
+L_SAVE_SGPR_SKIP_TCP_STORE:
+
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
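+ // (The loop above covers the first 96 SGPRs in 16-register chunks; v2 is
+ // flushed once 32 lanes are filled, i.e. every two iterations. The 12
+ // SGPRs below bring the total to s_sgpr_save_num, i.e. 108.)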
+
+	//save the remaining 12 SGPRs
+	s_movrels_b64	s0, s0	//s0 = s[0+m0], s1 = s[1+m0]
+	s_movrels_b64	s2, s2	//s2 = s[2+m0], s3 = s[3+m0]
+	s_movrels_b64	s4, s4	//s4 = s[4+m0], s5 = s[5+m0]
+	s_movrels_b64	s6, s6	//s6 = s[6+m0], s7 = s[7+m0]
+	s_movrels_b64	s8, s8	//s8 = s[8+m0], s9 = s[9+m0]
+	s_movrels_b64	s10, s10	//s10 = s[10+m0], s11 = s[11+m0]
+	write_12sgpr_to_v2(s0)
+
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+	/* save LDS */
+
+L_SAVE_LDS:
+	// Change EXEC to all threads...
+	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_SAVE_LDS_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_SAVE_LDS_NORMAL
+L_ENABLE_SAVE_LDS_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_SAVE_LDS_NORMAL:
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+	s_and_b32	s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF	//lds_size is zero?
+	s_cbranch_scc0	L_SAVE_LDS_DONE	//no lds used? jump to L_SAVE_LDS_DONE
+
+	s_and_b32	s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+	s_cbranch_scc0	L_SAVE_LDS_DONE
+
+	// first wave does the LDS save
+
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+	s_mov_b32	s_save_buf_rsrc2, s_save_alloc_size	//NUM_RECORDS in bytes
+
+	// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+	//
+	get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+	get_svgpr_size_bytes(s_save_tmp)
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s_save_tmp
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	//load 0~63*4 (byte address) into vgpr v0
+	v_mbcnt_lo_u32_b32	v0, -1, 0
+	v_mbcnt_hi_u32_b32	v0, -1, v0
+	v_mul_u32_u24	v0, 4, v0
+
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_mov_b32	m0, 0x0
+	s_cbranch_scc1	L_SAVE_LDS_W64
+
+L_SAVE_LDS_W32:
+	s_mov_b32	s3, 128
+	s_nop	0
+	s_nop	0
+	s_nop	0
+L_SAVE_LDS_LOOP_W32:
+	ds_read_b32	v1, v0
+	s_wait_idle
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+	s_add_u32	m0, m0, s3	//every buffer_store_dword saves 128 bytes
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s3
+	v_add_nc_u32	v0, v0, 128	//mem offset increased by 128 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc=(m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_LDS_LOOP_W32	//LDS save is complete?
+
+	s_branch	L_SAVE_LDS_DONE
+
+L_SAVE_LDS_W64:
+	s_mov_b32	s3, 256
+	s_nop	0
+	s_nop	0
+	s_nop	0
+L_SAVE_LDS_LOOP_W64:
+	ds_read_b32	v1, v0
+	s_wait_idle
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+	s_add_u32	m0, m0, s3	//every buffer_store_dword saves 256 bytes
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s3
+	v_add_nc_u32	v0, v0, 256	//mem offset increased by 256 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc=(m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_LDS_LOOP_W64	//LDS save is complete?
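+	// Stride note: each buffer_store_dword writes 4 bytes per active lane,
+	// i.e. 32 lanes * 4 = 128 bytes in wave32 and 64 lanes * 4 = 256 bytes
+	// in wave64, which is why s3 is set to 128 or 256 above.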
+
+L_SAVE_LDS_DONE:
+	/* save VGPRs - save the remaining VGPRs */
+L_SAVE_VGPR:
+	// VGPR SR memory offset: 0
+	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_SAVE_VGPR_EXEC_HI
+	s_mov_b32	s_save_mem_offset, (0+128*4)	// for the remaining VGPRs
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_SAVE_VGPR_NORMAL
+L_ENABLE_SAVE_VGPR_EXEC_HI:
+	s_mov_b32	s_save_mem_offset, (0+256*4)	// for the remaining VGPRs
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_SAVE_VGPR_NORMAL:
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+	s_add_u32	s_save_alloc_size, s_save_alloc_size, 1
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 2	//Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)
+	//determine it is wave32 or wave64
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_SAVE_VGPR_WAVE64
+
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// VGPR Allocated in 4-GPR granularity
+
+	// VGPR store using dw burst
+	s_mov_b32	m0, 0x4	//VGPR initial index value =4
+	s_cmp_lt_u32	m0, s_save_alloc_size
+	s_cbranch_scc0	L_SAVE_VGPR_END
+
+L_SAVE_VGPR_W32_LOOP:
+	v_movrels_b32	v0, v0	//v0 = v[0+m0]
+	v_movrels_b32	v1, v1	//v1 = v[1+m0]
+	v_movrels_b32	v2, v2	//v2 = v[2+m0]
+	v_movrels_b32	v3, v3	//v3 = v[3+m0]
+
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+
+	s_add_u32	m0, m0, 4	//next vgpr index
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 128*4	//every buffer_store_dword does 128 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc = (m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_VGPR_W32_LOOP	//VGPR save is complete?
+
+	s_branch	L_SAVE_VGPR_END
+
+L_SAVE_VGPR_WAVE64:
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// VGPR store using dw burst
+	s_mov_b32	m0, 0x4	//VGPR initial index value =4
+	s_cmp_lt_u32	m0, s_save_alloc_size
+	s_cbranch_scc0	L_SAVE_SHARED_VGPR
+
+L_SAVE_VGPR_W64_LOOP:
+	v_movrels_b32	v0, v0	//v0 = v[0+m0]
+	v_movrels_b32	v1, v1	//v1 = v[1+m0]
+	v_movrels_b32	v2, v2	//v2 = v[2+m0]
+	v_movrels_b32	v3, v3	//v3 = v[3+m0]
+
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+	s_add_u32	m0, m0, 4	//next vgpr index
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 256*4	//every buffer_store_dword does 256 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc = (m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_VGPR_W64_LOOP	//VGPR save is complete?
+
+L_SAVE_SHARED_VGPR:
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+	s_and_b32	s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF	//shared_vgpr_size is zero?
+	s_cbranch_scc0	L_SAVE_VGPR_END	//no shared_vgpr used?
jump to L_SAVE_VGPR_END
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 3	//Number of SHARED_VGPRs = shared_vgpr_size * 8    (non-zero value)
+	//m0 now holds the normal VGPR count; add the shared VGPR count to get the total.
+	//the shared VGPR save starts at index m0
+	s_add_u32	s_save_alloc_size, s_save_alloc_size, m0
+	s_mov_b32	exec_lo, 0xFFFFFFFF
+	s_mov_b32	exec_hi, 0x00000000
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP:
+	v_movrels_b32	v0, v0	//v0 = v[0+m0]
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+	s_add_u32	m0, m0, 1	//next vgpr index
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 128
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc = (m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_SHARED_VGPR_WAVE64_LOOP	//SHARED_VGPR save is complete?
+
+L_SAVE_VGPR_END:
+	s_branch	L_END_PGM
+
+L_RESTORE:
+	/* Setup Resource Constants */
+	s_mov_b32	s_restore_buf_rsrc0, s_restore_spi_init_lo	//base_addr_lo
+	s_and_b32	s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF	//base_addr_hi
+	s_or_b32	s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+	s_mov_b32	s_restore_buf_rsrc2, 0	//NUM_RECORDS initial value = 0 (in bytes)
+	s_mov_b32	s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+
+	// Save s_restore_spi_init_hi for later use.
+	s_mov_b32	s_restore_spi_init_hi_save, s_restore_spi_init_hi
+
+	//determine it is wave32 or wave64
+	get_wave_size2(s_restore_size)
+
+	s_and_b32	s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+	s_cbranch_scc0	L_RESTORE_VGPR
+
+	/* restore LDS */
+L_RESTORE_LDS:
+	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_RESTORE_LDS_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_RESTORE_LDS_NORMAL
+L_ENABLE_RESTORE_LDS_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_RESTORE_LDS_NORMAL:
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+	s_and_b32	s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF	//lds_size is zero?
+	s_cbranch_scc0	L_RESTORE_VGPR	//no lds used? jump to L_RESTORE_VGPR
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+	s_mov_b32	s_restore_buf_rsrc2, s_restore_alloc_size	//NUM_RECORDS in bytes
+
+	// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+	//
+	get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+	get_svgpr_size_bytes(s_restore_tmp)
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_mov_b32	m0, 0x0
+	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W64
+
+L_RESTORE_LDS_LOOP_W32:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+	s_wait_idle
+	ds_store_addtid_b32	v0
+	s_add_u32	m0, m0, 128	//advance 128 bytes (32 lanes * 4 bytes)
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128	//mem offset increased by 128 bytes
+	s_cmp_lt_u32	m0, s_restore_alloc_size	//scc=(m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W32	//LDS restore is complete?
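+	// ds_store_addtid_b32 writes each lane's dword at an LDS address derived
+	// from its thread id, so bumping M0 and the memory offset by 128 bytes
+	// (32 lanes * 4 bytes) advances one full wave32 row per iteration.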
+	s_branch	L_RESTORE_VGPR
+
+L_RESTORE_LDS_LOOP_W64:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+	s_wait_idle
+	ds_store_addtid_b32	v0
+	s_add_u32	m0, m0, 256	//advance 256 bytes (64 lanes * 4 bytes)
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 256	//mem offset increased by 256 bytes
+	s_cmp_lt_u32	m0, s_restore_alloc_size	//scc=(m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W64	//LDS restore is complete?
+
+	/* restore VGPRs */
+L_RESTORE_VGPR:
+	// VGPR SR memory offset : 0
+	s_mov_b32	s_restore_mem_offset, 0x0
+	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_RESTORE_VGPR_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_RESTORE_VGPR_NORMAL
+L_ENABLE_RESTORE_VGPR_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_RESTORE_VGPR_NORMAL:
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+	s_add_u32	s_restore_alloc_size, s_restore_alloc_size, 1
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, 2	//Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)
+	//determine it is wave32 or wave64
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_RESTORE_VGPR_WAVE64
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// VGPR load using dw burst
+	s_mov_b32	s_restore_mem_offset_save, s_restore_mem_offset	// restore starts with v4; v0..v3 will be the last
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128*4
+	s_mov_b32	m0, 4	//VGPR initial index value = 4
+	s_cmp_lt_u32	m0, s_restore_alloc_size
+	s_cbranch_scc0	L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE32_LOOP:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128
+	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2
+	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3
+	s_wait_idle
+	v_movreld_b32	v0, v0	//v[0+m0] = v0
+	v_movreld_b32	v1, v1
+	v_movreld_b32	v2, v2
+	v_movreld_b32	v3, v3
+	s_add_u32	m0, m0, 4	//next vgpr index
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128*4	//every buffer_load_dword does 128 bytes
+	s_cmp_lt_u32	m0, s_restore_alloc_size	//scc = (m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_VGPR_WAVE32_LOOP	//VGPR restore (except v0) is complete?
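+	// v0..v3 serve as the staging registers for the buffer loads above, so
+	// they are deliberately restored last, from the start offset kept in
+	// s_restore_mem_offset_save.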
+
+	/* VGPR restore on v0 */
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128
+	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2
+	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3
+	s_wait_idle
+
+	s_branch	L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE64:
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// VGPR load using dw burst
+	s_mov_b32	s_restore_mem_offset_save, s_restore_mem_offset	// restore starts with v4; v0..v3 will be the last
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 256*4
+	s_mov_b32	m0, 4	//VGPR initial index value = 4
+	s_cmp_lt_u32	m0, s_restore_alloc_size
+	s_cbranch_scc0	L_RESTORE_SHARED_VGPR
+
+L_RESTORE_VGPR_WAVE64_LOOP:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256
+	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2
+	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3
+	s_wait_idle
+	v_movreld_b32	v0, v0	//v[0+m0] = v0
+	v_movreld_b32	v1, v1
+	v_movreld_b32	v2, v2
+	v_movreld_b32	v3, v3
+	s_add_u32	m0, m0, 4	//next vgpr index
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 256*4	//every buffer_load_dword does 256 bytes
+	s_cmp_lt_u32	m0, s_restore_alloc_size	//scc = (m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_VGPR_WAVE64_LOOP	//VGPR restore (except v0) is complete?
+
+L_RESTORE_SHARED_VGPR:
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)	//shared_vgpr_size
+	s_and_b32	s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF	//shared_vgpr_size is zero?
+	s_cbranch_scc0	L_RESTORE_V0	//no shared_vgpr used?
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, 3	//Number of SHARED_VGPRs = shared_vgpr_size * 8    (non-zero value)
+	//m0 now holds the normal VGPR count; add the shared VGPR count to get the total.
+	//the shared VGPR restore starts at index m0
+	s_add_u32	s_restore_alloc_size, s_restore_alloc_size, m0
+	s_mov_b32	exec_lo, 0xFFFFFFFF
+	s_mov_b32	exec_hi, 0x00000000
+L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+	s_wait_idle
+	v_movreld_b32	v0, v0	//v[0+m0] = v0
+	s_add_u32	m0, m0, 1	//next vgpr index
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128
+	s_cmp_lt_u32	m0, s_restore_alloc_size	//scc = (m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_SHARED_VGPR_WAVE64_LOOP	//SHARED_VGPR restore is complete?
+
+	s_mov_b32	exec_hi, 0xFFFFFFFF	//restore exec_hi before restoring v0
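+	// Shared VGPRs are wave64-only and hold one dword per 32 lanes, hence
+	// the 128-byte stride and the exec_lo-only mask in the loop above;
+	// exec_hi is re-enabled before the final v0..v3 restore.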
+
+	/* VGPR restore on v0 */
+L_RESTORE_V0:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256
+	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2
+	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3
+	s_wait_idle
+
+	/* restore SGPRs */
+	//will be 4+8+16*6
+	// SGPR SR memory offset : size(VGPR)+size(SVGPR)
+L_RESTORE_SGPR:
+	get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+	get_svgpr_size_bytes(s_restore_tmp)
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+	s_sub_u32	s_restore_mem_offset, s_restore_mem_offset, 20*4	//s108~s127 are not saved
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	s_mov_b32	m0, s_sgpr_save_num
+
+	read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+	s_wait_idle
+
+	s_sub_u32	m0, m0, 4	// Restore from S[0] to S[104]
+	s_nop	0	// hazard SALU M0=> S_MOVREL
+
+	s_movreld_b64	s0, s0	//s[0+m0] = s0
+	s_movreld_b64	s2, s2
+
+	read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+	s_wait_idle
+
+	s_sub_u32	m0, m0, 8	// Restore from S[0] to S[96]
+	s_nop	0	// hazard SALU M0=> S_MOVREL
+
+	s_movreld_b64	s0, s0	//s[0+m0] = s0
+	s_movreld_b64	s2, s2
+	s_movreld_b64	s4, s4
+	s_movreld_b64	s6, s6
+
+L_RESTORE_SGPR_LOOP:
+	read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+	s_wait_idle
+
+	s_sub_u32	m0, m0, 16	// Restore from S[n] to S[0]
+	s_nop	0	// hazard SALU M0=> S_MOVREL
+
+	s_movreld_b64	s0, s0	//s[0+m0] = s0
+	s_movreld_b64	s2, s2
+	s_movreld_b64	s4, s4
+	s_movreld_b64	s6, s6
+	s_movreld_b64	s8, s8
+	s_movreld_b64	s10, s10
+	s_movreld_b64	s12, s12
+	s_movreld_b64	s14, s14
+
+	s_cmp_eq_u32	m0, 0	//scc = (m0 == 0) ? 1 : 0
+	s_cbranch_scc0	L_RESTORE_SGPR_LOOP
+
+	// s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception.
+	// Clear DEBUG_EN before and restore MODE after the barrier.
+	s_setreg_imm32_b32	hwreg(HW_REG_WAVE_MODE), 0
+
+	/* restore HW registers */
+L_RESTORE_HWREG:
+	// HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+	get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+	get_svgpr_size_bytes(s_restore_tmp)
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// Restore s_restore_spi_init_hi before the saved value gets clobbered.
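+	// (The save slot presumably aliases one of the temporaries reused by the
+	// HWREG reads below, so the copy has to happen before those reads.)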
+ s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch + + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch + + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp + + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp + + // Only the first wave needs to restore the workgroup barrier. + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE + + // Skip over WAVE_STATUS, since there is no state to restore from it + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 + + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) + s_wait_idle + + s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE + + // extract the saved signal count from s_restore_tmp + s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET + + // We need to call s_barrier_signal repeatedly to restore the signal + // count of the work group barrier. The member count is already + // initialized with the number of waves in the work group. +L_BARRIER_RESTORE_LOOP: + s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE + s_barrier_signal -1 + s_add_i32 s_restore_tmp, s_restore_tmp, -1 + s_branch L_BARRIER_RESTORE_LOOP + +L_SKIP_BARRIER_RESTORE: + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed. + // Only restore the other fields to avoid clobbering them. 
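+	// The three s_setreg_b32 writes below restore EXCP_FLAG_PRIV piecewise:
+	// part 1 covers the low bits, parts 2 and 3 cover the ranges between and
+	// above the live SAVE_CONTEXT/HOST_TRAP bits, which are left untouched.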
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv + s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv + s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv + + s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode + + // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size) + get_svgpr_size_bytes(s_restore_ttmps_hi) + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes() + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 + s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 + s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF + s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS + s_wait_idle + + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + + s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu + + // Make barrier and LDS state visible to all waves in the group. + // STATE_PRIV.BARRIER_COMPLETE may change after this point. + s_barrier_signal -2 + s_barrier_wait -2 + + s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + +L_END_PGM: + s_endpgm_saved +end + +function write_hwreg_to_v2(s) + // Copy into VGPR for later TCP store. + v_writelane_b32 v2, s, m0 + s_add_u32 m0, m0, 0x1 +end + + +function write_16sgpr_to_v2(s) + // Copy into VGPR for later TCP store. + for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++ + v_writelane_b32 v2, s[sgpr_idx], ttmp13 + s_add_u32 ttmp13, ttmp13, 0x1 + end +end + +function write_12sgpr_to_v2(s) + // Copy into VGPR for later TCP store. 
+	for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
+		v_writelane_b32	v2, s[sgpr_idx], ttmp13
+		s_add_u32	ttmp13, ttmp13, 0x1
+	end
+end
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+	s_buffer_load_dword	s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+	s_add_u32	s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+	s_sub_u32	s_mem_offset, s_mem_offset, 4*16
+	s_buffer_load_dwordx16	s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
+	s_sub_u32	s_mem_offset, s_mem_offset, 4*8
+	s_buffer_load_dwordx8	s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
+	s_sub_u32	s_mem_offset, s_mem_offset, 4*4
+	s_buffer_load_dwordx4	s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
+	s_getreg_b32	s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+	s_add_u32	s_vgpr_size_byte, s_vgpr_size_byte, 1
+	s_bitcmp1_b32	s_size, S_WAVE_SIZE
+	s_cbranch_scc1	L_ENABLE_SHIFT_W64
+	s_lshl_b32	s_vgpr_size_byte, s_vgpr_size_byte, (2+7)	//VGPR size in bytes = (vgpr_size + 1) * 4 (VGPRs) * 32 (lanes) * 4 (bytes)   (non-zero value)
+	s_branch	L_SHIFT_DONE
+L_ENABLE_SHIFT_W64:
+	s_lshl_b32	s_vgpr_size_byte, s_vgpr_size_byte, (2+8)	//VGPR size in bytes = (vgpr_size + 1) * 4 (VGPRs) * 64 (lanes) * 4 (bytes)   (non-zero value)
+L_SHIFT_DONE:
+end
+
+function get_svgpr_size_bytes(s_svgpr_size_byte)
+	s_getreg_b32	s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+	s_lshl_b32	s_svgpr_size_byte, s_svgpr_size_byte, (3+7)
+end
+
+function get_sgpr_size_bytes
+	return 512
+end
+
+function get_hwreg_size_bytes
+	return 128
+end
+
+function get_wave_size2(s_reg)
+	s_getreg_b32	s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
+	s_lshl_b32	s_reg, s_reg, S_WAVE_SIZE
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index 0eabb7a8cab9..6869e07a2fff 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -447,7 +447,9 @@ L_SAVE:
 	s_getreg_b32	s_save_m0, hwreg(HW_REG_MODE)	//MODE
 	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-
+	// Clear VSKIP state now that MODE.VSKIP has been saved.
+	// If the user shader set it, vector instructions would be skipped.
+	s_setvskip	0,0
 
 	/* the first wave in the threadgroup */
 	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK	// extract first wave bit
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 312dfa84f29f..a8abc3091801 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -350,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
 {
 	uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
 	uint32_t flags = pdd->process->dbg_flags;
+	struct amdgpu_device *adev = pdd->dev->adev;
+	int r;
 
 	if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
 		return 0;
 
+	if (!pdd->proc_ctx_cpu_ptr) {
+		r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+				AMDGPU_MES_PROC_CTX_SIZE,
+				&pdd->proc_ctx_bo,
+				&pdd->proc_ctx_gpu_addr,
+				&pdd->proc_ctx_cpu_ptr,
+				false);
+		if (r) {
+			dev_err(adev->dev,
+				"failed to allocate process context bo\n");
+			return r;
+		}
+		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+	}
+
 	return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
 						pdd->watch_points, flags, sq_trap_en);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1405e8affd48..d4593374e7a1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2325,9 +2325,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	 */
 	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
 	if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) {
+		while (halt_if_hws_hang)
+			schedule();
 		if (reset_queues_on_hws_hang(dqm)) {
-			while (halt_if_hws_hang)
-				schedule();
 			dqm->is_hws_hang = true;
 			kfd_hws_hang(dqm);
 			retval = -ETIME;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 4b275937d05e..d05d199b5e44 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -278,10 +278,11 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
 			 struct migrate_vma *migrate, struct dma_fence **mfence,
 			 dma_addr_t *scratch, uint64_t ttm_res_offset)
 {
-	uint64_t npages = migrate->cpages;
+	uint64_t npages = migrate->npages;
 	struct amdgpu_device *adev = node->adev;
 	struct device *dev = adev->dev;
 	struct amdgpu_res_cursor cursor;
+	uint64_t mpages = 0;
 	dma_addr_t *src;
 	uint64_t *dst;
 	uint64_t i, j;
@@ -295,14 +296,16 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
 
 	amdgpu_res_first(prange->ttm_res, ttm_res_offset,
 			 npages << PAGE_SHIFT, &cursor);
-	for (i = j = 0; i < npages; i++) {
+	for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) {
 		struct page *spage;
 
-		dst[i] = cursor.start + (j << PAGE_SHIFT);
-		migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
-		svm_migrate_get_vram_page(prange, migrate->dst[i]);
-		migrate->dst[i] = migrate_pfn(migrate->dst[i]);
-
+		if (migrate->src[i] & MIGRATE_PFN_MIGRATE) {
+			dst[i] = cursor.start + (j << PAGE_SHIFT);
+			migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+			svm_migrate_get_vram_page(prange, migrate->dst[i]);
+			migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+			mpages++;
+		}
 		spage = migrate_pfn_to_page(migrate->src[i]);
 		if (spage && !is_zone_device_page(spage)) {
 			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
@@ -353,9 +356,12 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct
svm_range *prange,
 out_free_vram_pages:
 	if (r) {
 		pr_debug("failed %d to copy memory to vram\n", r);
-		while (i--) {
+		for (i = 0; i < npages && mpages; i++) {
+			if (!dst[i])
+				continue;
 			svm_migrate_put_vram_page(adev, dst[i]);
 			migrate->dst[i] = 0;
+			mpages--;
 		}
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c32b255c0eb2..d8cd913aa772 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -32,7 +32,7 @@
 #include <linux/atomic.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
-#include <linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_ioctl.h>
 #include <linux/idr.h>
 #include <linux/kfifo.h>
 #include <linux/seq_file.h>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 0976b5b0e8e8..083f83c94531 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1160,7 +1160,8 @@ static void kfd_process_wq_release(struct work_struct *work)
 	 */
 	synchronize_rcu();
 	ef = rcu_access_pointer(p->ef);
-	dma_fence_signal(ef);
+	if (ef)
+		dma_fence_signal(ef);
 
 	kfd_process_remove_sysfs(p);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 9df56f8e09f9..bcddd989c7f3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -86,9 +86,12 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 
 	if (pdd->already_dequeued)
 		return;
-
+	/* The MES context flush needs to filter out the case in which the
+	 * KFD process was created without setting up the MES context and
+	 * queue for creating a compute queue.
+	 */
 	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
-	if (dev->kfd->shared_resources.enable_mes &&
+	if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
 	    down_read_trylock(&dev->adev->reset_domain->sem)) {
 		amdgpu_mes_flush_shader_debugger(dev->adev,
 						 pdd->proc_ctx_gpu_addr);
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 11e3f2f3b174..abd3b6564373 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -8,6 +8,8 @@ config DRM_AMD_DC
 	bool "AMD DC - Enable new display engine"
 	default y
 	depends on BROKEN || !CC_IS_CLANG || ARM64 || LOONGARCH || RISCV || SPARC64 || X86_64
+	select CEC_CORE
+	select CEC_NOTIFIER
 	select SND_HDA_COMPONENT if SND_HDA_CORE
 	# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
 	select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || LOONGARCH || RISCV))
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b631b2eba0f0..ac3fd81fecef 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -98,6 +98,7 @@
 #include <drm/drm_audio_component.h>
 #include <drm/drm_gem_atomic_helper.h>
 
+#include <media/cec-notifier.h>
 #include <acpi/video.h>
 
 #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
@@ -2033,6 +2034,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH)
 		adev->dm.dc->debug.force_subvp_mclk_switch = true;
 
+	if (amdgpu_dc_debug_mask & DC_DISABLE_SUBVP)
+		adev->dm.dc->debug.force_disable_subvp = true;
+
 	if (amdgpu_dc_debug_mask & DC_ENABLE_DML2) {
 		adev->dm.dc->debug.using_dml2 = true;
adev->dm.dc->debug.using_dml21 = true; @@ -2158,6 +2162,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) amdgpu_dm_crtc_secure_display_create_contexts(adev); if (!adev->dm.secure_display_ctx.crtc_ctx) DRM_ERROR("amdgpu: failed to initialize secure display contexts.\n"); + + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1)) + adev->dm.secure_display_ctx.support_mul_roi = true; + #endif DRM_DEBUG_DRIVER("KMS initialized.\n"); @@ -2750,6 +2758,48 @@ out_fail: mutex_unlock(&mgr->lock); } +void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector) +{ + struct cec_notifier *n = aconnector->notifier; + + if (!n) + return; + + cec_notifier_phys_addr_invalidate(n); +} + +void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector) +{ + struct drm_connector *connector = &aconnector->base; + struct cec_notifier *n = aconnector->notifier; + + if (!n) + return; + + cec_notifier_set_phys_addr(n, + connector->display_info.source_physical_address); +} + +static void s3_handle_hdmi_cec(struct drm_device *ddev, bool suspend) +{ + struct amdgpu_dm_connector *aconnector; + struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; + + drm_connector_list_iter_begin(ddev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + aconnector = to_amdgpu_dm_connector(connector); + if (suspend) + hdmi_cec_unset_edid(aconnector); + else + hdmi_cec_set_edid(aconnector); + } + drm_connector_list_iter_end(&conn_iter); +} + static void s3_handle_mst(struct drm_device *dev, bool suspend) { struct amdgpu_dm_connector *aconnector; @@ -3021,6 +3071,8 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block) if (IS_ERR(adev->dm.cached_state)) return PTR_ERR(adev->dm.cached_state); + s3_handle_hdmi_cec(adev_to_drm(adev), true); + s3_handle_mst(adev_to_drm(adev), true); amdgpu_dm_irq_suspend(adev); @@ -3293,6 +3345,8 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) */ amdgpu_dm_irq_resume_early(adev); + s3_handle_hdmi_cec(ddev, false); + /* On resume we need to rewrite the MSTM control bits to enable MST*/ s3_handle_mst(ddev, false); @@ -3607,6 +3661,7 @@ void amdgpu_dm_update_connector_after_detect( dc_sink_retain(aconnector->dc_sink); if (sink->dc_edid.length == 0) { aconnector->drm_edid = NULL; + hdmi_cec_unset_edid(aconnector); if (aconnector->dc_link->aux_mode) { drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); } @@ -3616,6 +3671,7 @@ void amdgpu_dm_update_connector_after_detect( aconnector->drm_edid = drm_edid_alloc(edid, sink->dc_edid.length); drm_edid_connector_update(connector, aconnector->drm_edid); + hdmi_cec_set_edid(aconnector); if (aconnector->dc_link->aux_mode) drm_dp_cec_attach(&aconnector->dm_dp_aux.aux, connector->display_info.source_physical_address); @@ -3632,6 +3688,7 @@ void amdgpu_dm_update_connector_after_detect( amdgpu_dm_update_freesync_caps(connector, aconnector->drm_edid); update_connector_ext_caps(aconnector); } else { + hdmi_cec_unset_edid(aconnector); drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); amdgpu_dm_update_freesync_caps(connector, NULL); aconnector->num_modes = 0; @@ -7048,6 +7105,7 @@ static void amdgpu_dm_connector_unregister(struct drm_connector *connector) if (amdgpu_dm_should_create_sysfs(amdgpu_dm_connector)) sysfs_remove_group(&connector->kdev->kobj, &amdgpu_group); + cec_notifier_conn_unregister(amdgpu_dm_connector->notifier); drm_dp_aux_unregister(&amdgpu_dm_connector->dm_dp_aux.aux); } @@ -8284,6 +8342,27 @@ 
create_i2c(struct ddc_service *ddc_service, return i2c; } +int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector) +{ + struct cec_connector_info conn_info; + struct drm_device *ddev = aconnector->base.dev; + struct device *hdmi_dev = ddev->dev; + + if (amdgpu_dc_debug_mask & DC_DISABLE_HDMI_CEC) { + drm_info(ddev, "HDMI-CEC feature masked\n"); + return -EINVAL; + } + + cec_fill_conn_info_from_drm(&conn_info, &aconnector->base); + aconnector->notifier = + cec_notifier_conn_register(hdmi_dev, NULL, &conn_info); + if (!aconnector->notifier) { + drm_err(ddev, "Failed to create cec notifier\n"); + return -ENOMEM; + } + + return 0; +} /* * Note: this function assumes that dc_link_detect() was called for the @@ -8347,6 +8426,10 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, drm_connector_attach_encoder( &aconnector->base, &aencoder->base); + if (connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector_type == DRM_MODE_CONNECTOR_HDMIB) + amdgpu_dm_initialize_hdmi_connector(aconnector); + if (connector_type == DRM_MODE_CONNECTOR_DisplayPort || connector_type == DRM_MODE_CONNECTOR_eDP) amdgpu_dm_initialize_dp_connector(dm, aconnector, link->link_index); @@ -8406,16 +8489,6 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, struct dm_crtc_state *acrtc_state) { - /* - * We have no guarantee that the frontend index maps to the same - * backend index - some even map to more than one. - * - * TODO: Use a different interrupt or check DC itself for the mapping. - */ - int irq_type = - amdgpu_display_crtc_idx_to_irq_type( - adev, - acrtc->crtc_id); struct drm_vblank_crtc_config config = {0}; struct dc_crtc_timing *timing; int offdelay; @@ -8441,28 +8514,7 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, drm_crtc_vblank_on_config(&acrtc->base, &config); - - amdgpu_irq_get( - adev, - &adev->pageflip_irq, - irq_type); -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_get( - adev, - &adev->vline0_irq, - irq_type); -#endif } else { -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_put( - adev, - &adev->vline0_irq, - irq_type); -#endif - amdgpu_irq_put( - adev, - &adev->pageflip_irq, - irq_type); drm_crtc_vblank_off(&acrtc->base); } } @@ -8933,6 +8985,7 @@ static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach, struct replay_settings *pr = &acrtc_state->stream->link->replay_settings; struct amdgpu_dm_connector *aconn = (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context; + bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state); if (acrtc_state->update_type > UPDATE_TYPE_FAST) { if (pr->config.replay_supported && !pr->replay_feature_enabled) @@ -8959,14 +9012,15 @@ static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach, * adequate number of fast atomic commits to notify KMD * of update events. See `vblank_control_worker()`. 
*/ - if (acrtc_attach->dm_irq_params.allow_sr_entry && + if (!vrr_active && + acrtc_attach->dm_irq_params.allow_sr_entry && #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) && #endif (current_ts - psr->psr_dirty_rects_change_timestamp_ns) > 500000000) { if (pr->replay_feature_enabled && !pr->replay_allow_active) amdgpu_dm_replay_enable(acrtc_state->stream, true); - if (psr->psr_version >= DC_PSR_VERSION_SU_1 && + if (psr->psr_version == DC_PSR_VERSION_SU_1 && !psr->psr_allow_active && !aconn->disallow_edp_enter_psr) amdgpu_dm_psr_enable(acrtc_state->stream); } @@ -9137,7 +9191,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, acrtc_state->stream->link->psr_settings.psr_dirty_rects_change_timestamp_ns = timestamp_ns; if (acrtc_state->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(acrtc_state->stream); + amdgpu_dm_psr_disable(acrtc_state->stream, true); mutex_unlock(&dm->dc_lock); } } @@ -9303,11 +9357,11 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->stream_update.abm_level = &acrtc_state->abm_level; mutex_lock(&dm->dc_lock); - if (acrtc_state->update_type > UPDATE_TYPE_FAST) { + if ((acrtc_state->update_type > UPDATE_TYPE_FAST) || vrr_active) { if (acrtc_state->stream->link->replay_settings.replay_allow_active) amdgpu_dm_replay_disable(acrtc_state->stream); if (acrtc_state->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(acrtc_state->stream); + amdgpu_dm_psr_disable(acrtc_state->stream, true); } mutex_unlock(&dm->dc_lock); @@ -10066,14 +10120,19 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (amdgpu_dm_is_valid_crc_source(cur_crc_src)) { #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) if (amdgpu_dm_crc_window_is_activated(crtc)) { + uint8_t cnt; spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); - acrtc->dm_irq_params.window_param.update_win = true; - - /** - * It takes 2 frames for HW to stably generate CRC when - * resuming from suspend, so we set skip_frame_cnt 2. - */ - acrtc->dm_irq_params.window_param.skip_frame_cnt = 2; + for (cnt = 0; cnt < MAX_CRC_WINDOW_NUM; cnt++) { + if (acrtc->dm_irq_params.window_param[cnt].enable) { + acrtc->dm_irq_params.window_param[cnt].update_win = true; + + /** + * It takes 2 frames for HW to stably generate CRC when + * resuming from suspend, so we set skip_frame_cnt 2. + */ + acrtc->dm_irq_params.window_param[cnt].skip_frame_cnt = 2; + } + } spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); } #endif @@ -11161,8 +11220,8 @@ dm_get_plane_scale(struct drm_plane_state *plane_state, int plane_src_w, plane_src_h; dm_get_oriented_plane_size(plane_state, &plane_src_w, &plane_src_h); - *out_plane_scale_w = plane_state->crtc_w * 1000 / plane_src_w; - *out_plane_scale_h = plane_state->crtc_h * 1000 / plane_src_h; + *out_plane_scale_w = plane_src_w ? plane_state->crtc_w * 1000 / plane_src_w : 0; + *out_plane_scale_h = plane_src_h ? 
plane_state->crtc_h * 1000 / plane_src_h : 0;
 }
 
 /*
@@ -11416,6 +11475,30 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev,
 	return 0;
 }
 
+static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev,
+					    struct drm_atomic_state *state,
+					    struct drm_crtc_state *crtc_state)
+{
+	struct drm_plane *plane;
+	struct drm_plane_state *new_plane_state, *old_plane_state;
+
+	drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) {
+		new_plane_state = drm_atomic_get_plane_state(state, plane);
+		old_plane_state = drm_atomic_get_plane_state(state, plane);
+
+		if (IS_ERR(new_plane_state) || IS_ERR(old_plane_state)) {
+			DRM_ERROR("Failed to get plane state for plane %s\n", plane->name);
+			return false;
+		}
+
+		if (old_plane_state->fb && new_plane_state->fb &&
+		    get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb))
+			return true;
+	}
+
+	return false;
+}
+
 /**
  * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
  *
@@ -11613,10 +11696,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 
 	/* Remove existing planes if they are modified */
 	for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) {
-		if (old_plane_state->fb && new_plane_state->fb &&
-		    get_mem_type(old_plane_state->fb) !=
-		    get_mem_type(new_plane_state->fb))
-			lock_and_validation_needed = true;
 
 		ret = dm_update_plane_state(dc, state, plane,
 					    old_plane_state,
@@ -11911,9 +11990,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 
 	/*
 	 * Only allow async flips for fast updates that don't change
-	 * the FB pitch, the DCC state, rotation, etc.
+	 * the FB pitch, the DCC state, rotation, mem_type, etc.
 	 */
-	if (new_crtc_state->async_flip && lock_and_validation_needed) {
+	if (new_crtc_state->async_flip &&
+	    (lock_and_validation_needed ||
+	     amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) {
 		drm_dbg_atomic(crtc->dev,
 			       "[CRTC:%d:%s] async flips are only supported for fast updates\n",
 			       crtc->base.id, crtc->name);
@@ -12245,10 +12326,14 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
 
 	if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
 		     sink->sink_signal == SIGNAL_TYPE_EDP)) {
-		amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
-		amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
-		if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
-			freesync_capable = true;
+		if (amdgpu_dm_connector->dc_link &&
+		    amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) {
+			amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
+			amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
+			if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
+				freesync_capable = true;
+		}
+
 		parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info);
 
 		if (vsdb_info.replay_mode) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index e46e1365fe91..d2703ca7dff3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -671,6 +671,8 @@ struct amdgpu_dm_connector {
 	uint32_t connector_id;
 	int bl_idx;
 
+	struct cec_notifier *notifier;
+
 	/* we need to mind the EDID between detect
 	 * and get modes due to analog/digital/tvencoder */
 	const struct drm_edid *drm_edid;
@@ -697,6 +699,8 @@ struct amdgpu_dm_connector {
 	struct drm_dp_mst_port *mst_output_port;
 	struct
amdgpu_dm_connector *mst_root; struct drm_dp_aux *dsc_aux; + uint32_t mst_local_bw; + uint16_t vc_full_pbn; struct mutex handle_mst_msg_ready; /* TODO see if we can merge with ddc_bus or make a dm_connector */ @@ -1010,4 +1014,8 @@ void dm_free_gpu_mem(struct amdgpu_device *adev, bool amdgpu_dm_is_headless(struct amdgpu_device *adev); +void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector); +void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector); +int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector); + #endif /* __AMDGPU_DM_H__ */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 6fdc306a4a86..033bd817d871 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -30,6 +30,7 @@ #include "amdgpu_dm.h" #include "dc.h" #include "amdgpu_securedisplay.h" +#include "amdgpu_dm_psr.h" static const char *const pipe_crc_sources[] = { "none", @@ -295,33 +296,41 @@ static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc, struct dc_st struct drm_device *drm_dev = crtc->dev; struct amdgpu_display_manager *dm = &drm_to_adev(drm_dev)->dm; struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - bool was_activated; struct amdgpu_dm_connector *aconnector; + bool was_activated; uint8_t phy_id; + unsigned long flags; + int i; - spin_lock_irq(&drm_dev->event_lock); - was_activated = acrtc->dm_irq_params.window_param.activated; - acrtc->dm_irq_params.window_param.x_start = 0; - acrtc->dm_irq_params.window_param.y_start = 0; - acrtc->dm_irq_params.window_param.x_end = 0; - acrtc->dm_irq_params.window_param.y_end = 0; - acrtc->dm_irq_params.window_param.activated = false; - acrtc->dm_irq_params.window_param.update_win = false; - acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; - spin_unlock_irq(&drm_dev->event_lock); + spin_lock_irqsave(&drm_dev->event_lock, flags); + was_activated = acrtc->dm_irq_params.crc_window_activated; + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { + acrtc->dm_irq_params.window_param[i].x_start = 0; + acrtc->dm_irq_params.window_param[i].y_start = 0; + acrtc->dm_irq_params.window_param[i].x_end = 0; + acrtc->dm_irq_params.window_param[i].y_end = 0; + acrtc->dm_irq_params.window_param[i].enable = false; + acrtc->dm_irq_params.window_param[i].update_win = false; + acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 0; + } + acrtc->dm_irq_params.crc_window_activated = false; + spin_unlock_irqrestore(&drm_dev->event_lock, flags); /* Disable secure_display if it was enabled */ - if (was_activated) { + if (was_activated && dm->secure_display_ctx.op_mode == LEGACY_MODE) { /* stop ROI update on this crtc */ flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].notify_ta_work); flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].forward_roi_work); - aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; - if (aconnector && get_phy_id(dm, aconnector, &phy_id)) - dc_stream_forward_crc_window(stream, NULL, phy_id, true); - else + if (aconnector && get_phy_id(dm, aconnector, &phy_id)) { + if (dm->secure_display_ctx.support_mul_roi) + dc_stream_forward_multiple_crc_window(stream, NULL, phy_id, true); + else + dc_stream_forward_crc_window(stream, NULL, phy_id, true); + } else { DRM_DEBUG_DRIVER("%s Can't find matching phy id", __func__); + } } } @@ -335,7 +344,11 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work) struct amdgpu_dm_connector *aconnector; 
uint8_t phy_inst;
 	struct amdgpu_display_manager *dm;
+	struct crc_data crc_cpy[MAX_CRC_WINDOW_NUM];
+	unsigned long flags;
+	uint8_t roi_idx = 0;
 	int ret;
+	int i;
 
 	crtc_ctx = container_of(work, struct secure_display_crtc_context, notify_ta_work);
 	crtc = crtc_ctx->crtc;
@@ -364,18 +377,36 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
 	}
 	mutex_unlock(&crtc->dev->mode_config.mutex);
 
+	spin_lock_irqsave(&crtc->dev->event_lock, flags);
+	memcpy(crc_cpy, crtc_ctx->crc_info.crc, sizeof(struct crc_data) * MAX_CRC_WINDOW_NUM);
+	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
 	/* need lock for multiple crtcs to use the command buffer */
 	mutex_lock(&psp->securedisplay_context.mutex);
-
-	psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
-		TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
-
-	securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst;
-
 	/* PSP TA is expected to finish data transmission over I2C within the current
	 * frame, even when there are up to 4 CRTCs requesting to send in this frame.
 	 */
-	ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+	if (dm->secure_display_ctx.support_mul_roi) {
+		psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+			TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2);
+
+		securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.phy_id = phy_inst;
+
+		for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+			if (crc_cpy[i].crc_ready)
+				roi_idx |= 1 << i;
+		}
+		securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.roi_idx = roi_idx;
+
+		ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2);
+	} else {
+		psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+			TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+
+		securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst;
+
+		ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+	}
 
 	if (!ret) {
 		if (securedisplay_cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS)
@@ -393,6 +424,8 @@ amdgpu_dm_forward_crc_window(struct work_struct *work)
 	struct drm_crtc *crtc;
 	struct dc_stream_state *stream;
 	struct amdgpu_dm_connector *aconnector;
+	struct crc_window roi_cpy[MAX_CRC_WINDOW_NUM];
+	unsigned long flags;
 	uint8_t phy_id;
 
 	crtc_ctx = container_of(work, struct secure_display_crtc_context, forward_roi_work);
@@ -416,9 +449,17 @@ amdgpu_dm_forward_crc_window(struct work_struct *work)
 	}
 	mutex_unlock(&crtc->dev->mode_config.mutex);
 
+	spin_lock_irqsave(&crtc->dev->event_lock, flags);
+	memcpy(roi_cpy, crtc_ctx->roi, sizeof(struct crc_window) * MAX_CRC_WINDOW_NUM);
+	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
 	mutex_lock(&dm->dc_lock);
-	dc_stream_forward_crc_window(stream, &crtc_ctx->rect,
-		phy_id, false);
+	if (dm->secure_display_ctx.support_mul_roi)
+		dc_stream_forward_multiple_crc_window(stream, roi_cpy,
+			phy_id, false);
+	else
+		dc_stream_forward_crc_window(stream, &roi_cpy[0].rect,
+			phy_id, false);
 	mutex_unlock(&dm->dc_lock);
 }
 
@@ -429,7 +470,7 @@ bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc)
 	bool ret = false;
 
 	spin_lock_irq(&drm_dev->event_lock);
-	ret = acrtc->dm_irq_params.window_param.activated;
+	ret = acrtc->dm_irq_params.crc_window_activated;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return ret;
@@ -467,10 +508,14 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
 
 	mutex_lock(&adev->dm.dc_lock);
 
+	/* For PSR1, check that the panel has exited PSR */
+	if (stream_state->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
+
amdgpu_dm_psr_wait_disable(stream_state); + /* Enable or disable CRTC CRC generation */ if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { if (!dc_stream_configure_crc(stream_state->ctx->dc, - stream_state, NULL, enable, enable)) { + stream_state, NULL, enable, enable, 0, true)) { ret = -EINVAL; goto unlock; } @@ -604,6 +649,17 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) } + /* + * Reading the CRC requires the vblank interrupt handler to be + * enabled. Keep a reference until CRC capture stops. + */ + enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); + if (!enabled && enable) { + ret = drm_crtc_vblank_get(crtc); + if (ret) + goto cleanup; + } + #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) /* Reset secure_display when we change crc source from debugfs */ amdgpu_dm_set_crc_window_default(crtc, crtc_state->stream); @@ -614,16 +670,7 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) goto cleanup; } - /* - * Reading the CRC requires the vblank interrupt handler to be - * enabled. Keep a reference until CRC capture stops. - */ - enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); if (!enabled && enable) { - ret = drm_crtc_vblank_get(crtc); - if (ret) - goto cleanup; - if (dm_is_crc_source_dprx(source)) { if (drm_dp_start_crc(aux, crtc)) { DRM_DEBUG_DRIVER("dp start crc failed\n"); @@ -651,7 +698,8 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) /* Initialize phy id mapping table for secure display*/ - if (!dm->secure_display_ctx.phy_mapping_updated) + if (dm->secure_display_ctx.op_mode == LEGACY_MODE && + !dm->secure_display_ctx.phy_mapping_updated) update_phy_id_mapping(adev); #endif @@ -709,7 +757,7 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc) } if (dm_is_crc_source_crtc(cur_crc_src)) { - if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, + if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, 0, &crcs[0], &crcs[1], &crcs[2])) return; @@ -726,7 +774,16 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) struct amdgpu_crtc *acrtc = NULL; struct amdgpu_device *adev = NULL; struct secure_display_crtc_context *crtc_ctx = NULL; + bool reset_crc_frame_count[MAX_CRC_WINDOW_NUM] = {false}; + uint32_t crc_r[MAX_CRC_WINDOW_NUM] = {0}; + uint32_t crc_g[MAX_CRC_WINDOW_NUM] = {0}; + uint32_t crc_b[MAX_CRC_WINDOW_NUM] = {0}; unsigned long flags1; + bool forward_roi_change = false; + bool notify_ta = false; + bool all_crc_ready = true; + struct dc_stream_state *stream_state; + int i; if (crtc == NULL) return; @@ -734,21 +791,21 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) acrtc = to_amdgpu_crtc(crtc); adev = drm_to_adev(crtc->dev); drm_dev = crtc->dev; + stream_state = to_dm_crtc_state(crtc->state)->stream; spin_lock_irqsave(&drm_dev->event_lock, flags1); cur_crc_src = acrtc->dm_irq_params.crc_src; /* Early return if CRC capture is not enabled. 
*/ if (!amdgpu_dm_is_valid_crc_source(cur_crc_src) || - !dm_is_crc_source_crtc(cur_crc_src)) - goto cleanup; - - if (!acrtc->dm_irq_params.window_param.activated) - goto cleanup; + !dm_is_crc_source_crtc(cur_crc_src)) { + spin_unlock_irqrestore(&drm_dev->event_lock, flags1); + return; + } - if (acrtc->dm_irq_params.window_param.skip_frame_cnt) { - acrtc->dm_irq_params.window_param.skip_frame_cnt -= 1; - goto cleanup; + if (!acrtc->dm_irq_params.crc_window_activated) { + spin_unlock_irqrestore(&drm_dev->event_lock, flags1); + return; } crtc_ctx = &adev->dm.secure_display_ctx.crtc_ctx[acrtc->crtc_id]; @@ -759,32 +816,110 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) crtc_ctx->crtc = crtc; } - if (acrtc->dm_irq_params.window_param.update_win) { - /* prepare work for dmub to update ROI */ - crtc_ctx->rect.x = acrtc->dm_irq_params.window_param.x_start; - crtc_ctx->rect.y = acrtc->dm_irq_params.window_param.y_start; - crtc_ctx->rect.width = acrtc->dm_irq_params.window_param.x_end - - acrtc->dm_irq_params.window_param.x_start; - crtc_ctx->rect.height = acrtc->dm_irq_params.window_param.y_end - - acrtc->dm_irq_params.window_param.y_start; - schedule_work(&crtc_ctx->forward_roi_work); + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { + struct crc_params crc_window = { + .windowa_x_start = acrtc->dm_irq_params.window_param[i].x_start, + .windowa_y_start = acrtc->dm_irq_params.window_param[i].y_start, + .windowa_x_end = acrtc->dm_irq_params.window_param[i].x_end, + .windowa_y_end = acrtc->dm_irq_params.window_param[i].y_end, + .windowb_x_start = acrtc->dm_irq_params.window_param[i].x_start, + .windowb_y_start = acrtc->dm_irq_params.window_param[i].y_start, + .windowb_x_end = acrtc->dm_irq_params.window_param[i].x_end, + .windowb_y_end = acrtc->dm_irq_params.window_param[i].y_end, + }; + + crtc_ctx->roi[i].enable = acrtc->dm_irq_params.window_param[i].enable; + + if (!acrtc->dm_irq_params.window_param[i].enable) { + crtc_ctx->crc_info.crc[i].crc_ready = false; + continue; + } - acrtc->dm_irq_params.window_param.update_win = false; + if (acrtc->dm_irq_params.window_param[i].skip_frame_cnt) { + acrtc->dm_irq_params.window_param[i].skip_frame_cnt -= 1; + crtc_ctx->crc_info.crc[i].crc_ready = false; + continue; + } - /* Statically skip 1 frame, because we may need to wait below things - * before sending ROI to dmub: - * 1. We defer the work by using system workqueue. - * 2. We may need to wait for dc_lock before accessing dmub. - */ - acrtc->dm_irq_params.window_param.skip_frame_cnt = 1; + if (acrtc->dm_irq_params.window_param[i].update_win) { + crtc_ctx->roi[i].rect.x = crc_window.windowa_x_start; + crtc_ctx->roi[i].rect.y = crc_window.windowa_y_start; + crtc_ctx->roi[i].rect.width = crc_window.windowa_x_end - + crc_window.windowa_x_start; + crtc_ctx->roi[i].rect.height = crc_window.windowa_y_end - + crc_window.windowa_y_start; + + if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE) + /* forward task to dmub to update ROI */ + forward_roi_change = true; + else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE) + /* update ROI via dm */ + dc_stream_configure_crc(stream_state->ctx->dc, stream_state, + &crc_window, true, true, i, false); + + reset_crc_frame_count[i] = true; + + acrtc->dm_irq_params.window_param[i].update_win = false; + + /* Statically skip 1 frame, because we may need to wait for the things below + * before sending ROI to dmub: + * 1. We defer the work by using system workqueue. + * 2. We may need to wait for dc_lock before accessing dmub. 
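+ * A skip count of 1 gives both of these deferrals one vblank to complete + * before the CRC for the freshly programmed ROI is read back.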
+ */ + acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 1; + crtc_ctx->crc_info.crc[i].crc_ready = false; + } else { + if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, i, + &crc_r[i], &crc_g[i], &crc_b[i])) + DRM_ERROR("Secure Display: failed to get CRC from engine %d\n", i); + + if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE) + /* forward task to psp to read ROI/CRC and output via I2C */ + notify_ta = true; + else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE) + /* Keep overwriting so the ROI window does not get changed. */ + dc_stream_configure_crc(stream_state->ctx->dc, stream_state, + &crc_window, true, true, i, false); + + /* crc ready for psp to read out */ + crtc_ctx->crc_info.crc[i].crc_ready = true; + } + } - } else { - /* prepare work for psp to read ROI/CRC and send to I2C */ + spin_unlock_irqrestore(&drm_dev->event_lock, flags1); + + if (forward_roi_change) + schedule_work(&crtc_ctx->forward_roi_work); + + if (notify_ta) schedule_work(&crtc_ctx->notify_ta_work); + + spin_lock_irqsave(&crtc_ctx->crc_info.lock, flags1); + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { + crtc_ctx->crc_info.crc[i].crc_R = crc_r[i]; + crtc_ctx->crc_info.crc[i].crc_G = crc_g[i]; + crtc_ctx->crc_info.crc[i].crc_B = crc_b[i]; + + if (!crtc_ctx->roi[i].enable) { + crtc_ctx->crc_info.crc[i].frame_count = 0; + continue; + } + + if (!crtc_ctx->crc_info.crc[i].crc_ready) + all_crc_ready = false; + + if (reset_crc_frame_count[i] || crtc_ctx->crc_info.crc[i].frame_count == UINT_MAX) + /* Reset the reference frame count after the user updates the ROI + * or it reaches the maximum value. */ + crtc_ctx->crc_info.crc[i].frame_count = 0; + else + crtc_ctx->crc_info.crc[i].frame_count += 1; } + spin_unlock_irqrestore(&crtc_ctx->crc_info.lock, flags1); -cleanup: - spin_unlock_irqrestore(&drm_dev->event_lock, flags1); + if (all_crc_ready) + complete_all(&crtc_ctx->crc_info.completion); } void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev) @@ -805,9 +940,11 @@ void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev) INIT_WORK(&crtc_ctx[i].forward_roi_work, amdgpu_dm_forward_crc_window); INIT_WORK(&crtc_ctx[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read); crtc_ctx[i].crtc = &adev->mode_info.crtcs[i]->base; + spin_lock_init(&crtc_ctx[i].crc_info.lock); } adev->dm.secure_display_ctx.crtc_ctx = crtc_ctx; - return; + + adev->dm.secure_display_ctx.op_mode = DISPLAY_CRC_MODE; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 4ec2510bc312..3da056c8d20b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -42,6 +42,14 @@ enum amdgpu_dm_pipe_crc_source { #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY #define MAX_CRTC 6 +enum secure_display_mode { + /* via dmub + psp */ + LEGACY_MODE = 0, + /* driver directly */ + DISPLAY_CRC_MODE, + SECURE_DISPLAY_MODE_MAX, +}; + struct phy_id_mapping { bool assigned; bool is_mst; @@ -51,13 +59,27 @@ struct phy_id_mapping { u8 rad[8]; }; +struct crc_data { + uint32_t crc_R; + uint32_t crc_G; + uint32_t crc_B; + uint32_t frame_count; + bool crc_ready; +}; + +struct crc_info { + struct crc_data crc[MAX_CRC_WINDOW_NUM]; + struct completion completion; + spinlock_t lock; +}; + struct crc_window_param { uint16_t x_start; uint16_t y_start; uint16_t x_end; uint16_t y_end; /* CRC window is activated or not*/ - bool activated; + bool enable; /* Update crc window during vertical 
blank or not */ bool update_win; /* skip reading/writing for few frames */ @@ -74,13 +96,17 @@ struct secure_display_crtc_context { struct drm_crtc *crtc; /* Region of Interest (ROI) */ - struct rect rect; + struct crc_window roi[MAX_CRC_WINDOW_NUM]; + + struct crc_info crc_info; }; struct secure_display_context { struct secure_display_crtc_context *crtc_ctx; - + /* Whether dmub supports multiple ROI settings */ + bool support_mul_roi; + enum secure_display_mode op_mode; bool phy_mapping_updated; int phy_id_mapping_cnt; struct phy_id_mapping phy_id_mapping[MAX_CRTC]; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 64a041c2af05..36a830a7440f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -93,7 +93,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable) return rc; } -bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state) +bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state) { return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; @@ -142,7 +142,7 @@ static void amdgpu_dm_crtc_set_panel_sr_feature( amdgpu_dm_replay_enable(vblank_work->stream, true); } else if (vblank_enabled) { if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active) - amdgpu_dm_psr_disable(vblank_work->stream); + amdgpu_dm_psr_disable(vblank_work->stream, false); } else if (link->psr_settings.psr_feature_enabled && allow_sr_entry && !is_sr_active && !is_crc_window_active) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h index 17e948753f59..c1212947a77b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h @@ -37,7 +37,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable); bool amdgpu_dm_crtc_vrr_active_irq(struct amdgpu_crtc *acrtc); -bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state); +bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state); int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index bc3e7a82b402..049046c60462 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -25,6 +25,7 @@ #include <linux/string_helpers.h> #include <linux/uaccess.h> +#include <media/cec-notifier.h> #include "dc.h" #include "amdgpu.h" @@ -2848,6 +2849,67 @@ static int is_dpia_link_show(struct seq_file *m, void *data) return 0; } +/** + * hdmi_cec_state_show - Read out the HDMI-CEC feature status + * @m: sequence file. + * @data: unused. + * + * Return: 0 on success + */ +static int hdmi_cec_state_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + + seq_printf(m, "%s:%d\n", connector->name, connector->base.id); + seq_printf(m, "HDMI-CEC status: %d\n", aconnector->notifier ? 1 : 0); + + return 0; +} + +/** + * hdmi_cec_state_write - Enable/Disable HDMI-CEC feature from driver side + * @f: file structure. + * @buf: userspace buffer. Set to '1' to enable; '0' to disable the CEC feature. 
+ * @size: size of buffer from userspace. + * @pos: unused. + * + * Return: size on success, error code on failure + */ +static ssize_t hdmi_cec_state_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + int ret; + bool enable; + struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; + struct drm_device *ddev = aconnector->base.dev; + + if (size == 0) + return -EINVAL; + + ret = kstrtobool_from_user(buf, size, &enable); + if (ret) { + drm_dbg_driver(ddev, "invalid user data!\n"); + return ret; + } + + if (enable) { + if (aconnector->notifier) + return -EINVAL; + ret = amdgpu_dm_initialize_hdmi_connector(aconnector); + if (ret) + return ret; + hdmi_cec_set_edid(aconnector); + } else { + if (!aconnector->notifier) + return -EINVAL; + cec_notifier_conn_unregister(aconnector->notifier); + aconnector->notifier = NULL; + } + + return size; +} + DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); @@ -2860,6 +2922,7 @@ DEFINE_SHOW_ATTRIBUTE(psr_capability); DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); DEFINE_SHOW_ATTRIBUTE(is_dpia_link); +DEFINE_SHOW_STORE_ATTRIBUTE(hdmi_cec_state); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2995,7 +3058,8 @@ static const struct { char *name; const struct file_operations *fops; } hdmi_debugfs_entries[] = { - {"hdcp_sink_capability", &hdcp_sink_capability_fops} + {"hdcp_sink_capability", &hdcp_sink_capability_fops}, + {"hdmi_cec_state", &hdmi_cec_state_fops} }; /* @@ -3480,8 +3544,8 @@ static int crc_win_x_start_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.window_param.x_start = (uint16_t) val; - acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param[0].x_start = (uint16_t) val; + acrtc->dm_irq_params.window_param[0].update_win = false; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3497,7 +3561,7 @@ static int crc_win_x_start_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.window_param.x_start; + *val = acrtc->dm_irq_params.window_param[0].x_start; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3517,8 +3581,8 @@ static int crc_win_y_start_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.window_param.y_start = (uint16_t) val; - acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param[0].y_start = (uint16_t) val; + acrtc->dm_irq_params.window_param[0].update_win = false; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3534,7 +3598,7 @@ static int crc_win_y_start_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.window_param.y_start; + *val = acrtc->dm_irq_params.window_param[0].y_start; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3553,8 +3617,8 @@ static int crc_win_x_end_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.window_param.x_end = (uint16_t) val; - acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param[0].x_end = (uint16_t) val; + acrtc->dm_irq_params.window_param[0].update_win = false; 
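+ /* Note that the crc_win_* debugfs knobs program only window_param[0]; + * the remaining CRC windows are driven through the multi-ROI paths above. */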
spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3570,7 +3634,7 @@ static int crc_win_x_end_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.window_param.x_end; + *val = acrtc->dm_irq_params.window_param[0].x_end; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3589,8 +3653,8 @@ static int crc_win_y_end_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.window_param.y_end = (uint16_t) val; - acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param[0].y_end = (uint16_t) val; + acrtc->dm_irq_params.window_param[0].update_win = false; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3606,7 +3670,7 @@ static int crc_win_y_end_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.window_param.y_end; + *val = acrtc->dm_irq_params.window_param[0].y_end; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3629,13 +3693,14 @@ static int crc_win_update_set(void *data, u64 val) /* PSR may write to OTG CRC window control register, * so close it before starting secure_display. */ - amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream); + amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream, true); spin_lock_irq(&adev_to_drm(adev)->event_lock); - acrtc->dm_irq_params.window_param.activated = true; - acrtc->dm_irq_params.window_param.update_win = true; - acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; + acrtc->dm_irq_params.window_param[0].enable = true; + acrtc->dm_irq_params.window_param[0].update_win = true; + acrtc->dm_irq_params.window_param[0].skip_frame_cnt = 0; + acrtc->dm_irq_params.crc_window_activated = true; spin_unlock_irq(&adev_to_drm(adev)->event_lock); mutex_unlock(&adev->dm.dc_lock); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 6cbbb71d752b..fbd80d8545a8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -885,6 +885,12 @@ bool dm_helpers_dp_write_dsc_enable( return ret; } +bool dm_helpers_dp_write_hblank_reduction(struct dc_context *ctx, const struct dc_stream_state *stream) +{ + // TODO + return false; +} + bool dm_helpers_is_dp_sink_present(struct dc_link *link) { bool dp_sink_present; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h index 6a7ecc1e4602..6c9de834455b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h @@ -39,7 +39,9 @@ struct dm_irq_params { #ifdef CONFIG_DEBUG_FS enum amdgpu_dm_pipe_crc_source crc_src; #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY - struct crc_window_param window_param; + struct crc_window_param window_param[MAX_CRC_WINDOW_NUM]; + /* Whether at least one CRC window is activated */ + bool crc_window_activated; #endif #endif }; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 56fa2b9b5d7a..07e744da7bf4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -155,6 +155,17 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector 
*connector) return 0; } + +static inline void +amdgpu_dm_mst_reset_mst_connector_setting(struct amdgpu_dm_connector *aconnector) +{ + aconnector->drm_edid = NULL; + aconnector->dsc_aux = NULL; + aconnector->mst_output_port->passthrough_aux = NULL; + aconnector->mst_local_bw = 0; + aconnector->vc_full_pbn = 0; +} + static void amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) { @@ -182,9 +193,7 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) dc_sink_release(dc_sink); aconnector->dc_sink = NULL; - aconnector->drm_edid = NULL; - aconnector->dsc_aux = NULL; - port->passthrough_aux = NULL; + amdgpu_dm_mst_reset_mst_connector_setting(aconnector); } aconnector->mst_status = MST_STATUS_DEFAULT; @@ -504,9 +513,7 @@ dm_dp_mst_detect(struct drm_connector *connector, dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; - aconnector->drm_edid = NULL; - aconnector->dsc_aux = NULL; - port->passthrough_aux = NULL; + amdgpu_dm_mst_reset_mst_connector_setting(aconnector); amdgpu_dm_set_mst_status(&aconnector->mst_status, MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD, @@ -1820,9 +1827,18 @@ enum dc_status dm_dp_mst_is_port_support_mode( struct drm_dp_mst_port *immediate_upstream_port = NULL; uint32_t end_link_bw = 0; - /*Get last DP link BW capability*/ - if (dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw)) { - if (stream_kbps > end_link_bw) { + /* Get last DP link BW capability. The mode shall be supported by the legacy peer. */ + if (aconnector->mst_output_port->pdt != DP_PEER_DEVICE_DP_LEGACY_CONV && + aconnector->mst_output_port->pdt != DP_PEER_DEVICE_NONE) { + if (aconnector->vc_full_pbn != aconnector->mst_output_port->full_pbn) { + dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw); + aconnector->vc_full_pbn = aconnector->mst_output_port->full_pbn; + aconnector->mst_local_bw = end_link_bw; + } else { + end_link_bw = aconnector->mst_local_bw; + } + + if (end_link_bw > 0 && stream_kbps > end_link_bw) { DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." "Mode required bw can't fit into last link\n"); return DC_FAIL_BANDWIDTH_VALIDATE; @@ -1836,11 +1852,15 @@ enum dc_status dm_dp_mst_is_port_support_mode( if (immediate_upstream_port) { virtual_channel_bw_in_kbps = kbps_from_pbn(immediate_upstream_port->full_pbn); virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps); - if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { - DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." - "Max dsc compression can't fit into MST available bw\n"); - return DC_FAIL_BANDWIDTH_VALIDATE; - } + } else { + /* For topology LCT 1 case - only one mstb */ + virtual_channel_bw_in_kbps = root_link_bw_in_kbps; + } + + if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link. 
+ "Max dsc compression can't fit into MST available bw\n"); + return DC_FAIL_BANDWIDTH_VALIDATE; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 1ec4e4b9ea94..774cc3f4f3fd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -177,7 +177,7 @@ static unsigned int amdgpu_dm_plane_modifier_gfx9_swizzle_mode(uint64_t modifier return AMD_FMT_MOD_GET(TILE, modifier); } -static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info, +static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(struct dc_tiling_info *tiling_info, uint64_t tiling_flags) { /* Fill GFX8 params */ @@ -190,6 +190,7 @@ static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_inf tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); + tiling_info->gfxversion = DcGfxVersion8; /* XXX fix me for VI */ tiling_info->gfx8.num_banks = num_banks; tiling_info->gfx8.array_mode = @@ -210,7 +211,7 @@ static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_inf } static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info) + struct dc_tiling_info *tiling_info) { /* Fill GFX9 params */ tiling_info->gfx9.num_pipes = @@ -231,7 +232,7 @@ static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(const struct amdgp } static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, uint64_t modifier) { unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier); @@ -261,7 +262,7 @@ static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(const struct amd static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev, const enum surface_pixel_format format, const enum dc_rotation_angle rotation, - const union dc_tiling_info *tiling_info, + const struct dc_tiling_info *tiling_info, const struct dc_plane_dcc_param *dcc, const struct dc_plane_address *address, const struct plane_size *plane_size) @@ -308,7 +309,7 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg const enum surface_pixel_format format, const enum dc_rotation_angle rotation, const struct plane_size *plane_size, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address) { @@ -317,6 +318,7 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); + tiling_info->gfxversion = DcGfxVersion9; if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { uint64_t dcc_address = afb->address + afb->base.offsets[1]; @@ -358,7 +360,7 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd const enum surface_pixel_format format, const enum dc_rotation_angle rotation, const struct plane_size *plane_size, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address) { @@ -369,6 +371,7 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd 
amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info); tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); + tiling_info->gfxversion = DcGfxAddr3; if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { int max_compressed_block = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier); @@ -834,7 +837,7 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, const enum surface_pixel_format format, const enum dc_rotation_angle rotation, const uint64_t tiling_flags, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h index 2eef13b1c05a..615d2ab2b803 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -47,7 +47,7 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, const enum surface_pixel_format format, const enum dc_rotation_angle rotation, const uint64_t tiling_flags, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index f40240aafe98..45858bf1523d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -201,14 +201,13 @@ void amdgpu_dm_psr_enable(struct dc_stream_state *stream) * * Return: true if success */ -bool amdgpu_dm_psr_disable(struct dc_stream_state *stream) +bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait) { - unsigned int power_opt = 0; bool psr_enable = false; DRM_DEBUG_DRIVER("Disabling psr...\n"); - return dc_link_set_psr_allow_active(stream->link, &psr_enable, true, false, &power_opt); + return dc_link_set_psr_allow_active(stream->link, &psr_enable, wait, false, NULL); } /* @@ -251,3 +250,33 @@ bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm) return allow_active; } + +/** + * amdgpu_dm_psr_wait_disable() - Wait for eDP panel to exit PSR + * @stream: stream state attached to the eDP link + * + * Waits for a max of 500ms for the eDP panel to exit PSR. + * + * Return: true if panel exited PSR, false otherwise. 
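+ *
+ * Hypothetical usage sketch (illustration only; the caller added in
+ * amdgpu_dm_crtc_configure_crc_source ignores the return value):
+ *
+ *	if (!amdgpu_dm_psr_wait_disable(stream))
+ *		DRM_WARN("eDP panel did not exit PSR within 500ms\n");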
+ */ +bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream) +{ + enum dc_psr_state psr_state = PSR_STATE0; + struct dc_link *link = stream->link; + int retry_count; + + if (link == NULL) + return false; + + for (retry_count = 0; retry_count < 1000; retry_count++) { + dc_link_get_psr_state(link, &psr_state); + if (psr_state == PSR_STATE0) + break; + udelay(500); + } + + if (retry_count == 1000) + return false; + + return true; +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h index cd2d45c2b5ef..e2366321a3c1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h @@ -34,8 +34,9 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link); void amdgpu_dm_psr_enable(struct dc_stream_state *stream); bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream); -bool amdgpu_dm_psr_disable(struct dc_stream_state *stream); +bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait); bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm); bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm); +bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream); #endif /* AMDGPU_DM_AMDGPU_DM_PSR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index c9a6de110b74..a62f6c51301c 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -3088,11 +3088,12 @@ static enum bp_result construct_integrated_info( info->ext_disp_conn_info.path[i].ext_encoder_obj_id.id, info->ext_disp_conn_info.path[i].caps ); - if (info->ext_disp_conn_info.path[i].caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) - DC_LOG_BIOS("BIOS EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); + if ((info->ext_disp_conn_info.path[i].caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) + DC_LOG_BIOS("BIOS AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); else if (bp->base.ctx->dc->config.force_bios_fixed_vs) { - info->ext_disp_conn_info.path[i].caps |= EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN; - DC_LOG_BIOS("driver forced EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); + info->ext_disp_conn_info.path[i].caps &= ~AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; + info->ext_disp_conn_info.path[i].caps |= AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN; + DC_LOG_BIOS("driver forced AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); } } // Log the Checksum and Voltage Swing diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 2a74140d7ebf..1648226586e2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -89,7 +89,7 @@ #define mmCLK1_CLK4_ALLOW_DS 0x16EA8 #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 -#define mmCLK5_spll_field_8 0x1B04B +#define mmCLK5_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 @@ -401,6 +401,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) { if (clk_mgr->base.ctx->dc->config.allow_0_dtb_clk) dcn35_smu_set_dtbclk(clk_mgr, false); + clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; } /* check that we're not 
already in lower */ @@ -418,11 +419,17 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, } if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { - dcn35_smu_set_dtbclk(clk_mgr, true); - clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; + int actual_dtbclk = 0; dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz); - clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; + dcn35_smu_set_dtbclk(clk_mgr, true); + + actual_dtbclk = REG_READ(CLK1_CLK4_CURRENT_CNT); + + if (actual_dtbclk) { + clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; + clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; + } } /* check that we're not already in D0 */ @@ -584,12 +591,10 @@ static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) static void init_clk_states(struct clk_mgr *clk_mgr) { - struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; + memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); - if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD) - clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; @@ -600,6 +605,7 @@ static void init_clk_states(struct clk_mgr *clk_mgr) void dcn35_init_clocks(struct clk_mgr *clk_mgr) { struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); + init_clk_states(clk_mgr); // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h index dbfdd3487da5..2e0d34fd7512 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h @@ -43,7 +43,9 @@ #define DALSMC_MSG_ActiveUclkFclk 0x18 #define DALSMC_MSG_IdleUclkFclk 0x19 #define DALSMC_MSG_SetUclkPstateAllow 0x1A -#define DALSMC_Message_Count 0x1B +#define DALSMC_MSG_SubvpUclkFclk 0x1B +#define DALSMC_MSG_GetNumUmcChannels 0x1C +#define DALSMC_Message_Count 0x1D typedef enum { FCLK_SWITCH_DISALLOW, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 5b4e1e8a9ae2..8082bb877611 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -628,207 +628,6 @@ static void dcn401_update_clocks_update_dentist( } -static void dcn401_update_clocks_legacy(struct clk_mgr *clk_mgr_base, - struct dc_state *context, - bool safe_to_lower) -{ - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; - struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; - bool update_dppclk = false; - bool update_dispclk = false; - bool enter_display_off = false; - bool dpp_clock_lowered = false; - struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; - bool force_reset = false; - bool update_uclk = false, update_fclk = false; - bool p_state_change_support; - bool fclk_p_state_change_support; - int total_plane_count; - - if (dc->work_arounds.skip_clock_update) - return; - - if (clk_mgr_base->clks.dispclk_khz == 0 || - (dc->debug.force_clock_mode & 0x1)) { - /* This is from resume or boot up, if forced_clock cfg option used, - * we bypass program dispclk and 
DPPCLK, but need set them for S3. - */ - force_reset = true; - - dcn2_read_clocks_from_hw_dentist(clk_mgr_base); - - /* Force_clock_mode 0x1: force reset the clock even it is the same clock - * as long as it is in Passive level. - */ - } - display_count = clk_mgr_helper_get_active_display_cnt(dc, context); - - if (display_count == 0) - enter_display_off = true; - - if (clk_mgr->smu_present) { - if (enter_display_off == safe_to_lower) - dcn401_smu_set_num_of_displays(clk_mgr, display_count); - - clk_mgr_base->clks.fclk_prev_p_state_change_support = clk_mgr_base->clks.fclk_p_state_change_support; - - total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context); - fclk_p_state_change_support = new_clocks->fclk_p_state_change_support || (total_plane_count == 0); - - if (should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_p_state_change_support)) { - clk_mgr_base->clks.fclk_p_state_change_support = fclk_p_state_change_support; - - /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW */ - if (clk_mgr_base->clks.fclk_p_state_change_support) { - /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ - dcn401_smu_send_fclk_pstate_message(clk_mgr, true); - } - } - - if (dc->debug.force_min_dcfclk_mhz > 0) - new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ? - new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000); - - if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) { - clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz; - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK)) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DCFCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz)); - } - - if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) { - clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK)) - dcn401_smu_set_min_deep_sleep_dcef_clk(clk_mgr, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz)); - } - - if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz)) - /* We don't actually care about socclk, don't notify SMU of hard min */ - clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz; - - clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; - clk_mgr_base->clks.prev_num_ways = clk_mgr_base->clks.num_ways; - - if (clk_mgr_base->clks.num_ways != new_clocks->num_ways && - clk_mgr_base->clks.num_ways < new_clocks->num_ways) { - clk_mgr_base->clks.num_ways = new_clocks->num_ways; - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways); - } - - - p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0); - if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.prev_p_state_change_support)) { - clk_mgr_base->clks.p_state_change_support = p_state_change_support; - clk_mgr_base->clks.fw_based_mclk_switching = p_state_change_support && new_clocks->fw_based_mclk_switching; - - /* to disable P-State switching, set UCLK min = max */ - if (!clk_mgr_base->clks.p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - 
clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1].memclk_mhz); - } - - /* Always update saved value, even if new value not set due to P-State switching unsupported. Also check safe_to_lower for FCLK */ - if (safe_to_lower && (clk_mgr_base->clks.fclk_p_state_change_support != clk_mgr_base->clks.fclk_prev_p_state_change_support)) { - update_fclk = true; - } - - if (!clk_mgr_base->clks.fclk_p_state_change_support && - update_fclk && - dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_FCLK)) { - /* Handle code for sending a message to PMFW that FCLK P-state change is not supported */ - dcn401_smu_send_fclk_pstate_message(clk_mgr, false); - } - - /* Always update saved value, even if new value not set due to P-State switching unsupported */ - if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) { - clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz; - update_uclk = true; - } - - /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */ - if (clk_mgr_base->clks.p_state_change_support && - (update_uclk || !clk_mgr_base->clks.prev_p_state_change_support) && - dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); - - if (clk_mgr_base->clks.num_ways != new_clocks->num_ways && - clk_mgr_base->clks.num_ways > new_clocks->num_ways) { - clk_mgr_base->clks.num_ways = new_clocks->num_ways; - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways); - } - } - - if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) { - if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz) - dpp_clock_lowered = true; - - clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; - clk_mgr_base->clks.actual_dppclk_khz = new_clocks->dppclk_khz; - - if (clk_mgr->smu_present && !dpp_clock_lowered && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK)) - clk_mgr_base->clks.actual_dppclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK, clk_mgr_base->clks.dppclk_khz); - update_dppclk = true; - } - - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - - if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK)) - clk_mgr_base->clks.actual_dispclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DISPCLK, clk_mgr_base->clks.dispclk_khz); - - update_dispclk = true; - } - - if (!new_clocks->dtbclk_en && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) { - new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; - } - - /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */ - if (!dc->debug.disable_dtb_ref_clk_switch && - should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000) && - dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) { - /* DCCG requires KHz precision for DTBCLK */ - clk_mgr_base->clks.ref_dtbclk_khz = - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz)); - - dcn401_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); - } - - if (dc->config.forced_clocks == false || (force_reset && 
safe_to_lower)) { - if (dpp_clock_lowered) { - /* if clock is being lowered, increase DTO before lowering refclk */ - dcn401_update_clocks_update_dpp_dto(clk_mgr, context, - safe_to_lower, clk_mgr_base->clks.dppclk_khz); - dcn401_update_clocks_update_dentist(clk_mgr, context); - if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK)) { - clk_mgr_base->clks.actual_dppclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK, - clk_mgr_base->clks.dppclk_khz); - dcn401_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower, - clk_mgr_base->clks.actual_dppclk_khz); - } - - } else { - /* if clock is being raised, increase refclk before lowering DTO */ - if (update_dppclk || update_dispclk) - dcn401_update_clocks_update_dentist(clk_mgr, context); - /* There is a check inside dcn20_update_clocks_update_dpp_dto which ensures - * that we do not lower dto when it is not safe to lower. We do not need to - * compare the current and new dppclk before calling this function. - */ - dcn401_update_clocks_update_dpp_dto(clk_mgr, context, - safe_to_lower, clk_mgr_base->clks.actual_dppclk_khz); - } - } - - if (update_dispclk && dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) - /*update dmcu for wait_loop count*/ - dmcu->funcs->set_psr_wait_loop(dmcu, - clk_mgr_base->clks.dispclk_khz / 1000 / 7); -} - static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned int num_steps) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); @@ -1008,15 +807,15 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( update_active_fclk = true; update_idle_fclk = true; - /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW */ - if (clk_mgr_base->clks.fclk_p_state_change_support) { - /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ - if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { - block_sequence[num_steps].params.update_pstate_support_params.support = true; - block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; - num_steps++; - } - } + /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW (message not supported on DCN401)*/ + // if (clk_mgr_base->clks.fclk_p_state_change_support) { + // /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ + // if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { + // block_sequence[num_steps].params.update_pstate_support_params.support = true; + // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; + // num_steps++; + // } + // } } if (!clk_mgr_base->clks.fclk_p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { @@ -1224,14 +1023,14 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( // (*num_steps)++; // } - /* disable FCLK P-State support if needed */ - if (!fclk_p_state_change_support && - should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support) && - dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { - block_sequence[num_steps].params.update_pstate_support_params.support = false; - block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; - num_steps++; - } + /* disable FCLK P-State support if needed (message not supported on DCN401)*/ + // if (!fclk_p_state_change_support && + // should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, 
clk_mgr_base->clks.fclk_prev_p_state_change_support) && + // dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { + // block_sequence[num_steps].params.update_pstate_support_params.support = false; + // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; + // num_steps++; + // } } if (new_clocks->fw_based_mclk_switching != clk_mgr_base->clks.fw_based_mclk_switching && @@ -1412,11 +1211,6 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, unsigned int num_steps = 0; - if (dc->debug.enable_legacy_clock_update) { - dcn401_update_clocks_legacy(clk_mgr_base, context, safe_to_lower); - return; - } - /* build bandwidth related clocks update sequence */ num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, context, @@ -1551,6 +1345,20 @@ static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool curren dcn401_execute_block_sequence(clk_mgr_base, num_steps); } +static int dcn401_get_hard_min_memclk(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + return clk_mgr->base.ctx->dc->current_state->bw_ctx.bw.dcn.clk.dramclk_khz; +} + +static int dcn401_get_hard_min_fclk(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + return clk_mgr->base.ctx->dc->current_state->bw_ctx.bw.dcn.clk.fclk_khz; +} + /* Get current memclk states, update bounding box */ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) { @@ -1595,6 +1403,15 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) if (clk_mgr->dpm_present && !num_levels) clk_mgr->dpm_present = false; + clk_mgr_base->bw_params->num_channels = dcn401_smu_get_num_of_umc_channels(clk_mgr); + if (clk_mgr_base->ctx->dc_bios) { + /* use BIOS values if none provided by PMFW */ + if (clk_mgr_base->bw_params->num_channels == 0) { + clk_mgr_base->bw_params->num_channels = clk_mgr_base->ctx->dc_bios->vram_info.num_chans; + } + clk_mgr_base->bw_params->dram_channel_width_bytes = clk_mgr_base->ctx->dc_bios->vram_info.dram_channel_width_bytes; + } + /* Refresh bounding box */ clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box( clk_mgr->base.ctx->dc, clk_mgr_base->bw_params); @@ -1684,6 +1501,8 @@ static struct clk_mgr_funcs dcn401_funcs = { .enable_pme_wa = dcn401_enable_pme_wa, .is_smu_present = dcn401_is_smu_present, .get_dispclk_from_dentist = dcn401_get_dispclk_from_dentist, + .get_hard_min_memclk = dcn401_get_hard_min_memclk, + .get_hard_min_fclk = dcn401_get_hard_min_fclk, }; struct clk_mgr_internal *dcn401_clk_mgr_construct( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c index b02a41179b41..21c35528f61f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c @@ -25,6 +25,9 @@ #ifndef DALSMC_MSG_SubvpUclkFclk #define DALSMC_MSG_SubvpUclkFclk 0x1B #endif +#ifndef DALSMC_MSG_GetNumUmcChannels +#define DALSMC_MSG_GetNumUmcChannels 0x1C +#endif /* * Function to be used instead of REG_WAIT macro because the wait ends when @@ -334,3 +337,14 @@ void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t n dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_NumOfDisplays, num_displays, NULL); } + +unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr) +{ + unsigned int 
response = 0; + + dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_GetNumUmcChannels, 0, &response); + + smu_print("SMU Get Num UMC Channels: num_umc_channels = %d\n", response); + + return response; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h index 42cf7885a7cb..e02eb1294b37 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h @@ -28,5 +28,6 @@ bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, uint16_t fclk_freq_mhz); void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz); void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays); +unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr); #endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index dfa36368ae63..cecaadf741ad 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -615,6 +615,68 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream, return true; } + +static void +dc_stream_forward_dmub_multiple_crc_window(struct dc_dmub_srv *dmub_srv, + struct crc_window *window, struct otg_phy_mux *mux_mapping, bool stop) +{ + int i; + union dmub_rb_cmd cmd = {0}; + + cmd.secure_display.mul_roi_ctl.phy_id = mux_mapping->phy_output_num; + cmd.secure_display.mul_roi_ctl.otg_id = mux_mapping->otg_output_num; + + cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY; + + if (stop) { + cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_STOP_UPDATE; + } else { + cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_WIN_NOTIFY; + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { + cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_start = window[i].rect.x; + cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_start = window[i].rect.y; + cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_end = window[i].rect.x + window[i].rect.width; + cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_end = window[i].rect.y + window[i].rect.height; + cmd.secure_display.mul_roi_ctl.roi_ctl[i].enable = window[i].enable; + } + } + + dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); +} + +bool +dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream, + struct crc_window *window, uint8_t phy_id, bool stop) +{ + struct dc_dmub_srv *dmub_srv; + struct otg_phy_mux mux_mapping; + struct pipe_ctx *pipe; + int i; + struct dc *dc = stream->ctx->dc; + + for (i = 0; i < MAX_PIPES; i++) { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe) + break; + } + + /* Stream not found */ + if (i == MAX_PIPES) + return false; + + mux_mapping.phy_output_num = phy_id; + mux_mapping.otg_output_num = pipe->stream_res.tg->inst; + + dmub_srv = dc->ctx->dmub_srv; + + /* forward to dmub only. no dmcu support*/ + if (dmub_srv) + dc_stream_forward_dmub_multiple_crc_window(dmub_srv, window, &mux_mapping, stop); + else + return false; + + return true; +} #endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */ /** @@ -625,15 +687,17 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream, * @enable: Enable CRC if true, disable otherwise. * @continuous: Capture CRC on every frame if true. 
Otherwise, only capture * once. + * @idx: Index of the CRC engine instance to capture on + * @reset: Whether to reset the CRC engine before configuring it * - * By default, only CRC0 is configured, and the entire frame is used to - * calculate the CRC. + * By default, the entire frame is used to calculate the CRC. * * Return: %false if the stream is not found or CRC capture is not supported; * %true if the stream has been configured. */ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, - struct crc_params *crc_window, bool enable, bool continuous) + struct crc_params *crc_window, bool enable, bool continuous, + uint8_t idx, bool reset) { struct pipe_ctx *pipe; struct crc_params param; @@ -677,6 +741,9 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, param.continuous_mode = continuous; param.enable = enable; + param.crc_eng_inst = idx; + param.reset = reset; + tg = pipe->stream_res.tg; /* Only call if supported */ @@ -691,6 +758,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, * * @dc: DC object. * @stream: The DC stream state of the stream to get CRCs from. + * @idx: Index of the CRC engine to get the CRC from * @r_cr: CRC value for the red component. * @g_y: CRC value for the green component. * @b_cb: CRC value for the blue component. @@ -700,7 +768,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, * Return: * %false if stream is not found, or if CRCs are not enabled. */ -bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, +bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { int i; @@ -721,7 +789,7 @@ bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, tg = pipe->stream_res.tg; if (tg->funcs->get_crc) - return tg->funcs->get_crc(tg, r_cr, g_y, b_cb); + return tg->funcs->get_crc(tg, idx, r_cr, g_y, b_cb); DC_LOG_WARNING("CRC capture not supported."); return false; } @@ -1173,6 +1241,8 @@ static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *conte get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); else if (dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2) get_fams2_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color)); + else if (dc->debug.visual_confirm == VISUAL_CONFIRM_VABC) + get_vabc_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); } } } @@ -2492,7 +2562,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc *dc, if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, - sizeof(union dc_tiling_info)) != 0) { + sizeof(struct dc_tiling_info)) != 0) { update_flags->bits.swizzle_change = 1; elevate_update_type(&update_type, UPDATE_TYPE_MED); @@ -4519,7 +4589,7 @@ static bool commit_minimal_transition_based_on_current_context(struct dc *dc, struct pipe_split_policy_backup policy; struct dc_state *intermediate_context; struct dc_state *old_current_state = dc->current_state; - struct dc_surface_update srf_updates[MAX_SURFACE_NUM] = {0}; + struct dc_surface_update srf_updates[MAX_SURFACES] = {0}; int surface_count; /* @@ -5316,13 +5386,9 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state) dc->vm_pa_config.valid) { dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config); } - /*mark d0 last*/ - dc->power_state = power_state; break; default: ASSERT(dc->current_state->stream_count == 0); - /*mark d3 first*/ - dc->power_state = 
power_state; dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); dc_state_destruct(dc->current_state); @@ -5446,6 +5512,11 @@ bool dc_set_ips_disable(struct dc *dc, unsigned int disable_ips) void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const *caller_name) { + int idle_fclk_khz = 0, idle_dramclk_khz = 0, i = 0; + enum mall_stream_type subvp_pipe_type[MAX_PIPES] = {0}; + struct pipe_ctx *pipe = NULL; + struct dc_state *context = dc->current_state; + if (dc->debug.disable_idle_power_optimizations) { DC_LOG_DEBUG("%s: disabled\n", __func__); return; @@ -5470,6 +5541,23 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const dc->idle_optimizations_allowed = allow; DC_LOG_DEBUG("%s: %s\n", __func__, allow ? "enabled" : "disabled"); } + + // log idle clocks and sub vp pipe types at idle optimization time + if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_fclk) + idle_fclk_khz = dc->clk_mgr->funcs->get_hard_min_fclk(dc->clk_mgr); + + if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_memclk) + idle_dramclk_khz = dc->clk_mgr->funcs->get_hard_min_memclk(dc->clk_mgr); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe); + } + + DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n", + __func__, allow, idle_fclk_khz, idle_dramclk_khz, subvp_pipe_type[0], subvp_pipe_type[1], subvp_pipe_type[2], + subvp_pipe_type[3], subvp_pipe_type[4], subvp_pipe_type[5], caller_name); + } void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 252af83e34a5..6eb9bae3af91 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -425,6 +425,44 @@ void get_hdr_visual_confirm_color( } } +/* Visual Confirm color definition for VABC */ +void get_vabc_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color) +{ + uint32_t color_value = MAX_TG_COLOR_VALUE; + struct dc_link *edp_link = NULL; + + if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link) { + if (pipe_ctx->stream->link->connector_signal == SIGNAL_TYPE_EDP) + edp_link = pipe_ctx->stream->link; + } + + if (edp_link) { + switch (edp_link->backlight_control_type) { + case BACKLIGHT_CONTROL_PWM: + color->color_r_cr = color_value; + color->color_g_y = 0; + color->color_b_cb = 0; + break; + case BACKLIGHT_CONTROL_AMD_AUX: + color->color_r_cr = 0; + color->color_g_y = color_value; + color->color_b_cb = 0; + break; + case BACKLIGHT_CONTROL_VESA_AUX: + color->color_r_cr = 0; + color->color_g_y = 0; + color->color_b_cb = color_value; + break; + } + } else { + color->color_r_cr = 0; + color->color_g_y = 0; + color->color_b_cb = 0; + } +} + void get_subvp_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index 457d60eeb486..c1b79b379447 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -125,6 +125,14 @@ uint32_t dc_link_bandwidth_kbps( return link->dc->link_srv->dp_link_bandwidth_kbps(link, link_settings); } +uint32_t 
dc_link_required_hblank_size_bytes( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params) +{ + return link->dc->link_srv->dp_required_hblank_size_bytes(link, + audio_params); +} + void dc_get_cur_link_res_map(const struct dc *dc, uint32_t *map) { dc->link_srv->get_cur_res_map(dc, map); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 626f75b6ad00..520a34a42827 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -4478,7 +4478,7 @@ static void set_hfvs_info_packet( static void adaptive_sync_override_dp_info_packets_sdp_line_num( const struct dc_crtc_timing *timing, struct enc_sdp_line_num *sdp_line_num, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param) + unsigned int vstartup_start) { uint32_t asic_blank_start = 0; uint32_t asic_blank_end = 0; @@ -4493,8 +4493,8 @@ static void adaptive_sync_override_dp_info_packets_sdp_line_num( asic_blank_end = (asic_blank_start - tg->v_border_bottom - tg->v_addressable - tg->v_border_top); - if (pipe_dlg_param->vstartup_start > asic_blank_end) { - v_update = (tg->v_total - (pipe_dlg_param->vstartup_start - asic_blank_end)); + if (vstartup_start > asic_blank_end) { + v_update = (tg->v_total - (vstartup_start - asic_blank_end)); sdp_line_num->adaptive_sync_line_num_valid = true; sdp_line_num->adaptive_sync_line_num = (tg->v_total - v_update - 1); } else { @@ -4507,7 +4507,7 @@ static void set_adaptive_sync_info_packet( struct dc_info_packet *info_packet, const struct dc_stream_state *stream, struct encoder_info_frame *info_frame, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param) + unsigned int vstartup_start) { if (!stream->adaptive_sync_infopacket.valid) return; @@ -4515,7 +4515,7 @@ static void set_adaptive_sync_info_packet( adaptive_sync_override_dp_info_packets_sdp_line_num( &stream->timing, &info_frame->sdp_line_num, - pipe_dlg_param); + vstartup_start); *info_packet = stream->adaptive_sync_infopacket; } @@ -4548,6 +4548,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) { enum signal_type signal = SIGNAL_TYPE_NONE; struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; + unsigned int vstartup_start = 0; /* default all packets to invalid */ info->avi.valid = false; @@ -4561,6 +4562,9 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) info->adaptive_sync.valid = false; signal = pipe_ctx->stream->signal; + if (pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe) + vstartup_start = pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe(pipe_ctx); + /* HDMi and DP have different info packets*/ if (dc_is_hdmi_signal(signal)) { set_avi_info_frame(&info->avi, pipe_ctx); @@ -4582,7 +4586,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) set_adaptive_sync_info_packet(&info->adaptive_sync, pipe_ctx->stream, info, - &pipe_ctx->pipe_dlg_param); + vstartup_start); } patch_gamut_packet_checksum(&info->gamut); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index e006f816ff2f..1b2cce127981 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -483,9 +483,9 @@ bool dc_state_add_plane( if (stream_status == NULL) { dm_error("Existing stream not found; failed to attach surface!\n"); goto out; - } else if (stream_status->plane_count == MAX_SURFACE_NUM) { + } else if 
(stream_status->plane_count == MAX_SURFACES) { dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n", - plane_state, MAX_SURFACE_NUM); + plane_state, MAX_SURFACES); goto out; } else if (!otg_master_pipe) { goto out; @@ -600,7 +600,7 @@ bool dc_state_rem_all_planes_for_stream( { int i, old_plane_count; struct dc_stream_status *stream_status = NULL; - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; + struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; for (i = 0; i < state->stream_count; i++) if (state->streams[i] == stream) { @@ -875,7 +875,7 @@ bool dc_state_rem_all_phantom_planes_for_stream( { int i, old_plane_count; struct dc_stream_status *stream_status = NULL; - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; + struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; for (i = 0; i < state->stream_count; i++) if (state->streams[i] == phantom_stream) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 701b7e4f2920..e8134c47fe0d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -37,6 +37,8 @@ #define DC_LOGGER dc->ctx->logger #ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif +#ifndef MAX #define MAX(x, y) ((x > y) ? x : y) #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8c6347413038..053481ab69ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,9 +55,9 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.314" +#define DC_VER "3.2.316" -#define MAX_SURFACES 3 +#define MAX_SURFACES 4 #define MAX_PLANES 6 #define MAX_STREAMS 6 #define MIN_VIEWPORT_SIZE 12 @@ -472,6 +472,7 @@ struct dc_config { bool disable_hbr_audio_dp2; bool consolidated_dpia_dp_lt; bool set_pipe_unlock_order; + bool enable_dpia_pre_training; }; enum visual_confirm { @@ -488,6 +489,7 @@ enum visual_confirm { VISUAL_CONFIRM_MCLK_SWITCH = 16, VISUAL_CONFIRM_FAMS2 = 19, VISUAL_CONFIRM_HW_CURSOR = 20, + VISUAL_CONFIRM_VABC = 21, }; enum dc_psr_power_opts { @@ -775,7 +777,8 @@ union dpia_debug_options { uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ uint32_t disable_usb4_pm_support:1; /* bit 5 */ uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */ - uint32_t reserved:25; + uint32_t enable_dpia_pre_training:1; /* bit 7 */ + uint32_t reserved:24; } bits; uint32_t raw; }; @@ -1060,7 +1063,6 @@ struct dc_debug_options { uint32_t dml21_disable_pstate_method_mask; union fw_assisted_mclk_switch_version fams_version; union dmub_fams2_global_feature_config fams2_config; - bool enable_legacy_clock_update; unsigned int force_cositing; unsigned int disable_spl; unsigned int force_easf; @@ -1305,7 +1307,7 @@ struct dc_plane_state { struct rect clip_rect; struct plane_size plane_size; - union dc_tiling_info tiling_info; + struct dc_tiling_info tiling_info; struct dc_plane_dcc_param dcc; @@ -1376,7 +1378,7 @@ struct dc_plane_state { struct dc_plane_info { struct plane_size plane_size; - union dc_tiling_info tiling_info; + struct dc_tiling_info tiling_info; struct dc_plane_dcc_param dcc; enum surface_pixel_format format; enum dc_rotation_angle rotation; @@ -1403,7 +1405,7 @@ struct dc_scratch_space { * store current value in plane states so we can still recover * a valid current state during dc update. 
*/ - struct dc_plane_state plane_states[MAX_SURFACE_NUM]; + struct dc_plane_state plane_states[MAX_SURFACES]; struct dc_stream_state stream_state; }; @@ -2025,6 +2027,24 @@ uint32_t dc_link_bandwidth_kbps( const struct dc_link *link, const struct dc_link_settings *link_setting); +struct dp_audio_bandwidth_params { + const struct dc_crtc_timing *crtc_timing; + enum dp_link_encoding link_encoding; + uint32_t channel_count; + uint32_t sample_rate_hz; +}; + +/* The function calculates the minimum size of hblank (in bytes) needed to + * support the specified channel count and sample rate combination, given the + * link encoding and timing to be used. This calculation is not supported + * for 8b/10b SST. + * + * return - min hblank size in bytes, 0 if 8b/10b SST. + */ +uint32_t dc_link_required_hblank_size_bytes( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params); + /* The function takes a snapshot of current link resource allocation state * @dc: pointer to dc of the dm calling this * @map: a dc link resource snapshot defined internally to dc. @@ -2384,6 +2404,13 @@ struct dc_sink_dsc_caps { struct dsc_dec_dpcd_caps dsc_dec_caps; }; +struct dc_sink_hblank_expansion_caps { + // 'true' if these are virtual DPCD's HBlank expansion caps (immediately upstream of sink in MST topology), + // 'false' if they are sink's HBlank expansion caps + bool is_virtual_dpcd_hblank_expansion; + struct hblank_expansion_dpcd_caps dpcd_caps; +}; + struct dc_sink_fec_caps { bool is_rx_fec_supported; bool is_topology_fec_supported; @@ -2410,6 +2437,7 @@ struct dc_sink { struct scdc_caps scdc_caps; struct dc_sink_dsc_caps dsc_caps; struct dc_sink_fec_caps fec_caps; + struct dc_sink_hblank_expansion_caps hblank_expansion_caps; bool is_vsc_sdp_colorimetry_supported; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 8dd6eb044829..94ce8fe74481 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -969,6 +969,21 @@ union dp_sink_video_fallback_formats { uint8_t raw; }; +union dp_receive_port0_cap { + struct { + uint8_t RESERVED :1; + uint8_t LOCAL_EDID_PRESENT :1; + uint8_t ASSOCIATED_TO_PRECEDING_PORT:1; + uint8_t HBLANK_EXPANSION_CAPABLE :1; + uint8_t BUFFER_SIZE_UNIT :1; + uint8_t BUFFER_SIZE_PER_PORT :1; + uint8_t HBLANK_REDUCTION_CAPABLE :1; + uint8_t RESERVED2:1; + uint8_t BUFFER_SIZE:8; + } bits; + uint8_t raw[2]; +}; + union dpcd_max_uncompressed_pixel_rate_cap { struct { uint16_t max_uncompressed_pixel_rate_cap :15; @@ -1193,6 +1208,7 @@ struct dpcd_caps { struct replay_info pr_info; uint16_t edp_oled_emission_rate; + union dp_receive_port0_cap receive_port0_cap; }; union dpcd_sink_ext_caps { diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index 9014c2409817..9d18f1c08079 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -94,6 +94,11 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( const int num_slices_h, const bool is_dp); +void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc, + const struct dsc_dec_dpcd_caps *dsc_sink_caps); +void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc, + const struct dc_crtc_timing *timing); + /* TODO - Hardware/specs limitation should be owned by dc dsc and returned to DM, * and DM can choose to OVERRIDE the limitation on CASE BY CASE basis. * Hardware/specs limitation should not be writable by DM. 
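As an aside on the new audio-bandwidth export declared above: a DM-side caller is expected to fill a struct dp_audio_bandwidth_params with the stream's timing and the audio mode in use, then call dc_link_required_hblank_size_bytes(). The following is a minimal sketch under stated assumptions — the link/stream sources, the 128b/132b encoding, and the 8-channel 48 kHz audio mode are illustrative values, not part of this patch:

/*
 * Hedged usage sketch for dc_link_required_hblank_size_bytes().
 * 'link' and 'stream' are assumed to be valid objects owned by the DM.
 */
static uint32_t example_min_hblank_bytes(const struct dc_link *link,
					 const struct dc_stream_state *stream)
{
	struct dp_audio_bandwidth_params audio_params = {
		.crtc_timing = &stream->timing,
		.link_encoding = DP_128b_132b_ENCODING, /* assumed link encoding */
		.channel_count = 8,                     /* assumed audio channels */
		.sample_rate_hz = 48000,                /* assumed sample rate */
	};

	/* Per the kernel-doc above, this returns 0 for 8b/10b SST, where
	 * the calculation is not supported.
	 */
	return dc_link_required_hblank_size_bytes(link, &audio_params);
}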
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index c10567ec1c81..5ac55601a6da 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -341,89 +341,101 @@ enum swizzle_mode_addr3_values { DC_ADDR3_SW_UNKNOWN = DC_ADDR3_SW_MAX }; -union dc_tiling_info { - - struct { - /* Specifies the number of memory banks for tiling - * purposes. - * Only applies to 2D and 3D tiling modes. - * POSSIBLE VALUES: 2,4,8,16 - */ - unsigned int num_banks; - /* Specifies the number of tiles in the x direction - * to be incorporated into the same bank. - * Only applies to 2D and 3D tiling modes. - * POSSIBLE VALUES: 1,2,4,8 - */ - unsigned int bank_width; - unsigned int bank_width_c; - /* Specifies the number of tiles in the y direction to - * be incorporated into the same bank. - * Only applies to 2D and 3D tiling modes. - * POSSIBLE VALUES: 1,2,4,8 - */ - unsigned int bank_height; - unsigned int bank_height_c; - /* Specifies the macro tile aspect ratio. Only applies - * to 2D and 3D tiling modes. - */ - unsigned int tile_aspect; - unsigned int tile_aspect_c; - /* Specifies the number of bytes that will be stored - * contiguously for each tile. - * If the tile data requires more storage than this - * amount, it is split into multiple slices. - * This field must not be larger than - * GB_ADDR_CONFIG.DRAM_ROW_SIZE. - * Only applies to 2D and 3D tiling modes. - * For color render targets, TILE_SPLIT >= 256B. - */ - enum tile_split_values tile_split; - enum tile_split_values tile_split_c; - /* Specifies the addressing within a tile. - * 0x0 - DISPLAY_MICRO_TILING - * 0x1 - THIN_MICRO_TILING - * 0x2 - DEPTH_MICRO_TILING - * 0x3 - ROTATED_MICRO_TILING - */ - enum tile_mode_values tile_mode; - enum tile_mode_values tile_mode_c; - /* Specifies the number of pipes and how they are - * interleaved in the surface. - * Refer to memory addressing document for complete - * details and constraints. - */ - unsigned int pipe_config; - /* Specifies the tiling mode of the surface. - * THIN tiles use an 8x8x1 tile size. - * THICK tiles use an 8x8x4 tile size. - * 2D tiling modes rotate banks for successive Z slices - * 3D tiling modes rotate pipes and banks for Z slices - * Refer to memory addressing document for complete - * details and constraints. - */ - enum array_mode_values array_mode; - } gfx8; +enum dc_gfxversion { + DcGfxVersion7 = 0, + DcGfxVersion8, + DcGfxVersion9, + DcGfxVersion10, + DcGfxVersion11, + DcGfxAddr3, + DcGfxVersionUnknown +}; + + struct dc_tiling_info { + unsigned int gfxversion; // Specifies which member of the union to use. Must be a value from the dc_gfxversion enum + union { + struct { + /* Specifies the number of memory banks for tiling + * purposes. + * Only applies to 2D and 3D tiling modes. + * POSSIBLE VALUES: 2,4,8,16 + */ + unsigned int num_banks; + /* Specifies the number of tiles in the x direction + * to be incorporated into the same bank. + * Only applies to 2D and 3D tiling modes. + * POSSIBLE VALUES: 1,2,4,8 + */ + unsigned int bank_width; + unsigned int bank_width_c; + /* Specifies the number of tiles in the y direction to + * be incorporated into the same bank. + * Only applies to 2D and 3D tiling modes. + * POSSIBLE VALUES: 1,2,4,8 + */ + unsigned int bank_height; + unsigned int bank_height_c; + /* Specifies the macro tile aspect ratio. Only applies + * to 2D and 3D tiling modes.
+ */ + unsigned int tile_aspect; + unsigned int tile_aspect_c; + /* Specifies the number of bytes that will be stored + * contiguously for each tile. + * If the tile data requires more storage than this + * amount, it is split into multiple slices. + * This field must not be larger than + * GB_ADDR_CONFIG.DRAM_ROW_SIZE. + * Only applies to 2D and 3D tiling modes. + * For color render targets, TILE_SPLIT >= 256B. + */ + enum tile_split_values tile_split; + enum tile_split_values tile_split_c; + /* Specifies the addressing within a tile. + * 0x0 - DISPLAY_MICRO_TILING + * 0x1 - THIN_MICRO_TILING + * 0x2 - DEPTH_MICRO_TILING + * 0x3 - ROTATED_MICRO_TILING + */ + enum tile_mode_values tile_mode; + enum tile_mode_values tile_mode_c; + /* Specifies the number of pipes and how they are + * interleaved in the surface. + * Refer to memory addressing document for complete + * details and constraints. + */ + unsigned int pipe_config; + /* Specifies the tiling mode of the surface. + * THIN tiles use an 8x8x1 tile size. + * THICK tiles use an 8x8x4 tile size. + * 2D tiling modes rotate banks for successive Z slices + * 3D tiling modes rotate pipes and banks for Z slices + * Refer to memory addressing document for complete + * details and constraints. + */ + enum array_mode_values array_mode; + } gfx8; - struct { - enum swizzle_mode_values swizzle; - unsigned int num_pipes; - unsigned int max_compressed_frags; - unsigned int pipe_interleave; - - unsigned int num_banks; - unsigned int num_shader_engines; - unsigned int num_rb_per_se; - bool shaderEnable; - - bool meta_linear; - bool rb_aligned; - bool pipe_aligned; - unsigned int num_pkrs; - } gfx9;/*gfx9, gfx10 and above*/ - struct { - enum swizzle_mode_addr3_values swizzle; - } gfx_addr3;/*gfx with addr3 and above*/ + struct { + enum swizzle_mode_values swizzle; + unsigned int num_pipes; + unsigned int max_compressed_frags; + unsigned int pipe_interleave; + + unsigned int num_banks; + unsigned int num_shader_engines; + unsigned int num_rb_per_se; + bool shaderEnable; + + bool meta_linear; + bool rb_aligned; + bool pipe_aligned; + unsigned int num_pkrs; + } gfx9;/*gfx9, gfx10 and above*/ + struct { + enum swizzle_mode_addr3_values swizzle; + } gfx_addr3;/*gfx with addr3 and above*/ + }; }; /* Rotation angle */ @@ -975,6 +987,9 @@ struct dc_crtc_timing { struct dc_crtc_timing_flags flags; uint32_t dsc_fixed_bits_per_pixel_x16; /* DSC target bitrate in 1/16 of bpp (e.g. 128 -> 8bpp) */ struct dc_dsc_config dsc_cfg; + + /* The number of pixels that HBlank has been expanded by from the original EDID timing. 
*/ + uint32_t expanded_hblank; }; enum trigger_delay { diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 713884103aea..3e303c7808fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -56,7 +56,7 @@ struct dc_stream_status { int plane_count; int audio_inst; struct timing_sync_info timing_sync_info; - struct dc_plane_state *plane_states[MAX_SURFACE_NUM]; + struct dc_plane_state *plane_states[MAX_SURFACES]; bool is_abm_supported; struct mall_stream_config mall_stream_config; bool fpo_in_use; @@ -539,16 +539,24 @@ bool dc_stream_forward_crc_window(struct dc_stream_state *stream, struct rect *rect, uint8_t phy_id, bool is_stop); + +bool dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream, + struct crc_window *window, + uint8_t phy_id, + bool stop); #endif bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, struct crc_params *crc_window, bool enable, - bool continuous); + bool continuous, + uint8_t idx, + bool reset); bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, + uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index edf4df1d03b5..0c2aa91f0a11 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -76,7 +76,6 @@ struct dc_perf_trace { unsigned long last_entry_write; }; -#define MAX_SURFACE_NUM 6 #define NUM_PIXEL_FORMATS 10 enum tiling_mode { @@ -875,6 +874,14 @@ struct dsc_dec_dpcd_caps { bool is_dp; /* Decoded format */ }; +struct hblank_expansion_dpcd_caps { + bool expansion_supported; + bool reduction_supported; + bool buffer_unit_bytes; /* True: buffer size in bytes. False: buffer size in pixels*/ + bool buffer_per_port; /* True: buffer size per port. False: buffer size per lane*/ + uint32_t buffer_size; /* Add 1 to value and multiply by 32 */ +}; + struct dc_golden_table { uint16_t dc_golden_table_ver; uint32_t aux_dphy_rx_control0_val; @@ -932,10 +939,17 @@ enum backlight_control_type { }; #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) +#define MAX_CRC_WINDOW_NUM 2 + struct otg_phy_mux { uint8_t phy_output_num; uint8_t otg_output_num; }; + +struct crc_window { + struct rect rect; + bool enable; +}; #endif enum dc_detect_reason { @@ -1052,10 +1066,13 @@ enum replay_FW_Message_type { union replay_error_status { struct { - unsigned char STATE_TRANSITION_ERROR :1; - unsigned char LINK_CRC_ERROR :1; - unsigned char DESYNC_ERROR :1; - unsigned char RESERVED :5; + unsigned int STATE_TRANSITION_ERROR :1; + unsigned int LINK_CRC_ERROR :1; + unsigned int DESYNC_ERROR :1; + unsigned int RESERVED_3 :1; + unsigned int LOW_RR_INCORRECT_VTOTAL :1; + unsigned int NO_DOUBLED_RR :1; + unsigned int RESERVED_6_7 :2; } bits; unsigned char raw; }; @@ -1102,6 +1119,8 @@ struct replay_config { union replay_error_status replay_error_status; /* Replay Low Hz enable Options */ union replay_low_refresh_rate_enable_options low_rr_enable_options; + /* Replay coasting vtotal is within low refresh rate range. 
*/ + bool low_rr_activated; }; /* Replay feature flags*/ @@ -1126,10 +1145,12 @@ struct replay_settings { uint32_t defer_update_coasting_vtotal_table[PR_COASTING_TYPE_NUM]; /* Maximum link off frame count */ uint32_t link_off_frame_count; - /* Replay pseudo vtotal for abm + ips on full screen video which can improve ips residency */ - uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal; + /* Replay pseudo vtotal for low refresh rate*/ + uint16_t low_rr_full_screen_video_pseudo_vtotal; /* Replay last pseudo vtotal set to DMUB */ uint16_t last_pseudo_vtotal; + /* Replay desync error */ + uint32_t replay_desync_error_fail_count; }; /* To split out "global" and "per-panel" config settings. diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index ebd174be5786..1c2009e38aa1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -98,7 +98,7 @@ static enum mi_bits_per_pixel get_mi_bpp( } static enum mi_tiling_format get_mi_tiling( - union dc_tiling_info *tiling_info) + struct dc_tiling_info *tiling_info) { switch (tiling_info->gfx8.array_mode) { case DC_ARRAY_1D_TILED_THIN1: @@ -133,7 +133,7 @@ static bool is_vert_scan(enum dc_rotation_angle rotation) static void dce_mi_program_pte_vm( struct mem_input *mi, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum dc_rotation_angle rotation) { struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi); @@ -430,7 +430,7 @@ static void dce120_mi_program_display_marks(struct mem_input *mi, } static void program_tiling( - struct dce_mem_input *dce_mi, const union dc_tiling_info *info) + struct dce_mem_input *dce_mi, const struct dc_tiling_info *info) { if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */ REG_UPDATE_6(GRPH_CONTROL, @@ -650,7 +650,7 @@ static void dce_mi_clear_tiling( static void dce_mi_program_surface_config( struct mem_input *mi, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -670,7 +670,7 @@ static void dce_mi_program_surface_config( static void dce60_mi_program_surface_config( struct mem_input *mi, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, /* not used in DCE6 */ struct dc_plane_dcc_param *dcc, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index bf636b28e3e1..5bb8b78bf250 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -63,7 +63,8 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, bool should_use_dmub_lock(struct dc_link *link) { - if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 || + link->psr_settings.psr_version == DC_PSR_VERSION_1) return true; if (link->replay_settings.replay_feature_enabled) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c index 8a3fbf95c48f..2c43c2422638 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c @@ -162,7 +162,7 @@ static void 
enable(struct dce_mem_input *mem_input110) static void program_tiling( struct dce_mem_input *mem_input110, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { uint32_t value = 0; @@ -523,7 +523,7 @@ static const unsigned int dvmm_Hw_Setting_Linear[4][9] = { /* Helper to get table entry from surface info */ static const unsigned int *get_dvmm_hw_setting( - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum surface_pixel_format format, bool chroma) { @@ -563,7 +563,7 @@ static const unsigned int *get_dvmm_hw_setting( static void dce_mem_input_v_program_pte_vm( struct mem_input *mem_input, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum dc_rotation_angle rotation) { struct dce_mem_input *mem_input110 = TO_DCE_MEM_INPUT(mem_input); @@ -636,7 +636,7 @@ static void dce_mem_input_v_program_pte_vm( static void dce_mem_input_v_program_surface_config( struct mem_input *mem_input, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index fa422a8cbced..61b0807693fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -2127,70 +2127,131 @@ bool dce110_configure_crc(struct timing_generator *tg, cntl_addr = CRTC_REG(mmCRTC_CRC_CNTL); - /* First, disable CRC before we configure it. */ - dm_write_reg(tg->ctx, cntl_addr, 0); + if (!params->enable || params->reset) + /* First, disable CRC before we configure it. */ + dm_write_reg(tg->ctx, cntl_addr, 0); if (!params->enable) return true; /* Program frame boundaries */ - /* Window A x axis start and end. */ - value = 0; - addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL); - set_reg_field_value(value, params->windowa_x_start, - CRTC_CRC0_WINDOWA_X_CONTROL, - CRTC_CRC0_WINDOWA_X_START); - set_reg_field_value(value, params->windowa_x_end, - CRTC_CRC0_WINDOWA_X_CONTROL, - CRTC_CRC0_WINDOWA_X_END); - dm_write_reg(tg->ctx, addr, value); - - /* Window A y axis start and end. */ - value = 0; - addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL); - set_reg_field_value(value, params->windowa_y_start, - CRTC_CRC0_WINDOWA_Y_CONTROL, - CRTC_CRC0_WINDOWA_Y_START); - set_reg_field_value(value, params->windowa_y_end, - CRTC_CRC0_WINDOWA_Y_CONTROL, - CRTC_CRC0_WINDOWA_Y_END); - dm_write_reg(tg->ctx, addr, value); - - /* Window B x axis start and end. */ - value = 0; - addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL); - set_reg_field_value(value, params->windowb_x_start, - CRTC_CRC0_WINDOWB_X_CONTROL, - CRTC_CRC0_WINDOWB_X_START); - set_reg_field_value(value, params->windowb_x_end, - CRTC_CRC0_WINDOWB_X_CONTROL, - CRTC_CRC0_WINDOWB_X_END); - dm_write_reg(tg->ctx, addr, value); - - /* Window B y axis start and end. */ - value = 0; - addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL); - set_reg_field_value(value, params->windowb_y_start, - CRTC_CRC0_WINDOWB_Y_CONTROL, - CRTC_CRC0_WINDOWB_Y_START); - set_reg_field_value(value, params->windowb_y_end, - CRTC_CRC0_WINDOWB_Y_CONTROL, - CRTC_CRC0_WINDOWB_Y_END); - dm_write_reg(tg->ctx, addr, value); - - /* Set crc mode and selection, and enable. 
Only using CRC0*/ - value = 0; - set_reg_field_value(value, params->continuous_mode ? 1 : 0, - CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); - set_reg_field_value(value, params->selection, - CRTC_CRC_CNTL, CRTC_CRC0_SELECT); - set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); - dm_write_reg(tg->ctx, cntl_addr, value); + switch (params->crc_eng_inst) { + case 0: + /* Window A x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL); + set_reg_field_value(value, params->windowa_x_start, + CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_START); + set_reg_field_value(value, params->windowa_x_end, + CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window A y axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL); + set_reg_field_value(value, params->windowa_y_start, + CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_START); + set_reg_field_value(value, params->windowa_y_end, + CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL); + set_reg_field_value(value, params->windowb_x_start, + CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_START); + set_reg_field_value(value, params->windowb_x_end, + CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B y axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL); + set_reg_field_value(value, params->windowb_y_start, + CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_START); + set_reg_field_value(value, params->windowb_y_end, + CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Set crc mode and selection, and enable.*/ + value = 0; + set_reg_field_value(value, params->continuous_mode ? 1 : 0, + CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); + set_reg_field_value(value, params->selection, + CRTC_CRC_CNTL, CRTC_CRC0_SELECT); + set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); + dm_write_reg(tg->ctx, cntl_addr, value); + break; + case 1: + /* Window A x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_X_CONTROL); + set_reg_field_value(value, params->windowa_x_start, + CRTC_CRC1_WINDOWA_X_CONTROL, + CRTC_CRC1_WINDOWA_X_START); + set_reg_field_value(value, params->windowa_x_end, + CRTC_CRC1_WINDOWA_X_CONTROL, + CRTC_CRC1_WINDOWA_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window A y axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_Y_CONTROL); + set_reg_field_value(value, params->windowa_y_start, + CRTC_CRC1_WINDOWA_Y_CONTROL, + CRTC_CRC1_WINDOWA_Y_START); + set_reg_field_value(value, params->windowa_y_end, + CRTC_CRC1_WINDOWA_Y_CONTROL, + CRTC_CRC1_WINDOWA_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B x axis start and end. */ + value = 0; + addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_X_CONTROL); + set_reg_field_value(value, params->windowb_x_start, + CRTC_CRC1_WINDOWB_X_CONTROL, + CRTC_CRC1_WINDOWB_X_START); + set_reg_field_value(value, params->windowb_x_end, + CRTC_CRC1_WINDOWB_X_CONTROL, + CRTC_CRC1_WINDOWB_X_END); + dm_write_reg(tg->ctx, addr, value); + + /* Window B y axis start and end. 
*/ + value = 0; + addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_Y_CONTROL); + set_reg_field_value(value, params->windowb_y_start, + CRTC_CRC1_WINDOWB_Y_CONTROL, + CRTC_CRC1_WINDOWB_Y_START); + set_reg_field_value(value, params->windowb_y_end, + CRTC_CRC1_WINDOWB_Y_CONTROL, + CRTC_CRC1_WINDOWB_Y_END); + dm_write_reg(tg->ctx, addr, value); + + /* Set crc mode and selection, and enable.*/ + value = 0; + set_reg_field_value(value, params->continuous_mode ? 1 : 0, + CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); + set_reg_field_value(value, params->selection, + CRTC_CRC_CNTL, CRTC_CRC1_SELECT); + set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); + dm_write_reg(tg->ctx, cntl_addr, value); + break; + default: + return false; + } return true; } -bool dce110_get_crc(struct timing_generator *tg, +bool dce110_get_crc(struct timing_generator *tg, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { uint32_t addr = 0; @@ -2206,14 +2267,30 @@ bool dce110_get_crc(struct timing_generator *tg, if (!field) return false; - addr = CRTC_REG(mmCRTC_CRC0_DATA_RG); - value = dm_read_reg(tg->ctx, addr); - *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR); - *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y); + switch (idx) { + case 0: + addr = CRTC_REG(mmCRTC_CRC0_DATA_RG); + value = dm_read_reg(tg->ctx, addr); + *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR); + *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y); - addr = CRTC_REG(mmCRTC_CRC0_DATA_B); - value = dm_read_reg(tg->ctx, addr); - *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB); + addr = CRTC_REG(mmCRTC_CRC0_DATA_B); + value = dm_read_reg(tg->ctx, addr); + *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB); + break; + case 1: + addr = CRTC_REG(mmCRTC_CRC1_DATA_RG); + value = dm_read_reg(tg->ctx, addr); + *r_cr = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_R_CR); + *g_y = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_G_Y); + + addr = CRTC_REG(mmCRTC_CRC1_DATA_B); + value = dm_read_reg(tg->ctx, addr); + *b_cb = get_reg_field_value(value, CRTC_CRC1_DATA_B, CRC1_B_CB); + break; + default: + return false; + } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index ee4de740aceb..e4f5cad64f32 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -286,7 +286,7 @@ bool dce110_arm_vert_intr( bool dce110_configure_crc(struct timing_generator *tg, const struct crc_params *params); -bool dce110_get_crc(struct timing_generator *tg, +bool dce110_get_crc(struct timing_generator *tg, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); bool dce110_is_two_pixels_per_container(const struct dc_crtc_timing *timing); diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index fcf59348eb62..31c4f44ceaac 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -1100,45 +1100,79 @@ static bool dce120_configure_crc(struct timing_generator *tg, if (!dce120_is_tg_enabled(tg)) return false; - /* First, disable CRC before we configure it. */ - dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, - tg110->offsets.crtc, 0); + if (!params->enable || params->reset) + /* First, disable CRC before we configure it. 
*/ + dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, + tg110->offsets.crtc, 0); if (!params->enable) return true; /* Program frame boundaries */ - /* Window A x axis start and end. */ - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL, - CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start, - CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end); - - /* Window A y axis start and end. */ - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL, - CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start, - CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end); - - /* Window B x axis start and end. */ - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL, - CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start, - CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end); - - /* Window B y axis start and end. */ - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL, - CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start, - CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end); - - /* Set crc mode and selection, and enable. Only using CRC0*/ - CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, - CRTC_CRC_EN, params->continuous_mode ? 1 : 0, - CRTC_CRC0_SELECT, params->selection, - CRTC_CRC_EN, 1); + switch (params->crc_eng_inst) { + case 0: + /* Window A x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start, + CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start, + CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start, + CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start, + CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode and selection, and enable.*/ + CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, + CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + CRTC_CRC0_SELECT, params->selection, + CRTC_CRC_EN, 1); + break; + case 1: + /* Window A x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_X_CONTROL, + CRTC_CRC1_WINDOWA_X_START, params->windowa_x_start, + CRTC_CRC1_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_Y_CONTROL, + CRTC_CRC1_WINDOWA_Y_START, params->windowa_y_start, + CRTC_CRC1_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_X_CONTROL, + CRTC_CRC1_WINDOWB_X_START, params->windowb_x_start, + CRTC_CRC1_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_Y_CONTROL, + CRTC_CRC1_WINDOWB_Y_START, params->windowb_y_start, + CRTC_CRC1_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode and selection, and enable */ + CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, + CRTC_CRC_CONT_EN, params->continuous_mode ? 
1 : 0, + CRTC_CRC1_SELECT, params->selection, + CRTC_CRC_EN, 1); + break; + default: + return false; + } return true; } -static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr, - uint32_t *g_y, uint32_t *b_cb) +static bool dce120_get_crc(struct timing_generator *tg, uint8_t idx, + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); uint32_t value, field; @@ -1151,14 +1185,30 @@ static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr, if (!field) return false; - value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG, - tg110->offsets.crtc); - *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR); - *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y); + switch (idx) { + case 0: + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG, + tg110->offsets.crtc); + *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR); + *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y); - value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B, - tg110->offsets.crtc); - *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB); + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B, + tg110->offsets.crtc); + *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB); + break; + case 1: + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_RG, + tg110->offsets.crtc); + *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_R_CR); + *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_G_Y); + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_B, + tg110->offsets.crtc); + *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_B, CRC1_B_CB); + break; + default: + return false; + } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c index b2cea59ba5d4..9a92f73d5b7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c @@ -653,8 +653,9 @@ void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, struct dc_lin if (!query_dp_alt_from_dmub(enc, &cmd)) return; - if (cmd.query_dp_alt.data.is_usb && - cmd.query_dp_alt.data.is_dp4 == 0) + if (cmd.query_dp_alt.data.is_dp_alt_disable == 0 && + cmd.query_dp_alt.data.is_usb && + cmd.query_dp_alt.data.is_dp4 == 0) link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count); return; diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c index d4a3e811aa39..ea0c9a9d0bd6 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c @@ -28,6 +28,7 @@ #include "link_encoder.h" #include "dcn31/dcn31_dio_link_encoder.h" #include "dcn35_dio_link_encoder.h" +#include "dc_dmub_srv.h" #define CTX \ enc10->base.ctx #define DC_LOGGER \ @@ -159,6 +160,8 @@ static const struct link_encoder_funcs dcn35_link_enc_funcs = { .is_in_alt_mode = dcn31_link_encoder_is_in_alt_mode, .get_max_link_cap = dcn31_link_encoder_get_max_link_cap, .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux, + .enable_dpia_output = dcn35_link_encoder_enable_dpia_output, + .disable_dpia_output = dcn35_link_encoder_disable_dpia_output, }; void dcn35_link_encoder_construct( @@ 
-265,3 +268,80 @@ void dcn35_link_encoder_construct( enc10->base.features.flags.bits.HDMI_6GB_EN = 0; } + +/* DPIA equivalent of link_transmitter_control. */ +static bool link_dpia_control(struct dc_context *dc_ctx, + struct dmub_cmd_dig_dpia_control_data *dpia_control) +{ + union dmub_rb_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.dig1_dpia_control.header.type = DMUB_CMD__DPIA; + cmd.dig1_dpia_control.header.sub_type = + DMUB_CMD__DPIA_DIG1_DPIA_CONTROL; + cmd.dig1_dpia_control.header.payload_bytes = + sizeof(cmd.dig1_dpia_control) - + sizeof(cmd.dig1_dpia_control.header); + + cmd.dig1_dpia_control.dpia_control = *dpia_control; + + dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + + return true; +} + +static void link_encoder_disable(struct dcn10_link_encoder *enc10) +{ + /* reset training complete */ + REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, 0); +} + +void dcn35_link_encoder_enable_dpia_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + uint8_t dpia_id, + uint8_t digmode, + uint8_t fec_rdy) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct dmub_cmd_dig_dpia_control_data dpia_control = { 0 }; + + enc1_configure_encoder(enc10, link_settings); + + dpia_control.action = (uint8_t)TRANSMITTER_CONTROL_ENABLE; + dpia_control.enc_id = enc->preferred_engine; + dpia_control.mode_laneset.digmode = digmode; + dpia_control.lanenum = (uint8_t)link_settings->lane_count; + dpia_control.symclk_10khz = link_settings->link_rate * + LINK_RATE_REF_FREQ_IN_KHZ / 10; + /* DIG_BE_CNTL.DIG_HPD_SELECT set to 5 (hpdsel - 1) to indicate HPD pin unused by DPIA. */ + dpia_control.hpdsel = 6; + dpia_control.dpia_id = dpia_id; + dpia_control.fec_rdy = fec_rdy; + + DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id); + link_dpia_control(enc->ctx, &dpia_control); +} + +void dcn35_link_encoder_disable_dpia_output( + struct link_encoder *enc, + uint8_t dpia_id, + uint8_t digmode) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct dmub_cmd_dig_dpia_control_data dpia_control = { 0 }; + + if (enc->funcs->is_dig_enabled && !enc->funcs->is_dig_enabled(enc)) + return; + + dpia_control.action = (uint8_t)TRANSMITTER_CONTROL_DISABLE; + dpia_control.enc_id = enc->preferred_engine; + dpia_control.mode_laneset.digmode = digmode; + dpia_control.dpia_id = dpia_id; + + DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id); + link_dpia_control(enc->ctx, &dpia_control); + + link_encoder_disable(enc10); +} diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h index d546a3676304..f9d4221f4b43 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h @@ -144,4 +144,22 @@ bool dcn35_is_dig_enabled(struct link_encoder *enc); enum signal_type dcn35_get_dig_mode(struct link_encoder *enc); void dcn35_link_encoder_setup(struct link_encoder *enc, enum signal_type signal); +/* + * Enable DP transmitter and its encoder for dpia port. + */ +void dcn35_link_encoder_enable_dpia_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + uint8_t dpia_id, + uint8_t digmode, + uint8_t fec_rdy); + +/* + * Disable transmitter and its encoder for dpia port. 
+ */ +void dcn35_link_encoder_disable_dpia_output( + struct link_encoder *enc, + uint8_t dpia_id, + uint8_t digmode); + #endif /* __DC_LINK_ENCODER__DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 2e4a46f1b499..5efddd48d5c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -158,6 +158,11 @@ bool dm_helpers_dp_write_dsc_enable( const struct dc_stream_state *stream, bool enable ); + +bool dm_helpers_dp_write_hblank_reduction( + struct dc_context *ctx, + const struct dc_stream_state *stream); + bool dm_helpers_is_dp_sink_present( struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 86ac7d59fd32..0748ef36a16a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1595,6 +1595,7 @@ double dml32_TruncToValidBPP( unsigned int NonDSCBPP0; unsigned int NonDSCBPP1; unsigned int NonDSCBPP2; + unsigned int NonDSCBPP3 = BPP_INVALID; if (Format == dm_420) { NonDSCBPP0 = 12; @@ -1603,6 +1604,7 @@ double dml32_TruncToValidBPP( MinDSCBPP = 6; MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16; } else if (Format == dm_444) { + NonDSCBPP3 = 18; NonDSCBPP0 = 24; NonDSCBPP1 = 30; NonDSCBPP2 = 36; @@ -1667,6 +1669,8 @@ double dml32_TruncToValidBPP( return NonDSCBPP1; else if (MaxLinkBPP >= NonDSCBPP0) return 16.0; + else if ((Output == dm_dp2p0 || Output == dm_dp) && NonDSCBPP3 != BPP_INVALID && MaxLinkBPP >= NonDSCBPP3) + return NonDSCBPP3; // Special case to allow 6bpc RGB for DP connections. else return BPP_INVALID; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index beed7adbbd43..47d785204f29 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 1, + .do_urgent_latency_adjustment = 0, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h index 072bd0539605..6b2ab4ec2b5f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h @@ -66,11 +66,15 @@ static inline double dml_max5(double a, double b, double c, double d, double e) static inline double dml_ceil(double a, double granularity) { + if (granularity == 0) + return 0; return (double) dcn_bw_ceil2(a, granularity); } static inline double dml_floor(double a, double granularity) { + if (granularity == 0) + return 0; return (double) dcn_bw_floor2(a, granularity); } @@ -114,11 +118,15 @@ static inline double dml_ceil_2(double f) static inline double dml_ceil_ex(double x, double granularity) { + if (granularity == 0) + return 0; return (double) dcn_bw_ceil2(x, granularity); } static inline double dml_floor_ex(double x, double granularity) { + if (granularity == 0) + return 0; 
return (double) dcn_bw_floor2(x, granularity); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index d9c27ebe12ee..91c4f3b4bd5f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -29,7 +29,11 @@ dml2_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) +frame_warn_flag := -Wframe-larger-than=4096 +else frame_warn_flag := -Wframe-larger-than=3072 +endif else frame_warn_flag := -Wframe-larger-than=2048 endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 35bc917631ae..84a2de9a76d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -1736,7 +1736,7 @@ static void CalculateBytePerPixelAndBlockSizes( #endif } // CalculateBytePerPixelAndBlockSizes -static dml_float_t CalculateTWait( +static noinline_for_stack dml_float_t CalculateTWait( dml_uint_t PrefetchMode, enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal, @@ -4458,7 +4458,7 @@ static void CalculateSwathWidth( } } // CalculateSwathWidth -static dml_float_t CalculateExtraLatency( +static noinline_for_stack dml_float_t CalculateExtraLatency( dml_uint_t RoundTripPingLatencyCycles, dml_uint_t ReorderingBytes, dml_float_t DCFCLK, @@ -5915,7 +5915,7 @@ static dml_uint_t DSCDelayRequirement( return DSCDelayRequirement_val; } -static dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces, +static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces, dml_float_t ReturnBW, dml_bool_t NotUrgentLatencyHiding[], dml_float_t ReadBandwidthLuma[], @@ -6019,7 +6019,7 @@ static void CalculatePrefetchBandwithSupport( #endif } -static dml_float_t CalculateBandwidthAvailableForImmediateFlip( +static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip( dml_uint_t NumberOfActiveSurfaces, dml_float_t ReturnBW, dml_float_t ReadBandwidthLuma[], @@ -6213,7 +6213,7 @@ static dml_uint_t CalculateMaxVStartup( return max_vstartup_lines; } -static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, +static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params, dml_uint_t j, dml_uint_t k) @@ -6265,7 +6265,7 @@ static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *m CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; } -static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) +static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib) { struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index efb099905496..b9c6b45f6872 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -10,7 +10,6 @@ #include "dml21_utils.h" #include "dml21_translation_helper.h" #include "bounding_boxes/dcn4_soc_bb.h" -#include "bounding_boxes/dcn3_soc_bb.h" static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, @@ -20,10 +19,6 @@ static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_ const struct dml2_soc_qos_parameters *qos_params; switch (in_dc->ctx->dce_version) { - case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. - soc_bb = &dml2_socbb_dcn31; - qos_params = &dml_dcn31_soc_qos_params; - break; case DCN_VERSION_4_01: default: if (config->bb_from_dmub) @@ -60,9 +55,6 @@ static void dml21_init_ip_params(struct dml2_initialize_instance_in_out *dml_ini const struct dml2_ip_capabilities *ip_caps; switch (in_dc->ctx->dce_version) { - case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. - ip_caps = &dml2_dcn31_max_ip_caps; - break; case DCN_VERSION_4_01: default: ip_caps = &dml2_dcn401_max_ip_caps; @@ -302,12 +294,17 @@ void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_in dml_soc_bb->power_management_parameters.stutter_exit_latency_us = (in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns + 9) / 10; - if (in_dc->ctx->dc_bios->vram_info.num_chans) { + if (dc_bw_params->num_channels) { + dml_clk_table->dram_config.channel_count = dc_bw_params->num_channels; + dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; + } else if (in_dc->ctx->dc_bios->vram_info.num_chans) { dml_clk_table->dram_config.channel_count = in_dc->ctx->dc_bios->vram_info.num_chans; dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; } - if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) { + if (dc_bw_params->dram_channel_width_bytes) { + dml_clk_table->dram_config.channel_width_bytes = dc_bw_params->dram_channel_width_bytes; + } else if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) { dml_clk_table->dram_config.channel_width_bytes = in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; } @@ -721,11 +718,21 @@ static void populate_dml21_surface_config_from_plane_state( surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c; surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch; surface->dcc.plane1.pitch = plane_state->dcc.meta_pitch_c; - if (in_dc->ctx->dce_version < DCN_VERSION_4_01) { - /* needed for N-1 testing */ + + // Update swizzle / array mode based on the gfx_format + switch (plane_state->tiling_info.gfxversion) { + case DcGfxVersion7: + case DcGfxVersion8: + // Placeholder for programming the array_mode + break; + case DcGfxVersion9: + case DcGfxVersion10: + case DcGfxVersion11: surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle); - } else { + break; + case DcGfxAddr3: surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle); + break; } } @@ -1081,28 +1088,6 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz; } -void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, 
		enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx)
-{
-	struct dml2_core_internal_display_mode_lib *mode_lib = &in_ctx->v21.dml_init.dml2_instance->core_instance.clean_me_up.mode_lib;
-	double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
-	if (reg_set_idx >= DML2_DCHUB_WATERMARK_SET_NUM) {
-		/* invalid register set index */
-		return;
-	}
-	/* convert to legacy format (time in ns) */
-	watermark->urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0;
-	watermark->pte_meta_urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0;
-	watermark->cstate_pstate.cstate_enter_plus_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_enter / refclk_freq_in_mhz) * 1000.0;
-	watermark->cstate_pstate.cstate_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_exit / refclk_freq_in_mhz) * 1000.0;
-	watermark->cstate_pstate.pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].uclk_pstate / refclk_freq_in_mhz) * 1000.0;
-	watermark->urgent_latency_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0;
-	watermark->cstate_pstate.fclk_pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].fclk_pstate / refclk_freq_in_mhz) * 1000.0;
-	watermark->frac_urg_bw_flip = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_flip;
-	watermark->frac_urg_bw_nom = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_nom;
-}
 static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index)
 {
 	struct dml2_dchub_watermark_regs *wm_regs = NULL;
@@ -1146,53 +1131,6 @@ void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_se
 	}
 }
-
-void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming)
-{
-	unsigned int hactive, vactive, hblank_start, vblank_start, hblank_end, vblank_end;
-	struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
-	union dml2_global_sync_programming *global_sync = &stream_programming->global_sync;
-	hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right + pipe_ctx->hblank_borrow;
-	vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top;
-	hblank_start = pipe_ctx->stream->timing.h_total - pipe_ctx->stream->timing.h_front_porch;
-	vblank_start = pipe_ctx->stream->timing.v_total - pipe_ctx->stream->timing.v_front_porch;
-	hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right - pipe_ctx->hblank_borrow;
-	vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom;
-	if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
-		/* phantom has its own global sync */
-		global_sync = &stream_programming->phantom_stream.global_sync;
-	}
-	pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4x.vstartup_lines;
-	pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4x.vupdate_offset_pixels;
-	pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4x.vupdate_vupdate_width_pixels;
-	pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4x.vready_offset_pixels;
-	pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4x.pstate_keepout_start_lines;
-	pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst;
-	pipe_ctx->pipe_dlg_param.hactive = hactive;
-	pipe_ctx->pipe_dlg_param.vactive = vactive;
-	pipe_ctx->pipe_dlg_param.htotal = pipe_ctx->stream->timing.h_total;
-	pipe_ctx->pipe_dlg_param.vtotal = pipe_ctx->stream->timing.v_total;
-	pipe_ctx->pipe_dlg_param.hblank_end = hblank_end;
-	pipe_ctx->pipe_dlg_param.vblank_end = vblank_end;
-	pipe_ctx->pipe_dlg_param.hblank_start = hblank_start;
-	pipe_ctx->pipe_dlg_param.vblank_start = vblank_start;
-	pipe_ctx->pipe_dlg_param.vfront_porch = pipe_ctx->stream->timing.v_front_porch;
-	pipe_ctx->pipe_dlg_param.pixel_rate_mhz = pipe_ctx->stream->timing.pix_clk_100hz / 10000.00;
-	pipe_ctx->pipe_dlg_param.refresh_rate = ((timing->pix_clk_100hz * 100) / timing->h_total) / timing->v_total;
-	pipe_ctx->pipe_dlg_param.vtotal_max = pipe_ctx->stream->adjust.v_total_max;
-	pipe_ctx->pipe_dlg_param.vtotal_min = pipe_ctx->stream->adjust.v_total_min;
-	pipe_ctx->pipe_dlg_param.recout_height = pipe_ctx->plane_res.scl_data.recout.height;
-	pipe_ctx->pipe_dlg_param.recout_width = pipe_ctx->plane_res.scl_data.recout.width;
-	pipe_ctx->pipe_dlg_param.full_recout_height = pipe_ctx->plane_res.scl_data.recout.height;
-	pipe_ctx->pipe_dlg_param.full_recout_width = pipe_ctx->plane_res.scl_data.recout.width;
-}
 void dml21_map_hw_resources(struct dml2_context *dml_ctx)
 {
 	unsigned int i = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
index 476a7f6e4875..069b939c672a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
@@ -21,8 +21,6 @@ void dml21_initialize_soc_bb_params(struct dml2_initialize_instance_in_out *dml_
 void dml21_initialize_ip_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc);
 bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
 void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context);
-void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming);
-void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx);
 void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx);
 void dml21_map_hw_resources(struct dml2_context *dml_ctx);
 void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
index cb966f8d3216..1e56d995cd0e
100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
@@ -142,108 +142,21 @@ int dml21_find_dc_pipes_for_plane(const struct dc *in_dc,
 	return num_pipes;
 }
-
-void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs,
-	struct dml2_display_dlg_regs *disp_dlg_regs,
-	struct dml2_display_ttu_regs *disp_ttu_regs,
-	struct pipe_ctx *out)
+void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx,
+	struct dc_state *context,
+	struct pipe_ctx *pipe_ctx,
+	struct dml2_per_stream_programming *stream_programming)
 {
-	memset(&out->rq_regs, 0, sizeof(out->rq_regs));
-	out->rq_regs.rq_regs_l.chunk_size = rq_regs->rq_regs_l.chunk_size;
-	out->rq_regs.rq_regs_l.min_chunk_size = rq_regs->rq_regs_l.min_chunk_size;
-	//out->rq_regs.rq_regs_l.meta_chunk_size = rq_regs->rq_regs_l.meta_chunk_size;
-	//out->rq_regs.rq_regs_l.min_meta_chunk_size = rq_regs->rq_regs_l.min_meta_chunk_size;
-	out->rq_regs.rq_regs_l.dpte_group_size = rq_regs->rq_regs_l.dpte_group_size;
-	out->rq_regs.rq_regs_l.mpte_group_size = rq_regs->rq_regs_l.mpte_group_size;
-	out->rq_regs.rq_regs_l.swath_height = rq_regs->rq_regs_l.swath_height;
-	out->rq_regs.rq_regs_l.pte_row_height_linear = rq_regs->rq_regs_l.pte_row_height_linear;
-
-	out->rq_regs.rq_regs_c.chunk_size = rq_regs->rq_regs_c.chunk_size;
-	out->rq_regs.rq_regs_c.min_chunk_size = rq_regs->rq_regs_c.min_chunk_size;
-	//out->rq_regs.rq_regs_c.meta_chunk_size = rq_regs->rq_regs_c.meta_chunk_size;
-	//out->rq_regs.rq_regs_c.min_meta_chunk_size = rq_regs->rq_regs_c.min_meta_chunk_size;
-	out->rq_regs.rq_regs_c.dpte_group_size = rq_regs->rq_regs_c.dpte_group_size;
-	out->rq_regs.rq_regs_c.mpte_group_size = rq_regs->rq_regs_c.mpte_group_size;
-	out->rq_regs.rq_regs_c.swath_height = rq_regs->rq_regs_c.swath_height;
-	out->rq_regs.rq_regs_c.pte_row_height_linear = rq_regs->rq_regs_c.pte_row_height_linear;
-
-	out->rq_regs.drq_expansion_mode = rq_regs->drq_expansion_mode;
-	out->rq_regs.prq_expansion_mode = rq_regs->prq_expansion_mode;
-	//out->rq_regs.mrq_expansion_mode = rq_regs->mrq_expansion_mode;
-	out->rq_regs.crq_expansion_mode = rq_regs->crq_expansion_mode;
-	out->rq_regs.plane1_base_address = rq_regs->plane1_base_address;
-	out->unbounded_req = rq_regs->unbounded_request_enabled;
-
-	memset(&out->dlg_regs, 0, sizeof(out->dlg_regs));
-	out->dlg_regs.refcyc_h_blank_end = disp_dlg_regs->refcyc_h_blank_end;
-	out->dlg_regs.dlg_vblank_end = disp_dlg_regs->dlg_vblank_end;
-	out->dlg_regs.min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
-	out->dlg_regs.refcyc_per_htotal = disp_dlg_regs->refcyc_per_htotal;
-	out->dlg_regs.refcyc_x_after_scaler = disp_dlg_regs->refcyc_x_after_scaler;
-	out->dlg_regs.dst_y_after_scaler = disp_dlg_regs->dst_y_after_scaler;
-	out->dlg_regs.dst_y_prefetch = disp_dlg_regs->dst_y_prefetch;
-	out->dlg_regs.dst_y_per_vm_vblank = disp_dlg_regs->dst_y_per_vm_vblank;
-	out->dlg_regs.dst_y_per_row_vblank = disp_dlg_regs->dst_y_per_row_vblank;
-	out->dlg_regs.dst_y_per_vm_flip = disp_dlg_regs->dst_y_per_vm_flip;
-	out->dlg_regs.dst_y_per_row_flip = disp_dlg_regs->dst_y_per_row_flip;
-	out->dlg_regs.ref_freq_to_pix_freq = disp_dlg_regs->ref_freq_to_pix_freq;
-	out->dlg_regs.vratio_prefetch = disp_dlg_regs->vratio_prefetch;
-	out->dlg_regs.vratio_prefetch_c = disp_dlg_regs->vratio_prefetch_c;
-	out->dlg_regs.refcyc_per_tdlut_group = disp_dlg_regs->refcyc_per_tdlut_group;
-	out->dlg_regs.refcyc_per_pte_group_vblank_l = disp_dlg_regs->refcyc_per_pte_group_vblank_l;
-	out->dlg_regs.refcyc_per_pte_group_vblank_c = disp_dlg_regs->refcyc_per_pte_group_vblank_c;
-	//out->dlg_regs.refcyc_per_meta_chunk_vblank_l = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l;
-	//out->dlg_regs.refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_c;
-	out->dlg_regs.refcyc_per_pte_group_flip_l = disp_dlg_regs->refcyc_per_pte_group_flip_l;
-	out->dlg_regs.refcyc_per_pte_group_flip_c = disp_dlg_regs->refcyc_per_pte_group_flip_c;
-	//out->dlg_regs.refcyc_per_meta_chunk_flip_l = disp_dlg_regs->refcyc_per_meta_chunk_flip_l;
-	//out->dlg_regs.refcyc_per_meta_chunk_flip_c = disp_dlg_regs->refcyc_per_meta_chunk_flip_c;
-	out->dlg_regs.dst_y_per_pte_row_nom_l = disp_dlg_regs->dst_y_per_pte_row_nom_l;
-	out->dlg_regs.dst_y_per_pte_row_nom_c = disp_dlg_regs->dst_y_per_pte_row_nom_c;
-	out->dlg_regs.refcyc_per_pte_group_nom_l = disp_dlg_regs->refcyc_per_pte_group_nom_l;
-	out->dlg_regs.refcyc_per_pte_group_nom_c = disp_dlg_regs->refcyc_per_pte_group_nom_c;
-	//out->dlg_regs.dst_y_per_meta_row_nom_l = disp_dlg_regs->dst_y_per_meta_row_nom_l;
-	//out->dlg_regs.dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_c;
-	//out->dlg_regs.refcyc_per_meta_chunk_nom_l = disp_dlg_regs->refcyc_per_meta_chunk_nom_l;
-	//out->dlg_regs.refcyc_per_meta_chunk_nom_c = disp_dlg_regs->refcyc_per_meta_chunk_nom_c;
-	out->dlg_regs.refcyc_per_line_delivery_pre_l = disp_dlg_regs->refcyc_per_line_delivery_pre_l;
-	out->dlg_regs.refcyc_per_line_delivery_pre_c = disp_dlg_regs->refcyc_per_line_delivery_pre_c;
-	out->dlg_regs.refcyc_per_line_delivery_l = disp_dlg_regs->refcyc_per_line_delivery_l;
-	out->dlg_regs.refcyc_per_line_delivery_c = disp_dlg_regs->refcyc_per_line_delivery_c;
-	out->dlg_regs.refcyc_per_vm_group_vblank = disp_dlg_regs->refcyc_per_vm_group_vblank;
-	out->dlg_regs.refcyc_per_vm_group_flip = disp_dlg_regs->refcyc_per_vm_group_flip;
-	out->dlg_regs.refcyc_per_vm_req_vblank = disp_dlg_regs->refcyc_per_vm_req_vblank;
-	out->dlg_regs.refcyc_per_vm_req_flip = disp_dlg_regs->refcyc_per_vm_req_flip;
-	out->dlg_regs.dst_y_offset_cur0 = disp_dlg_regs->dst_y_offset_cur0;
-	out->dlg_regs.chunk_hdl_adjust_cur0 = disp_dlg_regs->chunk_hdl_adjust_cur0;
-	//out->dlg_regs.dst_y_offset_cur1 = disp_dlg_regs->dst_y_offset_cur1;
-	//out->dlg_regs.chunk_hdl_adjust_cur1 = disp_dlg_regs->chunk_hdl_adjust_cur1;
-	out->dlg_regs.vready_after_vcount0 = disp_dlg_regs->vready_after_vcount0;
-	out->dlg_regs.dst_y_delta_drq_limit = disp_dlg_regs->dst_y_delta_drq_limit;
-	out->dlg_regs.refcyc_per_vm_dmdata = disp_dlg_regs->refcyc_per_vm_dmdata;
-	out->dlg_regs.dmdata_dl_delta = disp_dlg_regs->dmdata_dl_delta;
-
-	memset(&out->ttu_regs, 0, sizeof(out->ttu_regs));
-	out->ttu_regs.qos_level_low_wm = disp_ttu_regs->qos_level_low_wm;
-	out->ttu_regs.qos_level_high_wm = disp_ttu_regs->qos_level_high_wm;
-	out->ttu_regs.min_ttu_vblank = disp_ttu_regs->min_ttu_vblank;
-	out->ttu_regs.qos_level_flip = disp_ttu_regs->qos_level_flip;
-	out->ttu_regs.refcyc_per_req_delivery_l = disp_ttu_regs->refcyc_per_req_delivery_l;
-	out->ttu_regs.refcyc_per_req_delivery_c = disp_ttu_regs->refcyc_per_req_delivery_c;
-	out->ttu_regs.refcyc_per_req_delivery_cur0 = disp_ttu_regs->refcyc_per_req_delivery_cur0;
-	//out->ttu_regs.refcyc_per_req_delivery_cur1 = disp_ttu_regs->refcyc_per_req_delivery_cur1;
-	out->ttu_regs.refcyc_per_req_delivery_pre_l = disp_ttu_regs->refcyc_per_req_delivery_pre_l;
-	out->ttu_regs.refcyc_per_req_delivery_pre_c = disp_ttu_regs->refcyc_per_req_delivery_pre_c;
-	out->ttu_regs.refcyc_per_req_delivery_pre_cur0 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur0;
-	//out->ttu_regs.refcyc_per_req_delivery_pre_cur1 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur1;
-	out->ttu_regs.qos_level_fixed_l = disp_ttu_regs->qos_level_fixed_l;
-	out->ttu_regs.qos_level_fixed_c = disp_ttu_regs->qos_level_fixed_c;
-	out->ttu_regs.qos_level_fixed_cur0 = disp_ttu_regs->qos_level_fixed_cur0;
-	//out->ttu_regs.qos_level_fixed_cur1 = disp_ttu_regs->qos_level_fixed_cur1;
-	out->ttu_regs.qos_ramp_disable_l = disp_ttu_regs->qos_ramp_disable_l;
-	out->ttu_regs.qos_ramp_disable_c = disp_ttu_regs->qos_ramp_disable_c;
-	out->ttu_regs.qos_ramp_disable_cur0 = disp_ttu_regs->qos_ramp_disable_cur0;
-	//out->ttu_regs.qos_ramp_disable_cur1 = disp_ttu_regs->qos_ramp_disable_cur1;
+	union dml2_global_sync_programming *global_sync = &stream_programming->global_sync;
+
+	if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
+		/* phantom has its own global sync */
+		global_sync = &stream_programming->phantom_stream.global_sync;
+	}
+
+	memcpy(&pipe_ctx->global_sync,
+		global_sync,
+		sizeof(union dml2_global_sync_programming));
 }
@@ -301,28 +214,16 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex
 {
 	unsigned int pipe_reg_index = 0;
-	dml21_populate_pipe_ctx_dlg_params(dml_ctx, context, pipe_ctx, stream_prog);
+	dml21_pipe_populate_global_sync(dml_ctx, context, pipe_ctx, stream_prog);
 	find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index);
 	if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
 		memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set));
 		pipe_ctx->unbounded_req = false;
-
-		/* legacy only, should be removed later */
-		dml21_update_pipe_ctx_dchub_regs(&pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->rq_regs,
-			&pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->dlg_regs,
-			&pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx);
-
 		pipe_ctx->det_buffer_size_kb = 0;
 	} else {
 		memcpy(&pipe_ctx->hubp_regs, pln_prog->pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set));
 		pipe_ctx->unbounded_req = pln_prog->pipe_regs[pipe_reg_index]->rq_regs.unbounded_request_enabled;
-
-		/* legacy only, should be removed later */
-		dml21_update_pipe_ctx_dchub_regs(&pln_prog->pipe_regs[pipe_reg_index]->rq_regs,
-			&pln_prog->pipe_regs[pipe_reg_index]->dlg_regs,
-			&pln_prog->pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx);
-
 		pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
index d5153fbac921..4bff52eaaef8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
@@ -18,10 +18,10 @@ struct dml2_display_ttu_regs;
 int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
 int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id);
 bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id);
-void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs,
-	struct dml2_display_dlg_regs
 *disp_dlg_regs,
-	struct dml2_display_ttu_regs *disp_ttu_regs,
-	struct pipe_ctx *out);
+void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx,
+	struct dc_state *context,
+	struct pipe_ctx *pipe_ctx,
+	struct dml2_per_stream_programming *stream_programming);
 void dml21_populate_mall_allocation_size(struct dc_state *context,
 	struct dml2_context *in_ctx,
 	struct dml2_per_plane_programming *pln_prog,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
index bbc28b9a15a3..fb80ba9287b6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
@@ -75,7 +75,6 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co
 {
 	switch (in_dc->ctx->dce_version) {
 	case DCN_VERSION_4_01:
-	case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete.
 		(*dml_ctx)->v21.dml_init.options.project_id = dml2_project_dcn4x_stage2_auto_drr_svp;
 		break;
 	default:
@@ -233,13 +232,6 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s
 	dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count);
 	dml21_copy_clocks_to_dc_state(dml_ctx, context);
 	dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx);
-	if (in_dc->ctx->dce_version == DCN_VERSION_3_2) {
-		dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.a, DML2_DCHUB_WATERMARK_SET_A, dml_ctx);
-		dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.b, DML2_DCHUB_WATERMARK_SET_A, dml_ctx);
-		dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.c, DML2_DCHUB_WATERMARK_SET_A, dml_ctx);
-		dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.d, DML2_DCHUB_WATERMARK_SET_A, dml_ctx);
-	}
-
 	dml21_build_fams2_programming(in_dc, context, dml_ctx);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h
deleted file mode 100644
index d82c681a5402..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h
+++ /dev/null
@@ -1,401 +0,0 @@
-/*
- * Copyright 2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DML_DML_DCN3_SOC_BB__
-#define __DML_DML_DCN3_SOC_BB__
-
-#include "dml_top_soc_parameter_types.h"
-
-static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = {
-	.derate_table = {
-		.system_active_urgent = {
-			.dram_derate_percent_pixel = 22,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 76,
-			.dcfclk_derate_percent = 100,
-		},
-		.system_active_average = {
-			.dram_derate_percent_pixel = 17,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 57,
-			.dcfclk_derate_percent = 75,
-		},
-		.dcn_mall_prefetch_urgent = {
-			.dram_derate_percent_pixel = 22,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 76,
-			.dcfclk_derate_percent = 100,
-		},
-		.dcn_mall_prefetch_average = {
-			.dram_derate_percent_pixel = 17,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 57,
-			.dcfclk_derate_percent = 75,
-		},
-		.system_idle_average = {
-			.dram_derate_percent_pixel = 17,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 57,
-			.dcfclk_derate_percent = 100,
-		},
-	},
-	.writeback = {
-		.base_latency_us = 12,
-		.scaling_factor_us = 0,
-		.scaling_factor_mhz = 0,
-	},
-	.qos_params = {
-		.dcn4x = {
-			.df_qos_response_time_fclk_cycles = 300,
-			.max_round_trip_to_furthest_cs_fclk_cycles = 350,
-			.mall_overhead_fclk_cycles = 50,
-			.meta_trip_adder_fclk_cycles = 36,
-			.average_transport_distance_fclk_cycles = 257,
-			.umc_urgent_ramp_latency_margin = 50,
-			.umc_max_latency_margin = 30,
-			.umc_average_latency_margin = 20,
-			.fabric_max_transport_latency_margin = 20,
-			.fabric_average_transport_latency_margin = 10,
-
-			.per_uclk_dpm_params = {
-				{
-					.minimum_uclk_khz = 97,
-					.urgent_ramp_uclk_cycles = 472,
-					.trip_to_memory_uclk_cycles = 827,
-					.meta_trip_to_memory_uclk_cycles = 827,
-					.maximum_latency_when_urgent_uclk_cycles = 72,
-					.average_latency_when_urgent_uclk_cycles = 61,
-					.maximum_latency_when_non_urgent_uclk_cycles = 827,
-					.average_latency_when_non_urgent_uclk_cycles = 118,
-				},
-				{
-					.minimum_uclk_khz = 435,
-					.urgent_ramp_uclk_cycles = 546,
-					.trip_to_memory_uclk_cycles = 848,
-					.meta_trip_to_memory_uclk_cycles = 848,
-					.maximum_latency_when_urgent_uclk_cycles = 146,
-					.average_latency_when_urgent_uclk_cycles = 90,
-					.maximum_latency_when_non_urgent_uclk_cycles = 848,
-					.average_latency_when_non_urgent_uclk_cycles = 135,
-				},
-				{
-					.minimum_uclk_khz = 731,
-					.urgent_ramp_uclk_cycles = 632,
-					.trip_to_memory_uclk_cycles = 874,
-					.meta_trip_to_memory_uclk_cycles = 874,
-					.maximum_latency_when_urgent_uclk_cycles = 232,
-					.average_latency_when_urgent_uclk_cycles = 124,
-					.maximum_latency_when_non_urgent_uclk_cycles = 874,
-					.average_latency_when_non_urgent_uclk_cycles = 155,
-				},
-				{
-					.minimum_uclk_khz = 1187,
-					.urgent_ramp_uclk_cycles = 716,
-					.trip_to_memory_uclk_cycles = 902,
-					.meta_trip_to_memory_uclk_cycles = 902,
-					.maximum_latency_when_urgent_uclk_cycles = 316,
-					.average_latency_when_urgent_uclk_cycles = 160,
-					.maximum_latency_when_non_urgent_uclk_cycles = 902,
-					.average_latency_when_non_urgent_uclk_cycles = 177,
-				},
-			},
-		},
-	},
-	.qos_type = dml2_qos_param_type_dcn4x,
-};
-
-static const struct dml2_soc_bb dml2_socbb_dcn31 = {
-	.clk_table = {
-		.uclk = {
-			.clk_values_khz = {97000, 435000, 731000, 1187000},
-			.num_clk_values = 4,
-		},
-		.fclk = {
-			.clk_values_khz = {300000, 2500000},
-			.num_clk_values = 2,
-		},
-		.dcfclk = {
-			.clk_values_khz = {200000, 1800000},
-			.num_clk_values = 2,
-		},
-		.dispclk = {
-			.clk_values_khz = {100000, 2000000},
-			.num_clk_values = 2,
-		},
-		.dppclk = {
-			.clk_values_khz = {100000, 2000000},
-			.num_clk_values = 2,
-		},
-		.dtbclk = {
-			.clk_values_khz = {100000, 2000000},
-			.num_clk_values = 2,
-		},
-		.phyclk = {
-			.clk_values_khz = {810000, 810000},
-			.num_clk_values = 2,
-		},
-		.socclk = {
-			.clk_values_khz = {300000, 1600000},
-			.num_clk_values = 2,
-		},
-		.dscclk = {
-			.clk_values_khz = {666667, 666667},
-			.num_clk_values = 2,
-		},
-		.phyclk_d18 = {
-			.clk_values_khz = {625000, 625000},
-			.num_clk_values = 2,
-		},
-		.phyclk_d32 = {
-			.clk_values_khz = {2000000, 2000000},
-			.num_clk_values = 2,
-		},
-		.dram_config = {
-			.channel_width_bytes = 2,
-			.channel_count = 16,
-			.transactions_per_clock = 16,
-		},
-	},
-
-	.qos_parameters = {
-		.derate_table = {
-			.system_active_urgent = {
-				.dram_derate_percent_pixel = 22,
-				.dram_derate_percent_vm = 0,
-				.dram_derate_percent_pixel_and_vm = 0,
-				.fclk_derate_percent = 76,
-				.dcfclk_derate_percent = 100,
-			},
-			.system_active_average = {
-				.dram_derate_percent_pixel = 17,
-				.dram_derate_percent_vm = 0,
-				.dram_derate_percent_pixel_and_vm = 0,
-				.fclk_derate_percent = 57,
-				.dcfclk_derate_percent = 75,
-			},
-			.dcn_mall_prefetch_urgent = {
-				.dram_derate_percent_pixel = 22,
-				.dram_derate_percent_vm = 0,
-				.dram_derate_percent_pixel_and_vm = 0,
-				.fclk_derate_percent = 76,
-				.dcfclk_derate_percent = 100,
-			},
-			.dcn_mall_prefetch_average = {
-				.dram_derate_percent_pixel = 17,
-				.dram_derate_percent_vm = 0,
-				.dram_derate_percent_pixel_and_vm = 0,
-				.fclk_derate_percent = 57,
-				.dcfclk_derate_percent = 75,
-			},
-			.system_idle_average = {
-				.dram_derate_percent_pixel = 17,
-				.dram_derate_percent_vm = 0,
-				.dram_derate_percent_pixel_and_vm = 0,
-				.fclk_derate_percent = 57,
-				.dcfclk_derate_percent = 100,
-			},
-		},
-		.writeback = {
-			.base_latency_us = 0,
-			.scaling_factor_us = 0,
-			.scaling_factor_mhz = 0,
-		},
-		.qos_params = {
-			.dcn4x = {
-				.df_qos_response_time_fclk_cycles = 300,
-				.max_round_trip_to_furthest_cs_fclk_cycles = 350,
-				.mall_overhead_fclk_cycles = 50,
-				.meta_trip_adder_fclk_cycles = 36,
-				.average_transport_distance_fclk_cycles = 260,
-				.umc_urgent_ramp_latency_margin = 50,
-				.umc_max_latency_margin = 30,
-				.umc_average_latency_margin = 20,
-				.fabric_max_transport_latency_margin = 20,
-				.fabric_average_transport_latency_margin = 10,
-
-				.per_uclk_dpm_params = {
-					{
-						// State 1
-						.minimum_uclk_khz = 0,
-						.urgent_ramp_uclk_cycles = 472,
-						.trip_to_memory_uclk_cycles = 827,
-						.meta_trip_to_memory_uclk_cycles = 827,
-						.maximum_latency_when_urgent_uclk_cycles = 72,
-						.average_latency_when_urgent_uclk_cycles = 72,
-						.maximum_latency_when_non_urgent_uclk_cycles = 827,
-						.average_latency_when_non_urgent_uclk_cycles = 117,
-					},
-					{
-						// State 2
-						.minimum_uclk_khz = 0,
-						.urgent_ramp_uclk_cycles = 546,
-						.trip_to_memory_uclk_cycles = 848,
-						.meta_trip_to_memory_uclk_cycles = 848,
-						.maximum_latency_when_urgent_uclk_cycles = 146,
-						.average_latency_when_urgent_uclk_cycles = 146,
-						.maximum_latency_when_non_urgent_uclk_cycles = 848,
-						.average_latency_when_non_urgent_uclk_cycles = 133,
-					},
-					{
-						// State 3
-						.minimum_uclk_khz = 0,
-						.urgent_ramp_uclk_cycles = 564,
-						.trip_to_memory_uclk_cycles = 853,
-						.meta_trip_to_memory_uclk_cycles = 853,
-						.maximum_latency_when_urgent_uclk_cycles = 164,
-						.average_latency_when_urgent_uclk_cycles = 164,
-						.maximum_latency_when_non_urgent_uclk_cycles = 853,
-						.average_latency_when_non_urgent_uclk_cycles = 136,
-					},
-					{
-						// State 4
-						.minimum_uclk_khz = 0,
-						.urgent_ramp_uclk_cycles = 613,
-						.trip_to_memory_uclk_cycles = 869,
-						.meta_trip_to_memory_uclk_cycles = 869,
-						.maximum_latency_when_urgent_uclk_cycles = 213,
-						.average_latency_when_urgent_uclk_cycles = 213,
-						.maximum_latency_when_non_urgent_uclk_cycles = 869,
-						.average_latency_when_non_urgent_uclk_cycles = 149,
-					},
-					{
-						// State 5
-						.minimum_uclk_khz = 0,
-						.urgent_ramp_uclk_cycles = 632,
-						.trip_to_memory_uclk_cycles = 874,
-						.meta_trip_to_memory_uclk_cycles = 874,
-						.maximum_latency_when_urgent_uclk_cycles = 232,
-						.average_latency_when_urgent_uclk_cycles = 232,
-						.maximum_latency_when_non_urgent_uclk_cycles = 874,
-						.average_latency_when_non_urgent_uclk_cycles = 153,
-					},
-					{
-						// State 6
-						.minimum_uclk_khz = 0,
-						.urgent_ramp_uclk_cycles = 665,
-						.trip_to_memory_uclk_cycles = 885,
-						.meta_trip_to_memory_uclk_cycles = 885,
-						.maximum_latency_when_urgent_uclk_cycles = 265,
-						.average_latency_when_urgent_uclk_cycles = 265,
-						.maximum_latency_when_non_urgent_uclk_cycles = 885,
-						.average_latency_when_non_urgent_uclk_cycles = 161,
-					},
-					{
-						// State 7
-						.minimum_uclk_khz = 0,
-						.urgent_ramp_uclk_cycles = 689,
-						.trip_to_memory_uclk_cycles = 895,
-						.meta_trip_to_memory_uclk_cycles = 895,
-						.maximum_latency_when_urgent_uclk_cycles = 289,
-						.average_latency_when_urgent_uclk_cycles = 289,
-						.maximum_latency_when_non_urgent_uclk_cycles = 895,
-						.average_latency_when_non_urgent_uclk_cycles = 167,
-					},
-					{
-						// State 8
-						.minimum_uclk_khz = 0,
-						.urgent_ramp_uclk_cycles = 716,
-						.trip_to_memory_uclk_cycles = 902,
-						.meta_trip_to_memory_uclk_cycles = 902,
-						.maximum_latency_when_urgent_uclk_cycles = 316,
-						.average_latency_when_urgent_uclk_cycles = 316,
-						.maximum_latency_when_non_urgent_uclk_cycles = 902,
-						.average_latency_when_non_urgent_uclk_cycles = 174,
-					},
-				},
-			},
-		},
-		.qos_type = dml2_qos_param_type_dcn4x,
-	},
-
-	.power_management_parameters = {
-		.dram_clk_change_blackout_us = 400,
-		.fclk_change_blackout_us = 0,
-		.g7_ppt_blackout_us = 0,
-		.stutter_enter_plus_exit_latency_us = 50,
-		.stutter_exit_latency_us = 43,
-		.z8_stutter_enter_plus_exit_latency_us = 0,
-		.z8_stutter_exit_latency_us = 0,
-	},
-
-	.vmin_limit = {
-		.dispclk_khz = 600 * 1000,
-	},
-
-	.dprefclk_mhz = 700,
-	.xtalclk_mhz = 100,
-	.pcie_refclk_mhz = 100,
-	.dchub_refclk_mhz = 50,
-	.mall_allocated_for_dcn_mbytes = 64,
-	.max_outstanding_reqs = 512,
-	.fabric_datapath_to_dcn_data_return_bytes = 64,
-	.return_bus_width_bytes = 64,
-	.hostvm_min_page_size_kbytes = 0,
-	.gpuvm_min_page_size_kbytes = 256,
-	.phy_downspread_percent = 0,
-	.dcn_downspread_percent = 0,
-	.dispclk_dppclk_vco_speed_mhz = 4500,
-	.do_urgent_latency_adjustment = 0,
-	.mem_word_bytes = 32,
-	.num_dcc_mcaches = 8,
-	.mcache_size_bytes = 2048,
-	.mcache_line_size_bytes = 32,
-	.max_fclk_for_uclk_dpm_khz = 1250 * 1000,
-};
-
-static const struct dml2_ip_capabilities dml2_dcn31_max_ip_caps = {
-	.pipe_count = 4,
-	.otg_count = 4,
-	.num_dsc = 4,
-	.max_num_dp2p0_streams = 4,
-	.max_num_hdmi_frl_outputs = 1,
-	.max_num_dp2p0_outputs = 4,
-	.rob_buffer_size_kbytes = 192,
-	.config_return_buffer_size_in_kbytes = 1152,
-	.meta_fifo_size_in_kentries = 22,
-	.compressed_buffer_segment_size_in_kbytes = 64,
-	.subvp_drr_scheduling_margin_us = 100,
-	.subvp_prefetch_end_to_mall_start_us = 15,
-	.subvp_fw_processing_delay = 15,
-
-	.fams2 = {
-		.max_allow_delay_us = 100 * 1000,
-		.scheduling_delay_us = 50,
-		.vertical_interrupt_ack_delay_us = 18,
-		.allow_programming_delay_us = 18,
-		.min_allow_width_us = 20,
-		.subvp_df_throttle_delay_us = 100,
-		.subvp_programming_delay_us = 18,
-		.subvp_prefetch_to_mall_delay_us = 18,
-		.drr_programming_delay_us = 18,
-	},
-};
-
-#endif /* __DML_DML_DCN3_SOC_BB__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
index b9ec243cf9ba..8ed49a9df378 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@@ -600,8 +600,8 @@ static void CalculateBytePerPixelAndBlockSizes(
 {
 	*BytePerPixelDETY = 0;
 	*BytePerPixelDETC = 0;
-	*BytePerPixelY = 0;
-	*BytePerPixelC = 0;
+	*BytePerPixelY = 1;
+	*BytePerPixelC = 1;
 	if (SourcePixelFormat == dml2_444_64) {
 		*BytePerPixelDETY = 8;
@@ -2778,7 +2778,7 @@ static double dml_get_return_bandwidth_available(
 	return return_bw_mbps;
 }
-static void calculate_bandwidth_available(
+static noinline_for_stack void calculate_bandwidth_available(
 	double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
 	double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
 	double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
@@ -3528,10 +3528,9 @@ static void CalculateUrgentBurstFactor(
 	dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
 	dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
 #endif
-
 }
-static void CalculateDCFCLKDeepSleep(
+static void CalculateDCFCLKDeepSleepTdlut(
 	const struct dml2_display_cfg *display_cfg,
 	unsigned int NumberOfActiveSurfaces,
 	unsigned int BytePerPixelY[],
@@ -3546,6 +3545,10 @@ static void CalculateDCFCLKDeepSleep(
 	double ReadBandwidthChroma[],
 	unsigned int ReturnBusWidth,
+	double dispclk,
+	unsigned int tdlut_bytes_to_deliver[],
+	double prefetch_swath_time_us[],
+
 	// Output
 	double *DCFClkDeepSleep)
 {
@@ -3580,6 +3583,22 @@ static void CalculateDCFCLKDeepSleep(
 		}
 		DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16);
+		// adjust for 3dlut delivery time
+		if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) {
+			double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k];
+
+			dml2_printf("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
+			dml2_printf("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]);
+			dml2_printf("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]);
+			dml2_printf("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk);
+
+			// increase the deepsleep dcfclk to match the original dispclk throughput rate
+			if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) {
+				DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk);
+				DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0);
+			}
+		}
+
 #ifdef __DML_VBA_DEBUG__
 		dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
 		dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
@@ -3602,9 +3621,56 @@ static void
CalculateDCFCLKDeepSleep(
 	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
 		*DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
 	}
+	dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
 }
+static noinline_for_stack void CalculateDCFCLKDeepSleep(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int BytePerPixelY[],
+	unsigned int BytePerPixelC[],
+	unsigned int SwathWidthY[],
+	unsigned int SwathWidthC[],
+	unsigned int DPPPerSurface[],
+	double PSCL_THROUGHPUT[],
+	double PSCL_THROUGHPUT_CHROMA[],
+	double Dppclk[],
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	unsigned int ReturnBusWidth,
+
+	// Output
+	double *DCFClkDeepSleep)
+{
+	double zero_double[DML2_MAX_PLANES];
+	unsigned int zero_integer[DML2_MAX_PLANES];
+
+	memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double));
+	memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int));
+
+	CalculateDCFCLKDeepSleepTdlut(
+		display_cfg,
+		NumberOfActiveSurfaces,
+		BytePerPixelY,
+		BytePerPixelC,
+		SwathWidthY,
+		SwathWidthC,
+		DPPPerSurface,
+		PSCL_THROUGHPUT,
+		PSCL_THROUGHPUT_CHROMA,
+		Dppclk,
+		ReadBandwidthLuma,
+		ReadBandwidthChroma,
+		ReturnBusWidth,
+		0,
+		zero_integer, //tdlut_bytes_to_deliver,
+		zero_double, //prefetch_swath_time_us,
+
+		// Output
+		DCFClkDeepSleep);
+}
+
 static double CalculateWriteBackDelay(
 	enum dml2_source_format_class WritebackPixelFormat,
 	double WritebackHRatio,
@@ -4076,7 +4142,7 @@ static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
 	return true;
 }
-static void CalculateODMMode(
+static noinline_for_stack void CalculateODMMode(
 	unsigned int MaximumPixelsPerLinePerDSCUnit,
 	unsigned int HActive,
 	enum dml2_output_format_class OutFormat,
@@ -4173,7 +4239,7 @@ static void CalculateODMMode(
 #endif
 }
-static void CalculateOutputLink(
+static noinline_for_stack void CalculateOutputLink(
 	struct dml2_core_internal_scratch *s,
 	double PHYCLK,
 	double PHYCLKD18,
@@ -4604,6 +4670,7 @@ static void calculate_tdlut_setting(
 		*p->tdlut_groups_per_2row_ub = 0;
 		*p->tdlut_opt_time = 0;
 		*p->tdlut_drain_time = 0;
+		*p->tdlut_bytes_to_deliver = 0;
 		*p->tdlut_bytes_per_group = 0;
 		*p->tdlut_pte_bytes_per_frame = 0;
 		*p->tdlut_bytes_per_frame = 0;
@@ -4672,6 +4739,7 @@ static void calculate_tdlut_setting(
 		*p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
 		*p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
 		*p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate;
+		*p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0);
 	}
#ifdef __DML_VBA_DEBUG__
 	dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles);
 	dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
 	dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
+	dml2_printf("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver);
 	dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
#endif
 }
@@ -5700,6 +5769,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
 	s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
 	*p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
+	*p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime);
#ifdef __DML_VBA_DEBUG__
 	dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
 	dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
 	dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
 	dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+	dml2_printf("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us);
 	dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
 	dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
@@ -5928,7 +5999,7 @@ static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned in
 }
 // a global check against the aggregate effect of the per plane prefetch schedule
-static bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
+static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
 	struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p)
 {
 	struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals;
@@ -6941,7 +7012,7 @@ static void calculate_bytes_to_fetch_required_to_hide_latency(
 	}
 }
-static void calculate_vactive_det_fill_latency(
+static noinline_for_stack void calculate_vactive_det_fill_latency(
 	const struct dml2_display_cfg *display_cfg,
 	unsigned int num_active_planes,
 	unsigned int bytes_required_l[],
@@ -8817,6 +8888,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 			calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
 			calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
 			calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+			calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
 			calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
 			calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
@@ -9009,6 +9081,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 			CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
 			CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
 			CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
+			CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
 			mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
@@ -9017,6 +9090,27 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
 			dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
 		} // for k num_planes
+		CalculateDCFCLKDeepSleepTdlut(
+			display_cfg,
+			mode_lib->ms.num_active_planes,
+			mode_lib->ms.BytePerPixelY,
+			mode_lib->ms.BytePerPixelC,
+			mode_lib->ms.SwathWidthY,
+			mode_lib->ms.SwathWidthC,
+			mode_lib->ms.NoOfDPP,
+			mode_lib->ms.PSCL_FACTOR,
+			mode_lib->ms.PSCL_FACTOR_CHROMA,
+			mode_lib->ms.RequiredDPPCLK,
+			mode_lib->ms.vactive_sw_bw_l,
+			mode_lib->ms.vactive_sw_bw_c,
+			mode_lib->soc.return_bus_width_bytes,
+			mode_lib->ms.RequiredDISPCLK,
+			s->tdlut_bytes_to_deliver,
+			s->prefetch_swath_time_us,
+
+			/* Output */
+			&mode_lib->ms.dcfclk_deepsleep);
+
 		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
 			if (mode_lib->ms.dst_y_prefetch[k] < 2.0
 				|| mode_lib->ms.LinesForVM[k] >= 32.0
@@ -10368,12 +10462,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
 	dml2_assert(s->SOCCLK > 0);
#ifdef __DML_VBA_DEBUG__
-	// dml2_printf_dml_display_cfg_timing(&display_cfg->timing, s->num_active_planes);
-	// dml2_printf_dml_display_cfg_plane(&display_cfg->plane, s->num_active_planes);
-	// dml2_printf_dml_display_cfg_surface(&display_cfg->surface, s->num_active_planes);
-	// dml2_printf_dml_display_cfg_output(&display_cfg->output, s->num_active_planes);
-	// dml2_printf_dml_display_cfg_hw_resource(&display_cfg->hw, s->num_active_planes);
-
 	dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
 	dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
 	dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
@@ -10832,8 +10920,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
 		calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
 		calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
 		calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+		calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
 		calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
-
 		calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
 	}
@@ -11219,6 +11307,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
 		CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
 		CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
 		CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
+		CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0];
 		mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
index 4f54e54102ef..23c0fca5515f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
@@ -958,6 +958,7 @@ struct dml2_core_calcs_mode_support_locals {
 	unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES];
 	double tdlut_opt_time[DML2_MAX_PLANES];
 	double tdlut_drain_time[DML2_MAX_PLANES];
+	unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES];
 	unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES];
 	unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES];
@@ -979,6 +980,7 @@ struct dml2_core_calcs_mode_support_locals {
 	enum dml2_source_format_class pixel_format[DML2_MAX_PLANES];
 	unsigned int lb_source_lines_l[DML2_MAX_PLANES];
 	unsigned int lb_source_lines_c[DML2_MAX_PLANES];
+	double prefetch_swath_time_us[DML2_MAX_PLANES];
 };
 struct dml2_core_calcs_mode_programming_locals {
@@ -1042,6 +1044,7 @@ struct dml2_core_calcs_mode_programming_locals {
 	unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES];
 	double tdlut_opt_time[DML2_MAX_PLANES];
 	double tdlut_drain_time[DML2_MAX_PLANES];
+	unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES];
 	unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES];
 	unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES];
@@ -1809,6 +1812,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params {
 	unsigned int *VReadyOffsetPix;
 	double *prefetch_cursor_bw;
 	double *prefetch_sw_bytes;
+	double *prefetch_swath_time_us;
 };
 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params {
@@ -1993,6 +1997,7 @@ struct dml2_core_calcs_calculate_tdlut_setting_params {
 	unsigned int *tdlut_groups_per_2row_ub;
 	double *tdlut_opt_time;
 	double *tdlut_drain_time;
+	unsigned int *tdlut_bytes_to_deliver;
 	unsigned int *tdlut_bytes_per_group;
 };
@@ -2137,7 +2142,6 @@ struct dml2_core_calcs_mode_programming_ex {
 	const struct core_display_cfg_support_info *cfg_support_info;
 	int min_clk_index;
 	struct dml2_display_cfg_programming *programming;
-
 };
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
index 1548dfc68b8e..456b3f8a6d38 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
@@ -556,7 +556,7 @@ bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format)
 {
 	bool ret_val = 0;
-	if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha))
+	if (dml2_core_utils_is_420(source_format) || dml2_core_utils_is_422_planar(source_format) || (source_format == dml2_rgbe_alpha))
 		ret_val = 1;
 	return ret_val;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
index 8a78b9adfc62..fc77fb34a19a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
@@ -212,7 +212,7 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma
 	clock_khz *= 1.0 + margin;
-	divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz));
+	divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz));
 	/* we want to floor here to get higher clock than required rather than lower */
 	if (divider < DFS_DIVIDER_RANGE_2_START) {
@@ -417,11 +417,11 @@ static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mo
 static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *display_config, int mask)
 {
-	unsigned char i;
+	unsigned int i;
 	bool identical = true;
 	bool contains_drr = false;
-	unsigned char remap_array[DML2_MAX_PLANES];
-	unsigned char remap_array_size = 0;
+	unsigned int remap_array[DML2_MAX_PLANES];
+	unsigned int remap_array_size = 0;
 	// Create a remap array to enable simple iteration through only masked stream indicies
 	for (i = 0; i < display_config->num_streams; i++) {
@@ -456,10 +456,10 @@ static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *displa
 static int find_smallest_idle_time_in_vblank_us(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int mask)
 {
-	unsigned char i;
+	unsigned int i;
 	int min_idle_us = 0;
-	unsigned char remap_array[DML2_MAX_PLANES];
-	unsigned char remap_array_size = 0;
+	unsigned int remap_array[DML2_MAX_PLANES];
+	unsigned int remap_array_size = 0;
 	const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
 	// Create a remap array to enable simple iteration through only masked stream indicies
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
index a31db5742675..e763c8e45da8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
@@ -195,11 +195,11 @@ static int count_planes_with_stream_index(const struct dml2_display_cfg *display
 static bool are_timings_trivially_synchronizable(struct display_configuation_with_meta *display_config, int mask)
 {
-	unsigned char i;
+	unsigned int i;
 	bool identical = true;
 	bool contains_drr = false;
-	unsigned char remap_array[DML2_MAX_PLANES];
-	unsigned char remap_array_size = 0;
+	unsigned int remap_array[DML2_MAX_PLANES];
+	unsigned int remap_array_size = 0;
 	// Create a remap array to enable simple iteration through only masked stream indicies
 	for (i = 0; i < display_config->display_config.num_streams; i++) {
@@ -347,8 +347,12 @@ static int find_highest_odm_load_stream_index(
 	int odm_load, highest_odm_load = -1, highest_odm_load_index = -1;
 	for (i = 0; i < display_config->num_streams; i++) {
-		odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz
+		if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0)
+			odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz
 				/ mode_support_result->cfg_support_info.stream_support_info[i].odms_used;
+		else
+			odm_load = 0;
+
 		if (odm_load > highest_odm_load) {
 			highest_odm_load_index = i;
 			highest_odm_load = odm_load;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
index 1efbc0329f85..a3324f7b9ba6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
@@ -813,8 +813,12 @@ static int find_highest_odm_load_stream_index(
 	int odm_load, highest_odm_load = -1, highest_odm_load_index = -1;
 	for (i = 0; i < display_config->num_streams; i++) {
-		odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz
+		if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0)
+			odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz
 				/ mode_support_result->cfg_support_info.stream_support_info[i].odms_used;
+		else
+			odm_load = 0;
+
 		if (odm_load > highest_odm_load) {
 			highest_odm_load_index = i;
 			highest_odm_load = odm_load;
@@ -986,7 +990,7 @@ static bool all_timings_support_vactive(const struct dml2_pmo_instance *pmo,
 	const struct display_configuation_with_meta *display_config,
 	unsigned int mask)
 {
-	unsigned char i;
+	unsigned int i;
 	bool valid = true;
 	// Create a remap array to enable simple iteration through only masked stream indicies
@@ -1035,7 +1039,7 @@ static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo,
 	const struct display_configuation_with_meta *display_config,
 	unsigned int mask)
 {
-	unsigned char i;
+	unsigned int i;
 	for (i = 0; i < DML2_MAX_PLANES; i++) {
 		const struct dml2_stream_parameters *stream_descriptor;
 		const struct dml2_fams2_meta *stream_fams2_meta;
@@ -1077,7 +1081,7 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo,
 	const struct dml2_plane_parameters *plane_descriptor;
 	const struct dml2_fams2_meta *stream_fams2_meta;
 	unsigned int microschedule_vlines;
-	unsigned char i;
+	unsigned int i;
 	unsigned int num_planes_per_stream[DML2_MAX_PLANES] = { 0 };
@@ -1194,7 +1198,7 @@ static enum dml2_uclk_pstate_change_strategy pstate_method_to_uclk_pstate_strate
 static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg,
 	int plane_mask, enum dml2_pstate_method method)
 {
-	unsigned char i;
+	unsigned int i;
 	for (i = 0; i < DML2_MAX_PLANES; i++) {
 		if (is_bit_set_in_bitfield(plane_mask, i)) {
@@ -1372,7 +1376,7 @@ static bool is_config_schedulable(
 			if (j_disallow_us < jp1_disallow_us) {
 				/* swap as A < B */
 				swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j],
-				     s->pmo_dcn4.sorted_group_gtl_disallow_index[j+1]);
+				     s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]);
 				swapped = true;
 			}
 		}
@@ -1431,7 +1435,7 @@ static bool is_config_schedulable(
 			if (j_period_us < jp1_period_us) {
 				/* swap as A < B */
 				swap(s->pmo_dcn4.sorted_group_gtl_period_index[j],
-				     s->pmo_dcn4.sorted_group_gtl_period_index[j+1]);
+				     s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]);
 				swapped = true;
 			}
 		}
@@ -1545,7 +1549,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins
 {
 	struct dml2_pmo_scratch *s = &pmo->scratch;
-	unsigned char stream_index = 0;
+	unsigned int stream_index = 0;
 	unsigned int svp_count = 0;
 	unsigned int svp_stream_mask = 0;
@@ -1609,7 +1613,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins
 static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask)
 {
-	unsigned char i;
+	unsigned int i;
 	int min_vactive_margin_us = 0xFFFFFFF;
 	for (i = 0; i < DML2_MAX_PLANES; i++) {
@@ -1817,7 +1821,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp
 	const struct dml2_pmo_pstate_strategy *strategy_list = NULL;
 	struct dml2_pmo_pstate_strategy override_base_strategy = { 0 };
 	unsigned int strategy_list_size = 0;
-	unsigned char plane_index, stream_index, i;
+	unsigned int plane_index, stream_index, i;
 	bool build_override_strategy = true;
 	state->performed = true;
@@ -1940,7 +1944,7 @@ static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *
 	struct dml2_pmo_instance *pmo,
 	int plane_mask)
 {
-	unsigned char plane_index;
+	unsigned int plane_index;
 	struct dml2_plane_parameters *plane;
 	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -1961,7 +1965,7 @@ static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *
 {
 	struct dml2_pmo_scratch *scratch = &pmo->scratch;
-	unsigned char plane_index;
+	unsigned int plane_index;
 	int stream_index = -1;
 	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -1984,7 +1988,7 @@ static void setup_planes_for_svp_drr_by_mask(struct display_configuation_with_me
 {
 	struct dml2_pmo_scratch *scratch = &pmo->scratch;
-	unsigned char plane_index;
+	unsigned int plane_index;
 	int stream_index = -1;
 	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -2005,7 +2009,7 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met
 	struct dml2_pmo_instance *pmo,
 	int plane_mask)
 {
-	unsigned char plane_index;
+	unsigned int plane_index;
 	struct dml2_plane_parameters *plane;
 	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -2025,7 +2029,7 @@ static void
setup_planes_for_vblank_drr_by_mask(struct display_configuation_with
 	struct dml2_pmo_instance *pmo,
 	int plane_mask)
 {
-	unsigned char plane_index;
+	unsigned int plane_index;
 	struct dml2_plane_parameters *plane;
 	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -2042,7 +2046,7 @@ static void setup_planes_for_vactive_by_mask(struct display_configuation_with_me
 	struct dml2_pmo_instance *pmo,
 	int plane_mask)
 {
-	unsigned char plane_index;
+	unsigned int plane_index;
 	unsigned int stream_index;
 	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -2063,7 +2067,7 @@ static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_wit
 	struct dml2_pmo_instance *pmo,
 	int plane_mask)
 {
-	unsigned char plane_index;
+	unsigned int plane_index;
 	unsigned int stream_index;
 	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -2131,7 +2135,7 @@ static bool setup_display_config(struct display_configuation_with_meta *display_
 static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask)
 {
 	int min_time_us = 0xFFFFFF;
-	unsigned char plane_index = 0;
+	unsigned int plane_index = 0;
 	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
 		if (is_bit_set_in_bitfield(plane_mask, plane_index)) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
index 5f6dfc24df69..f88931ccbc5e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
@@ -15,7 +15,6 @@ bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out)
 {
 	switch (in_out->options.project_id) {
 	case dml2_project_dcn4x_stage1:
-		return false;
 	case dml2_project_dcn4x_stage2:
 	case dml2_project_dcn4x_stage2_auto_drr_svp:
 		return dml2_top_soc15_initialize_instance(in_out);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c
index db0a30fdb58d..5e14d85821e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c
@@ -2,3 +2,9 @@
 //
 // Copyright 2024 Advanced Micro Devices, Inc.
+#include "dml2_top_legacy.h"
+#include "dml2_top_soc15.h"
+#include "dml2_core_factory.h"
+#include "dml2_pmo_factory.h"
+#include "display_mode_core_structs.h"
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c
index b39029c0e56f..a8f58f8448e4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c
@@ -545,6 +545,7 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi
 		if (odm_combine_factor > 1) {
 			max_per_pipe_vp_p0 = plane->surface.plane0.width;
 			temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor);
+
 			if (temp < max_per_pipe_vp_p0)
 				max_per_pipe_vp_p0 = temp;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h
index 6fda201af898..53bd8602f9ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: MIT
 //
 // Copyright 2024 Advanced Micro Devices, Inc.
+
 #ifndef __DML2_TOP_SOC15_H__
 #define __DML2_TOP_SOC15_H__
 #include "dml2_internal_shared_types.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
index d94b310d6eec..7fb6026bcb49 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
@@ -357,8 +357,6 @@ struct dml2_pmo_pstate_strategy {
 	enum dml2_pstate_method per_stream_pstate_method[DML2_MAX_PLANES];
 	bool allow_state_increase;
 };
-
-
 struct dml2_core_mode_support_in_out {
 	/*
	 * Inputs
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
index 3d29169dd6bb..6b3b8803e0ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
@@ -813,7 +813,7 @@ static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struc
 {
 	int i, old_plane_count;
 	struct dc_stream_status *stream_status = NULL;
-	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+	struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
 	for (i = 0; i < context->stream_count; i++)
 		if (context->streams[i] == stream) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
index 340791d40ecb..68b882d28195 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -747,11 +747,10 @@ static inline struct dml2_context *dml2_allocate_memory(void)
 static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)
 {
-	// TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete.
- if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) { - dml21_reinit(in_dc, dml2, config); + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) { + dml21_reinit(in_dc, dml2, config); return; - } + } // Store config options (*dml2)->config = *config; @@ -786,10 +785,8 @@ static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_op bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { - // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) { + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) return dml21_create(in_dc, dml2, config); - } // Allocate Mode Lib Ctx *dml2 = dml2_allocate_memory(); @@ -857,8 +854,7 @@ void dml2_reinit(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { - // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) { + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) { dml21_reinit(in_dc, dml2, config); return; } diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c index e1da48b05d00..75fb77bca83b 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c @@ -194,6 +194,9 @@ void dpp_reset(struct dpp *dpp_base) dpp->filter_h = NULL; dpp->filter_v = NULL; + memset(&dpp_base->pos, 0, sizeof(dpp_base->pos)); + memset(&dpp_base->att, 0, sizeof(dpp_base->att)); + memset(&dpp->scl_data, 0, sizeof(dpp->scl_data)); memset(&dpp->pwl_data, 0, sizeof(dpp->pwl_data)); } @@ -480,10 +483,11 @@ void dpp1_set_cursor_position( if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ - REG_UPDATE(CURSOR0_CONTROL, - CUR0_ENABLE, cur_en); + if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) { + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); - dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + } } void dpp1_cnv_set_optional_cursor_attributes( diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 3b6ca7974e18..1236e0f9a256 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -154,9 +154,11 @@ void dpp401_set_cursor_position( struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); uint32_t cur_en = pos->enable ? 
1 : 0; - REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); + if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) { + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); - dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + } } void dpp401_set_optional_cursor_attributes( diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index d9aaebfa3a0a..11535922b5ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -30,6 +30,9 @@ #include "rc_calc.h" #include "fixed31_32.h" +#define DC_LOGGER \ + dsc->ctx->logger + /* This module's internal functions */ /* default DSC policy target bitrate limit is 16bpp */ @@ -480,6 +483,48 @@ bool dc_dsc_compute_bandwidth_range( return is_dsc_possible; } +void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc, + const struct dc_crtc_timing *timing) +{ + struct dsc_enc_caps dsc_enc_caps; + + get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); + + DC_LOG_DSC("dsc encoder caps:"); + DC_LOG_DSC("\tdsc_version 0x%x", dsc_enc_caps.dsc_version); + DC_LOG_DSC("\tslice_caps 0x%x", dsc_enc_caps.slice_caps.raw); + DC_LOG_DSC("\tlb_bit_depth %d", dsc_enc_caps.lb_bit_depth); + DC_LOG_DSC("\tis_block_pred_supported %d", dsc_enc_caps.is_block_pred_supported); + DC_LOG_DSC("\tcolor_formats 0x%x", dsc_enc_caps.color_formats.raw); + DC_LOG_DSC("\tcolor_depth 0x%x", dsc_enc_caps.color_depth.raw); + DC_LOG_DSC("\tmax_total_throughput_mps %d", dsc_enc_caps.max_total_throughput_mps); + DC_LOG_DSC("\tmax_slice_width %d", dsc_enc_caps.max_slice_width); + DC_LOG_DSC("\tbpp_increment_div %d", dsc_enc_caps.bpp_increment_div); +} + +void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc, + const struct dsc_dec_dpcd_caps *dsc_sink_caps) +{ + DC_LOG_DSC("dsc decoder caps:"); + DC_LOG_DSC("\tis_dsc_supported %d", dsc_sink_caps->is_dsc_supported); + DC_LOG_DSC("\tdsc_version 0x%x", dsc_sink_caps->dsc_version); + DC_LOG_DSC("\trc_buffer_size %d", dsc_sink_caps->rc_buffer_size); + DC_LOG_DSC("\tslice_caps1 0x%x", dsc_sink_caps->slice_caps1.raw); + DC_LOG_DSC("\tslice_caps2 0x%x", dsc_sink_caps->slice_caps2.raw); + DC_LOG_DSC("\tlb_bit_depth %d", dsc_sink_caps->lb_bit_depth); + DC_LOG_DSC("\tis_block_pred_supported %d", dsc_sink_caps->is_block_pred_supported); + DC_LOG_DSC("\tedp_max_bits_per_pixel %d", dsc_sink_caps->edp_max_bits_per_pixel); + DC_LOG_DSC("\tcolor_formats 0x%x", dsc_sink_caps->color_formats.raw); + DC_LOG_DSC("\tthroughput_mode_0_mps %d", dsc_sink_caps->throughput_mode_0_mps); + DC_LOG_DSC("\tthroughput_mode_1_mps %d", dsc_sink_caps->throughput_mode_1_mps); + DC_LOG_DSC("\tmax_slice_width %d", dsc_sink_caps->max_slice_width); + DC_LOG_DSC("\tbpp_increment_div %d", dsc_sink_caps->bpp_increment_div); + DC_LOG_DSC("\tbranch_overall_throughput_0_mps %d", dsc_sink_caps->branch_overall_throughput_0_mps); + DC_LOG_DSC("\tbranch_overall_throughput_1_mps %d", dsc_sink_caps->branch_overall_throughput_1_mps); + DC_LOG_DSC("\tbranch_max_line_width %d", dsc_sink_caps->branch_max_line_width); + DC_LOG_DSC("\tis_dp %d", dsc_sink_caps->is_dp); +} + static void get_dsc_enc_caps( const struct display_stream_compressor *dsc, struct dsc_enc_caps *dsc_enc_caps, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c index f0ba944553df..9b026600b90e 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c +++ 
b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c @@ -140,7 +140,7 @@ void hubp1_vready_workaround(struct hubp *hubp, void hubp1_program_tiling( struct hubp *hubp, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); @@ -546,10 +546,16 @@ void hubp1_dcc_control(struct hubp *hubp, bool enable, SECONDARY_SURFACE_DCC_IND_64B_BLK, dcc_ind_64b_blk); } +void hubp_reset(struct hubp *hubp) +{ + memset(&hubp->pos, 0, sizeof(hubp->pos)); + memset(&hubp->att, 0, sizeof(hubp->att)); +} + void hubp1_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -1351,8 +1357,9 @@ static void hubp1_wait_pipe_read_start(struct hubp *hubp) void hubp1_init(struct hubp *hubp) { - //do nothing + hubp_reset(hubp); } + static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_program_surface_flip_and_addr = hubp1_program_surface_flip_and_addr, @@ -1365,6 +1372,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_set_vm_context0_settings = hubp1_set_vm_context0_settings, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_hubp_blank_en = hubp1_set_hubp_blank_en, .set_cursor_attributes = hubp1_cursor_set_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h index 631350cd4f2e..c7765e6f09e6 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h @@ -706,7 +706,7 @@ struct dcn10_hubp { void hubp1_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -739,13 +739,15 @@ void hubp1_program_rotation( void hubp1_program_tiling( struct hubp *hubp, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format); void hubp1_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size independent_64b_blks); +void hubp_reset(struct hubp *hubp); + bool hubp1_program_surface_flip_and_addr( struct hubp *hubp, const struct dc_plane_address *address, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c index 200194544bf0..91259b896e03 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c @@ -310,7 +310,7 @@ void hubp2_setup_interdependent( */ static void hubp2_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { REG_UPDATE_3(DCSURF_ADDR_CONFIG, @@ -550,7 +550,7 @@ void hubp2_program_pixel_format( void hubp2_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -1058,11 +1058,13 @@ void hubp2_cursor_set_position( if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not 
visible beyond top edge*/ - if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) - hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { + if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) + hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); - REG_UPDATE(CURSOR_CONTROL, + REG_UPDATE(CURSOR_CONTROL, CURSOR_ENABLE, cur_en); + } REG_SET_2(CURSOR_POSITION, 0, CURSOR_X_POSITION, pos->x, @@ -1674,6 +1676,7 @@ static struct hubp_funcs dcn20_hubp_funcs = { .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp2_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h index 7fd9240868c3..6968087a3605 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h @@ -382,7 +382,7 @@ void hubp2_program_pixel_format( void hubp2_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c index d910e4a54c34..ec88ee424a7f 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c @@ -42,7 +42,7 @@ static void hubp201_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -121,6 +121,7 @@ static struct hubp_funcs dcn201_hubp_funcs = { .set_cursor_position = hubp1_cursor_set_position, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .hubp_clk_cntl = hubp1_clk_cntl, .hubp_vtg_sel = hubp1_vtg_sel, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c index edbdb8c88d5c..e2740482e1cf 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c @@ -811,6 +811,8 @@ static void hubp21_init(struct hubp *hubp) struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1; REG_WRITE(HUBPREQ_DEBUG, 1 << 26); + + hubp_reset(hubp); } static struct hubp_funcs dcn21_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, @@ -823,6 +825,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp21_set_vm_system_aperture_settings, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = hubp21_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp1_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c index 3b16c3cda2c3..be0ac613675a 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c +++ 
b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c @@ -318,7 +318,7 @@ bool hubp3_program_surface_flip_and_addr( void hubp3_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { REG_UPDATE_4(DCSURF_ADDR_CONFIG, @@ -411,7 +411,7 @@ void hubp3_dmdata_set_attributes( void hubp3_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -499,6 +499,8 @@ void hubp3_init(struct hubp *hubp) struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1; REG_WRITE(HUBPREQ_DEBUG, 1 << 26); + + hubp_reset(hubp); } static struct hubp_funcs dcn30_hubp_funcs = { @@ -513,6 +515,7 @@ static struct hubp_funcs dcn30_hubp_funcs = { .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h index cfb01bf340a1..b7d7adf0b58c 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h @@ -264,7 +264,7 @@ bool hubp3_program_surface_flip_and_addr( void hubp3_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -280,7 +280,7 @@ void hubp3_setup( void hubp3_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format); void hubp3_dcc_control(struct hubp *hubp, bool enable, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c index 46b804ed05fb..c2900c79a2d3 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c @@ -79,6 +79,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index 8b5bd73b8094..edd37898d550 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ -181,6 +181,7 @@ static struct hubp_funcs dcn32_hubp_funcs = { .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index 
eb62042dfafc..5661d7a80d54 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -172,7 +172,7 @@ void hubp35_program_pixel_format( void hubp35_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -199,6 +199,7 @@ static struct hubp_funcs dcn35_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, + .hubp_reset = hubp_reset, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h index 586b43aa5834..d913f80b3130 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h @@ -65,7 +65,7 @@ void hubp35_program_pixel_format( void hubp35_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index 09f730cfbf8e..5ed195377a6c 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -141,34 +141,48 @@ void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor void hubp401_init(struct hubp *hubp) { - //For now nothing to do, HUBPREQ_DEBUG_DB register is removed on DCN4x. 
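The hubp_reset() additions threaded through these files all serve one invariant: the driver keeps cursor-control shadow state (hubp->pos/att, dpp_base->pos) so REG_UPDATE writes can be skipped when nothing changed, and any reset or power-gate path must zero that cache or a later compare would wrongly skip a write the hardware actually needs. A standalone sketch of the pattern, with hypothetical stub types loosely modelled on the real structs:

#include <stdio.h>
#include <string.h>

/* Stand-ins for the hubp shadow state; field names are assumptions. */
struct cursor_position_cache { unsigned int cur_enable; };
struct cursor_attribute_cache { unsigned int attr; };

struct hubp_stub {
	struct cursor_position_cache pos;
	struct cursor_attribute_cache att;
	unsigned int mmio_writes;	/* counts simulated register writes */
};

static void reg_update_cursor_enable(struct hubp_stub *hubp, unsigned int cur_en)
{
	hubp->mmio_writes++;	/* stands in for REG_UPDATE(CURSOR_CONTROL, ...) */
	hubp->pos.cur_enable = cur_en;
}

/* Write the register only when the cached shadow disagrees. */
static void set_cursor_enable(struct hubp_stub *hubp, unsigned int cur_en)
{
	if (hubp->pos.cur_enable != cur_en)
		reg_update_cursor_enable(hubp, cur_en);
}

/* After power gating the hardware forgets everything, so the shadow must
 * be cleared too; otherwise the compare above skips a required write. */
static void hubp_reset(struct hubp_stub *hubp)
{
	memset(&hubp->pos, 0, sizeof(hubp->pos));
	memset(&hubp->att, 0, sizeof(hubp->att));
}

int main(void)
{
	struct hubp_stub hubp = { 0 };

	set_cursor_enable(&hubp, 1);
	set_cursor_enable(&hubp, 1);	/* deduplicated: no second write */
	hubp_reset(&hubp);		/* e.g. plane_atomic_power_down */
	set_cursor_enable(&hubp, 1);	/* must hit the register again */
	printf("mmio writes: %u\n", hubp.mmio_writes);	/* prints 2 */
	return 0;
}

The same invariant is why hubp1_init() and hubp401_init() now call hubp_reset() instead of doing nothing, and why the cursor-enable REG_UPDATEs in dpp1/dpp401/hubp2/hubp401 gained the shadow-compare guard.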
+ hubp_reset(hubp); } void hubp401_vready_at_or_After_vsync(struct hubp *hubp, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest) + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing) { - uint32_t value = 0; + unsigned int vstartup_lines = pipe_global_sync->dcn4x.vstartup_lines; + unsigned int vupdate_offset_pixels = pipe_global_sync->dcn4x.vupdate_offset_pixels; + unsigned int vupdate_width_pixels = pipe_global_sync->dcn4x.vupdate_vupdate_width_pixels; + unsigned int vready_offset_pixels = pipe_global_sync->dcn4x.vready_offset_pixels; + unsigned int htotal = timing->h_total; + unsigned int vblank_start = 0; + unsigned int vblank_end = 0; + unsigned int pixel_width = 0; + uint32_t reg_value = 0; + bool is_vready_at_or_after_vsync = false; struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + /* * if (VSTARTUP_START - (VREADY_OFFSET+VUPDATE_WIDTH+VUPDATE_OFFSET)/htotal) <= OTG_V_BLANK_END * Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 1 * else * Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 0 */ - if (pipe_dest->htotal != 0) { - if ((pipe_dest->vstartup_start - (pipe_dest->vready_offset+pipe_dest->vupdate_width - + pipe_dest->vupdate_offset) / pipe_dest->htotal) <= pipe_dest->vblank_end) { - value = 1; - } else - value = 0; + if (htotal != 0) { + vblank_start = timing->v_total - timing->v_front_porch; + vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom; + pixel_width = vready_offset_pixels + vupdate_width_pixels + vupdate_offset_pixels; + + is_vready_at_or_after_vsync = (vstartup_lines - pixel_width / htotal) <= vblank_end; + + if (is_vready_at_or_after_vsync) + reg_value = 1; } - REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, value); + REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, reg_value); } void hubp401_program_requestor( struct hubp *hubp, - struct _vcs_dpi_display_rq_regs_st *rq_regs) + struct dml2_display_rq_regs *rq_regs) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -196,8 +210,8 @@ void hubp401_program_requestor( void hubp401_program_deadline( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr) + struct dml2_display_dlg_regs *dlg_attr, + struct dml2_display_ttu_regs *ttu_attr) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -294,66 +308,64 @@ void hubp401_program_deadline( void hubp401_setup( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr, - struct _vcs_dpi_display_rq_regs_st *rq_regs, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest) + struct dml2_dchub_per_pipe_register_set *pipe_regs, + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing) { /* otg is locked when this func is called. Register are double buffered. 
* disable the requestors is not needed */ - hubp401_vready_at_or_After_vsync(hubp, pipe_dest); - hubp401_program_requestor(hubp, rq_regs); - hubp401_program_deadline(hubp, dlg_attr, ttu_attr); + hubp401_vready_at_or_After_vsync(hubp, pipe_global_sync, timing); + hubp401_program_requestor(hubp, &pipe_regs->rq_regs); + hubp401_program_deadline(hubp, &pipe_regs->dlg_regs, &pipe_regs->ttu_regs); } void hubp401_setup_interdependent( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr) + struct dml2_dchub_per_pipe_register_set *pipe_regs) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_SET_2(PREFETCH_SETTINGS, 0, - DST_Y_PREFETCH, dlg_attr->dst_y_prefetch, - VRATIO_PREFETCH, dlg_attr->vratio_prefetch); + DST_Y_PREFETCH, pipe_regs->dlg_regs.dst_y_prefetch, + VRATIO_PREFETCH, pipe_regs->dlg_regs.vratio_prefetch); REG_SET(PREFETCH_SETTINGS_C, 0, - VRATIO_PREFETCH_C, dlg_attr->vratio_prefetch_c); + VRATIO_PREFETCH_C, pipe_regs->dlg_regs.vratio_prefetch_c); REG_SET_2(VBLANK_PARAMETERS_0, 0, - DST_Y_PER_VM_VBLANK, dlg_attr->dst_y_per_vm_vblank, - DST_Y_PER_ROW_VBLANK, dlg_attr->dst_y_per_row_vblank); + DST_Y_PER_VM_VBLANK, pipe_regs->dlg_regs.dst_y_per_vm_vblank, + DST_Y_PER_ROW_VBLANK, pipe_regs->dlg_regs.dst_y_per_row_vblank); REG_SET_2(FLIP_PARAMETERS_0, 0, - DST_Y_PER_VM_FLIP, dlg_attr->dst_y_per_vm_flip, - DST_Y_PER_ROW_FLIP, dlg_attr->dst_y_per_row_flip); + DST_Y_PER_VM_FLIP, pipe_regs->dlg_regs.dst_y_per_vm_flip, + DST_Y_PER_ROW_FLIP, pipe_regs->dlg_regs.dst_y_per_row_flip); REG_SET(VBLANK_PARAMETERS_3, 0, - REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l); + REFCYC_PER_META_CHUNK_VBLANK_L, pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_l); REG_SET(VBLANK_PARAMETERS_4, 0, - REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c); + REFCYC_PER_META_CHUNK_VBLANK_C, pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_c); REG_SET(FLIP_PARAMETERS_2, 0, - REFCYC_PER_META_CHUNK_FLIP_L, dlg_attr->refcyc_per_meta_chunk_flip_l); + REFCYC_PER_META_CHUNK_FLIP_L, pipe_regs->dlg_regs.refcyc_per_meta_chunk_flip_l); REG_SET_2(PER_LINE_DELIVERY_PRE, 0, - REFCYC_PER_LINE_DELIVERY_PRE_L, dlg_attr->refcyc_per_line_delivery_pre_l, - REFCYC_PER_LINE_DELIVERY_PRE_C, dlg_attr->refcyc_per_line_delivery_pre_c); + REFCYC_PER_LINE_DELIVERY_PRE_L, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_l, + REFCYC_PER_LINE_DELIVERY_PRE_C, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_c); REG_SET(DCN_SURF0_TTU_CNTL1, 0, REFCYC_PER_REQ_DELIVERY_PRE, - ttu_attr->refcyc_per_req_delivery_pre_l); + pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_l); REG_SET(DCN_SURF1_TTU_CNTL1, 0, REFCYC_PER_REQ_DELIVERY_PRE, - ttu_attr->refcyc_per_req_delivery_pre_c); + pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_c); REG_SET(DCN_CUR0_TTU_CNTL1, 0, - REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0); + REFCYC_PER_REQ_DELIVERY_PRE, pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_cur0); REG_SET_2(DCN_GLOBAL_TTU_CNTL, 0, - MIN_TTU_VBLANK, ttu_attr->min_ttu_vblank, - QoS_LEVEL_FLIP, ttu_attr->qos_level_flip); + MIN_TTU_VBLANK, pipe_regs->ttu_regs.min_ttu_vblank, + QoS_LEVEL_FLIP, pipe_regs->ttu_regs.qos_level_flip); } @@ -532,7 +544,7 @@ void hubp401_dcc_control(struct hubp *hubp, void hubp401_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format) { /* DCSURF_ADDR_CONFIG still shows up in reg spec, but does not 
need to be programmed for DCN4x @@ -580,7 +592,7 @@ void hubp401_program_size( void hubp401_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -730,11 +742,13 @@ void hubp401_cursor_set_position( dc_fixpt_from_int(dst_x_offset), param->h_scale_ratio)); - if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) - hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { + if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) + hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); - REG_UPDATE(CURSOR_CONTROL, - CURSOR_ENABLE, cur_en); + REG_UPDATE(CURSOR_CONTROL, + CURSOR_ENABLE, cur_en); + } REG_SET_2(CURSOR_POSITION, 0, CURSOR_X_POSITION, x_pos, @@ -981,11 +995,12 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_program_surface_flip_and_addr = hubp401_program_surface_flip_and_addr, .hubp_program_surface_config = hubp401_program_surface_config, .hubp_is_flip_pending = hubp2_is_flip_pending, - .hubp_setup = hubp401_setup, - .hubp_setup_interdependent = hubp401_setup_interdependent, + .hubp_setup2 = hubp401_setup, + .hubp_setup_interdependent2 = hubp401_setup_interdependent, .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, .set_blank_regs = hubp2_set_blank_regs, + .hubp_reset = hubp_reset, .mem_program_viewport = hubp401_set_viewport, .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position = hubp401_cursor_set_position, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h index 9b200a55bf9d..6e1d4c90ddd4 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h @@ -256,29 +256,15 @@ void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); -void hubp401_vready_at_or_After_vsync(struct hubp *hubp, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); - -void hubp401_program_requestor( - struct hubp *hubp, - struct _vcs_dpi_display_rq_regs_st *rq_regs); - -void hubp401_program_deadline( - struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr); - void hubp401_setup( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr, - struct _vcs_dpi_display_rq_regs_st *rq_regs, - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); + struct dml2_dchub_per_pipe_register_set *pipe_regs, + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing); void hubp401_setup_interdependent( struct hubp *hubp, - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, - struct _vcs_dpi_display_ttu_regs_st *ttu_attr); + struct dml2_dchub_per_pipe_register_set *pipe_regs); bool hubp401_program_surface_flip_and_addr( struct hubp *hubp, @@ -290,7 +276,7 @@ void hubp401_dcc_control(struct hubp *hubp, void hubp401_program_tiling( struct dcn20_hubp *hubp2, - const union dc_tiling_info *info, + const struct dc_tiling_info *info, const enum surface_pixel_format pixel_format); void hubp401_program_size( @@ -302,7 +288,7 @@ void hubp401_program_size( void hubp401_program_surface_config( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info 
*tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, @@ -365,4 +351,17 @@ void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mo void hubp401_clear_tiling(struct hubp *hubp); +void hubp401_vready_at_or_After_vsync(struct hubp *hubp, + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing); + +void hubp401_program_requestor( + struct hubp *hubp, + struct dml2_display_rq_regs *rq_regs); + +void hubp401_program_deadline( + struct hubp *hubp, + struct dml2_display_dlg_regs *dlg_attr, + struct dml2_display_ttu_regs *ttu_attr); + #endif /* __DC_HUBP_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 681bb92c6069..44e405e9bc97 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1286,6 +1286,7 @@ void dcn10_plane_atomic_power_down(struct dc *dc, if (hws->funcs.hubp_pg_control) hws->funcs.hubp_pg_control(hws, hubp->inst, false); + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); REG_SET(DC_IP_REQUEST_CNTL, 0, @@ -1447,6 +1448,7 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) /* Disable on the current state so the new one isn't cleared. */ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index b029ec1b26d3..a5e18ab72394 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1288,7 +1288,7 @@ static void dcn20_power_on_plane_resources( } } -static void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, +void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) { //if (dc->debug.sanity_checks) { @@ -1467,7 +1467,7 @@ void dcn20_pipe_control_lock( } } -static void dcn20_detect_pipe_changes(struct dc_state *old_state, +void dcn20_detect_pipe_changes(struct dc_state *old_state, struct dc_state *new_state, struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe) @@ -1655,7 +1655,7 @@ static void dcn20_detect_pipe_changes(struct dc_state *old_state, } } -static void dcn20_update_dchubp_dpp( +void dcn20_update_dchubp_dpp( struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) @@ -1678,25 +1678,41 @@ static void dcn20_update_dchubp_dpp( * VTG is within DCHUBBUB which is commond block share by each pipe HUBP. * VTG is 1:1 mapping with OTG. 
Each pipe HUBP will select which VTG */ + if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) { hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst); - hubp->funcs->hubp_setup( - hubp, - &pipe_ctx->dlg_regs, - &pipe_ctx->ttu_regs, - &pipe_ctx->rq_regs, - &pipe_ctx->pipe_dlg_param); + if (hubp->funcs->hubp_setup2) { + hubp->funcs->hubp_setup2( + hubp, + &pipe_ctx->hubp_regs, + &pipe_ctx->global_sync, + &pipe_ctx->stream->timing); + } else { + hubp->funcs->hubp_setup( + hubp, + &pipe_ctx->dlg_regs, + &pipe_ctx->ttu_regs, + &pipe_ctx->rq_regs, + &pipe_ctx->pipe_dlg_param); + } } if (pipe_ctx->update_flags.bits.unbounded_req && hubp->funcs->set_unbounded_requesting) hubp->funcs->set_unbounded_requesting(hubp, pipe_ctx->unbounded_req); - if (pipe_ctx->update_flags.bits.hubp_interdependent) - hubp->funcs->hubp_setup_interdependent( - hubp, - &pipe_ctx->dlg_regs, - &pipe_ctx->ttu_regs); + if (pipe_ctx->update_flags.bits.hubp_interdependent) { + if (hubp->funcs->hubp_setup_interdependent2) { + hubp->funcs->hubp_setup_interdependent2( + hubp, + &pipe_ctx->hubp_regs); + } else { + hubp->funcs->hubp_setup_interdependent( + hubp, + &pipe_ctx->dlg_regs, + &pipe_ctx->ttu_regs); + } + } if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.plane_changed || @@ -1756,10 +1772,9 @@ static void dcn20_update_dchubp_dpp( &pipe_ctx->plane_res.scl_data.viewport_c); viewport_changed = true; } - if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) - hubp->funcs->hubp_program_mcache_id_and_split_coordinate( - hubp, - &pipe_ctx->mcache_regs); + + if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) + hubp->funcs->hubp_program_mcache_id_and_split_coordinate(hubp, &pipe_ctx->mcache_regs); /* Any updates are handled in dc interface, just need to apply existing for plane enable */ if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || @@ -1838,7 +1853,7 @@ static void dcn20_update_dchubp_dpp( hubp->funcs->phantom_hubp_post_enable(hubp); } -static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) +static int dcn20_calculate_vready_offset_for_group(struct pipe_ctx *pipe) { struct pipe_ctx *other_pipe; int vready_offset = pipe->pipe_dlg_param.vready_offset; @@ -1864,6 +1879,30 @@ static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) return vready_offset; } +static void dcn20_program_tg( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dce_hwseq *hws) +{ + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, + dcn20_calculate_vready_offset_for_group(pipe_ctx), + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); + + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + + pipe_ctx->stream_res.tg->funcs->set_vtg_params( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); + + if (hws->funcs.setup_vupdate_interrupt) + hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); +} + static void dcn20_program_pipe( struct dc *dc, struct pipe_ctx *pipe_ctx, @@ -1874,33 +1913,17 @@ static void dcn20_program_pipe( /* Only need to unblank on top pipe */ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) { if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.odm || - pipe_ctx->stream->update_flags.bits.abm_level) + 
pipe_ctx->update_flags.bits.odm || + pipe_ctx->stream->update_flags.bits.abm_level) hws->funcs.blank_pixel_data(dc, pipe_ctx, - !pipe_ctx->plane_state || - !pipe_ctx->plane_state->visible); + !pipe_ctx->plane_state || + !pipe_ctx->plane_state->visible); } /* Only update TG on top pipe */ if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe - && !pipe_ctx->prev_odm_pipe) { - pipe_ctx->stream_res.tg->funcs->program_global_sync( - pipe_ctx->stream_res.tg, - calculate_vready_offset_for_group(pipe_ctx), - pipe_ctx->pipe_dlg_param.vstartup_start, - pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width, - pipe_ctx->pipe_dlg_param.pstate_keepout); - - if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) - pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); - - pipe_ctx->stream_res.tg->funcs->set_vtg_params( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); - - if (hws->funcs.setup_vupdate_interrupt) - hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); - } + && !pipe_ctx->prev_odm_pipe) + dcn20_program_tg(dc, pipe_ctx, context, hws); if (pipe_ctx->update_flags.bits.odm) hws->funcs.update_odm(dc, context, pipe_ctx); @@ -1931,22 +1954,22 @@ static void dcn20_program_pipe( dcn20_update_dchubp_dpp(dc, pipe_ctx, context); if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || - pipe_ctx->plane_state->update_flags.bits.hdr_mult)) + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); if (hws->funcs.populate_mcm_luts) { if (pipe_ctx->plane_state) { hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, - pipe_ctx->plane_state->lut_bank_a); + pipe_ctx->plane_state->lut_bank_a); pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; } } if (pipe_ctx->plane_state && - (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || - pipe_ctx->plane_state->update_flags.bits.gamma_change || - pipe_ctx->plane_state->update_flags.bits.lut_3d || - pipe_ctx->update_flags.bits.enable)) + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change || + pipe_ctx->plane_state->update_flags.bits.lut_3d || + pipe_ctx->update_flags.bits.enable)) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish @@ -1954,10 +1977,10 @@ static void dcn20_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf || + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -1966,7 +1989,7 @@ static void dcn20_program_pipe( * causes a different pipe to be chosen to odm combine with. 
*/ if (pipe_ctx->update_flags.bits.enable - || pipe_ctx->update_flags.bits.opp_changed) { + || pipe_ctx->update_flags.bits.opp_changed) { pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( pipe_ctx->stream_res.opp, @@ -1996,14 +2019,14 @@ static void dcn20_program_pipe( memset(¶ms, 0, sizeof(params)); odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, ¶ms); dc->hwss.set_disp_pattern_generator(dc, - pipe_ctx, - pipe_ctx->stream_res.test_pattern_params.test_pattern, - pipe_ctx->stream_res.test_pattern_params.color_space, - pipe_ctx->stream_res.test_pattern_params.color_depth, - NULL, - pipe_ctx->stream_res.test_pattern_params.width, - pipe_ctx->stream_res.test_pattern_params.height, - pipe_ctx->stream_res.test_pattern_params.offset); + pipe_ctx, + pipe_ctx->stream_res.test_pattern_params.test_pattern, + pipe_ctx->stream_res.test_pattern_params.color_space, + pipe_ctx->stream_res.test_pattern_params.color_depth, + NULL, + pipe_ctx->stream_res.test_pattern_params.width, + pipe_ctx->stream_res.test_pattern_params.height, + pipe_ctx->stream_res.test_pattern_params.offset); } } @@ -2012,11 +2035,12 @@ void dcn20_program_front_end_for_ctx( struct dc_state *context) { int i; - struct dce_hwseq *hws = dc->hwseq; - DC_LOGGER_INIT(dc->ctx->logger); unsigned int prev_hubp_count = 0; unsigned int hubp_count = 0; - struct pipe_ctx *pipe; + struct dce_hwseq *hws = dc->hwseq; + struct pipe_ctx *pipe = NULL; + + DC_LOGGER_INIT(dc->ctx->logger); if (resource_is_pipe_topology_changed(dc->current_state, context)) resource_log_pipe_topology_update(dc, context); @@ -2029,7 +2053,7 @@ void dcn20_program_front_end_for_ctx( ASSERT(!pipe->plane_state->triplebuffer_flips); /*turn off triple buffer for full update*/ dc->hwss.program_triplebuffer( - dc, pipe, pipe->plane_state->triplebuffer_flips); + dc, pipe, pipe->plane_state->triplebuffer_flips); } } } @@ -2044,30 +2068,31 @@ void dcn20_program_front_end_for_ctx( if (prev_hubp_count == 0 && hubp_count > 0) { if (dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( - dc->res_pool->hubbub, true, false); + dc->res_pool->hubbub, true, false); udelay(500); } /* Set pipe update flags and lock pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i], - &context->res_ctx.pipe_ctx[i]); + &context->res_ctx.pipe_ctx[i]); /* When disabling phantom pipes, turn on phantom OTG first (so we can get double * buffer updates properly) */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream; + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream && - dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; if (tg->funcs->enable_crtc) { - if (dc->hwseq->funcs.blank_pixel_data) { + if (dc->hwseq->funcs.blank_pixel_data) dc->hwseq->funcs.blank_pixel_data(dc, pipe, true); - } + tg->funcs->enable_crtc(tg); } } @@ -2075,15 +2100,15 @@ void dcn20_program_front_end_for_ctx( /* OTG blank before disabling all front ends */ for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable - && !context->res_ctx.pipe_ctx[i].top_pipe - && !context->res_ctx.pipe_ctx[i].prev_odm_pipe - 
&& context->res_ctx.pipe_ctx[i].stream) + && !context->res_ctx.pipe_ctx[i].top_pipe + && !context->res_ctx.pipe_ctx[i].prev_odm_pipe + && context->res_ctx.pipe_ctx[i].stream) hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); /* Disconnect mpcc */ for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable - || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { + || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { struct hubbub *hubbub = dc->res_pool->hubbub; /* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom @@ -2093,13 +2118,18 @@ void dcn20_program_front_end_for_ctx( * DET allocation. */ if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable || - (context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM))) { + (context->res_ctx.pipe_ctx[i].plane_state && + dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) + == SUBVP_PHANTOM))) { if (hubbub->funcs->program_det_size) - hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); + hubbub->funcs->program_det_size(hubbub, + dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); if (dc->res_pool->hubbub->funcs->program_det_segments) - dc->res_pool->hubbub->funcs->program_det_segments(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); + dc->res_pool->hubbub->funcs->program_det_segments( + hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); } - hws->funcs.plane_atomic_disconnect(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); + hws->funcs.plane_atomic_disconnect(dc, dc->current_state, + &dc->current_state->res_ctx.pipe_ctx[i]); DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); } @@ -2107,9 +2137,9 @@ void dcn20_program_front_end_for_ctx( for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; if (resource_is_pipe_type(pipe, OTG_MASTER) && - !resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->update_flags.bits.odm && - hws->funcs.update_odm) + !resource_is_pipe_type(pipe, DPP_PIPE) && + pipe->update_flags.bits.odm && + hws->funcs.update_odm) hws->funcs.update_odm(dc, context, pipe); } @@ -2127,25 +2157,28 @@ void dcn20_program_front_end_for_ctx( else { /* Don't program phantom pipes in the regular front end programming sequence. * There is an MPO transition case where a pipe being used by a video plane is - * transitioned directly to be a phantom pipe when closing the MPO video. However - * the phantom pipe will program a new HUBP_VTG_SEL (update takes place right away), - * but the MPO still exists until the double buffered update of the main pipe so we - * will get a frame of underflow if the phantom pipe is programmed here. + * transitioned directly to be a phantom pipe when closing the MPO video. + * However the phantom pipe will program a new HUBP_VTG_SEL (update takes place + * right away) but the MPO still exists until the double buffered update of the + * main pipe so we will get a frame of underflow if the phantom pipe is + * programmed here. 
*/ - if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) + if (pipe->stream && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) dcn20_program_pipe(dc, pipe, context); } pipe = pipe->bottom_pipe; } } + /* Program secondary blending tree and writeback pipes */ pipe = &context->res_ctx.pipe_ctx[i]; if (!pipe->top_pipe && !pipe->prev_odm_pipe - && pipe->stream && pipe->stream->num_wb_info > 0 - && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) - || pipe->stream->update_flags.raw) - && hws->funcs.program_all_writeback_pipes_in_tree) + && pipe->stream && pipe->stream->num_wb_info > 0 + && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) + || pipe->stream->update_flags.raw) + && hws->funcs.program_all_writeback_pipes_in_tree) hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context); /* Avoid underflow by check of pipe line read when adding 2nd plane. */ @@ -2164,7 +2197,7 @@ void dcn20_program_front_end_for_ctx( * buffered pending status clear and reset opp head pipe's none double buffered * registers to their initial state. */ -static void post_unlock_reset_opp(struct dc *dc, +void dcn20_post_unlock_reset_opp(struct dc *dc, struct pipe_ctx *opp_head) { struct display_stream_compressor *dsc = opp_head->stream_res.dsc; @@ -2201,16 +2234,17 @@ void dcn20_post_unlock_program_front_end( struct dc *dc, struct dc_state *context) { - int i; - const unsigned int TIMEOUT_FOR_PIPE_ENABLE_US = 100000; + // Timeout for pipe enable + unsigned int timeout_us = 100000; unsigned int polling_interval_us = 1; struct dce_hwseq *hwseq = dc->hwseq; + int i; for (i = 0; i < dc->res_pool->pipe_count; i++) if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) && - !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD)) - post_unlock_reset_opp(dc, - &dc->current_state->res_ctx.pipe_ctx[i]); + !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD)) + dcn20_post_unlock_reset_opp(dc, + &dc->current_state->res_ctx.pipe_ctx[i]); for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) @@ -2226,11 +2260,12 @@ void dcn20_post_unlock_program_front_end( struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // Don't check flip pending on phantom pipes if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable && - dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { struct hubp *hubp = pipe->plane_res.hubp; int j = 0; - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us - && hubp->funcs->hubp_is_flip_pending(hubp); j++) + + for (j = 0; j < timeout_us / polling_interval_us + && hubp->funcs->hubp_is_flip_pending(hubp); j++) udelay(polling_interval_us); } } @@ -2244,15 +2279,14 @@ void dcn20_post_unlock_program_front_end( * before we've transitioned to 2:1 or 4:1 */ if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) && - resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) && - dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { + resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { int j = 0; struct timing_generator *tg = pipe->stream_res.tg; - if (tg->funcs->get_optc_double_buffer_pending) { - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / 
polling_interval_us - && tg->funcs->get_optc_double_buffer_pending(tg); j++) + for (j = 0; j < timeout_us / polling_interval_us + && tg->funcs->get_optc_double_buffer_pending(tg); j++) udelay(polling_interval_us); } } @@ -2260,7 +2294,7 @@ void dcn20_post_unlock_program_front_end( if (dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( - dc->res_pool->hubbub, false, false); + dc->res_pool->hubbub, false, false); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -2291,11 +2325,11 @@ void dcn20_post_unlock_program_front_end( return; /* P-State support transitions: - * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe - * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) - * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe - * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe - * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes + * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe + * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) + * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe + * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe + * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes */ if (hwseq->funcs.update_force_pstate) dc->hwseq->funcs.update_force_pstate(dc, context); @@ -2310,12 +2344,11 @@ void dcn20_post_unlock_program_front_end( if (hwseq->wa.DEGVIDCN21) dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub); - /* WA for stutter underflow during MPO transitions when adding 2nd plane */ if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) { if (dc->current_state->stream_status[0].plane_count == 1 && - context->stream_status[0].plane_count > 1) { + context->stream_status[0].plane_count > 1) { struct timing_generator *tg = dc->res_pool->timing_generators[0]; @@ -2463,7 +2496,7 @@ bool dcn20_update_bandwidth( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, - calculate_vready_offset_for_group(pipe_ctx), + dcn20_calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h index 5c874f7b0683..9d1ad3b29ca5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h @@ -154,6 +154,21 @@ void dcn20_setup_gsl_group_as_lock( const struct dc *dc, struct pipe_ctx *pipe_ctx, bool enable); - +void dcn20_detect_pipe_changes( + struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe); +void dcn20_enable_plane( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); +void dcn20_update_dchubp_dpp( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); +void dcn20_post_unlock_reset_opp( + struct dc *dc, + struct pipe_ctx *opp_head); #endif /* __DC_HWSS_DCN20_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 2e6ecf939fbd..ee4de9ddfef4 100644 --- 
a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1399,12 +1399,12 @@ void dcn32_disable_link_output(struct dc_link *link, link_hwss->disable_link_output(link, link_res, signal); link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; - - if (signal == SIGNAL_TYPE_EDP && - link->dc->hwss.edp_power_control && - !link->skip_implict_edp_power_control) - link->dc->hwss.edp_power_control(link, false); - else if (dmcu != NULL && dmcu->funcs->unlock_phy) + /* + * Add the logic to extract BOTH power up and power down sequences + * from enable/disable link output and only call edp panel control + * in enable_link_dp and disable_link_dp once. + */ + if (dmcu != NULL && dmcu->funcs->unlock_phy) dmcu->funcs->unlock_phy(dmcu); dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index d5f76cc69c73..623cde76debf 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -800,6 +800,7 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) /* Disable on the current state so the new one isn't cleared. */ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; @@ -956,6 +957,7 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) /*to do, need to support both case*/ hubp->power_gated = true; + hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); pipe_ctx->stream = NULL; @@ -1032,8 +1034,13 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp) update_state->pg_pipe_res_update[PG_MPCC][pipe_ctx->plane_res.mpcc_inst] = false; - if (pipe_ctx->stream_res.dsc) + if (pipe_ctx->stream_res.dsc) { update_state->pg_pipe_res_update[PG_DSC][pipe_ctx->stream_res.dsc->inst] = false; + if (dc->caps.sequential_ono) { + update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false; + update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false; + } + } if (pipe_ctx->stream_res.opp) update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false; @@ -1591,3 +1598,37 @@ bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx) return false; } + +/* + * Set powerup to true for every pipe to match pre-OS configuration. + */ +static void dcn35_calc_blocks_to_ungate_for_hw_release(struct dc *dc, struct pg_block_update *update_state) +{ + int i = 0, j = 0; + + memset(update_state, 0, sizeof(struct pg_block_update)); + + for (i = 0; i < dc->res_pool->pipe_count; i++) + for (j = 0; j < PG_HW_PIPE_RESOURCES_NUM_ELEMENT; j++) + update_state->pg_pipe_res_update[j][i] = true; + + update_state->pg_res_update[PG_HPO] = true; + update_state->pg_res_update[PG_DWB] = true; +} + +/* + * The purpose is to power up all gatings to restore optimization to pre-OS env. + * Re-use hwss func and existing PG&RCG flags to decide powerup sequence. 
+ */ +void dcn35_hardware_release(struct dc *dc) +{ + struct pg_block_update pg_update_state; + + dcn35_calc_blocks_to_ungate_for_hw_release(dc, &pg_update_state); + + if (dc->hwss.root_clock_control) + dc->hwss.root_clock_control(dc, &pg_update_state, true); + /*power up required HW block*/ + if (dc->hwss.hw_block_power_up) + dc->hwss.hw_block_power_up(dc, &pg_update_state); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h index e27b3609020f..0b1d6f608edd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h @@ -99,4 +99,6 @@ void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx, bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); +void dcn35_hardware_release(struct dc *dc); + #endif /* __DC_HWSS_DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 5ca8db2b2d03..c7acaf97974c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -122,6 +122,11 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .hardware_release = dcn35_hardware_release, + .detect_pipe_changes = dcn20_detect_pipe_changes, + .enable_plane = dcn20_enable_plane, + .update_dchubp_dpp = dcn20_update_dchubp_dpp, + .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, }; static const struct hwseq_private_funcs dcn35_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 8cb0fbd301d8..555a9f590cd7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -3,6 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. 
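For the dcn35_hardware_release() addition just above: handing the hardware back to the pre-OS environment wants every gating domain powered up, so the helper simply marks all pipe resources plus HPO and DWB for ungating and reuses the existing root_clock_control / hw_block_power_up sequencing. A standalone sketch of the mask-building step; the array dimensions are assumptions, the driver sizes them from res_pool->pipe_count and PG_HW_PIPE_RESOURCES_NUM_ELEMENT:

#include <stdbool.h>
#include <string.h>

#define PIPE_COUNT 4		/* assumed; really res_pool->pipe_count */
#define PIPE_RES_NUM 8		/* assumed; really PG_HW_PIPE_RESOURCES_NUM_ELEMENT */
enum pg_res { PG_HPO, PG_DWB, PG_RES_NUM };

struct pg_block_update {
	bool pg_pipe_res_update[PIPE_RES_NUM][PIPE_COUNT];
	bool pg_res_update[PG_RES_NUM];
};

/* Mirrors dcn35_calc_blocks_to_ungate_for_hw_release: flag every block
 * for power-up so the hand-back sees no gated domains. */
static void calc_blocks_to_ungate_for_hw_release(struct pg_block_update *upd)
{
	int i, j;

	memset(upd, 0, sizeof(*upd));
	for (i = 0; i < PIPE_COUNT; i++)
		for (j = 0; j < PIPE_RES_NUM; j++)
			upd->pg_pipe_res_update[j][i] = true;
	upd->pg_res_update[PG_HPO] = true;
	upd->pg_res_update[PG_DWB] = true;
}

int main(void)
{
	struct pg_block_update upd;

	calc_blocks_to_ungate_for_hw_release(&upd);
	return upd.pg_pipe_res_update[0][0] && upd.pg_res_update[PG_DWB] ? 0 : 1;
}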
#include "dm_services.h" +#include "basics/dc_common.h" #include "dm_helpers.h" #include "core_types.h" #include "resource.h" @@ -126,91 +127,6 @@ void dcn401_program_gamut_remap(struct pipe_ctx *pipe_ctx) mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust); } -struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, uint8_t region) -{ - struct dce_hwseq *hws = dc->hwseq; - struct ips_ono_region_state state = {0, 0}; - - switch (region) { - case 0: - /* dccg, dio, dcio */ - REG_GET_2(DOMAIN22_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 1: - /* dchubbub, dchvm, dchubbubmem */ - REG_GET_2(DOMAIN23_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 2: - /* mpc, opp, optc, dwb */ - REG_GET_2(DOMAIN24_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 3: - /* hpo */ - REG_GET_2(DOMAIN25_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 4: - /* dchubp0, dpp0 */ - REG_GET_2(DOMAIN0_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 5: - /* dsc0 */ - REG_GET_2(DOMAIN16_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 6: - /* dchubp1, dpp1 */ - REG_GET_2(DOMAIN1_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 7: - /* dsc1 */ - REG_GET_2(DOMAIN17_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 8: - /* dchubp2, dpp2 */ - REG_GET_2(DOMAIN2_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 9: - /* dsc2 */ - REG_GET_2(DOMAIN18_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 10: - /* dchubp3, dpp3 */ - REG_GET_2(DOMAIN3_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - case 11: - /* dsc3 */ - REG_GET_2(DOMAIN19_PG_STATUS, - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); - break; - default: - break; - } - - return state; -} - void dcn401_init_hw(struct dc *dc) { struct abm **abms = dc->res_pool->multiple_abms; @@ -881,15 +797,15 @@ enum dc_status dcn401_enable_stream_timing( patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->hblank_borrow; pipe_ctx->stream_res.tg->funcs->program_timing( - pipe_ctx->stream_res.tg, - &patched_crtc_timing, - pipe_ctx->pipe_dlg_param.vready_offset, - pipe_ctx->pipe_dlg_param.vstartup_start, - pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width, - pipe_ctx->pipe_dlg_param.pstate_keepout, - pipe_ctx->stream->signal, - true); + pipe_ctx->stream_res.tg, + &patched_crtc_timing, + (unsigned int)pipe_ctx->global_sync.dcn4x.vready_offset_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, + 
(unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines, + pipe_ctx->stream->signal, + true); for (i = 0; i < opp_cnt; i++) { opp_heads[i]->stream_res.opp->funcs->opp_pipe_clock_control( @@ -2013,3 +1929,730 @@ void dcn401_reset_hw_ctx_wrap( } } } + +static unsigned int dcn401_calculate_vready_offset_for_group(struct pipe_ctx *pipe) +{ + struct pipe_ctx *other_pipe; + unsigned int vready_offset = pipe->global_sync.dcn4x.vready_offset_pixels; + + /* Always use the largest vready_offset of all connected pipes */ + for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; + } + for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; + } + for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; + } + for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; + } + + return vready_offset; +} + +static void dcn401_program_tg( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dce_hwseq *hws) +{ + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, + dcn401_calculate_vready_offset_for_group(pipe_ctx), + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines); + + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + + pipe_ctx->stream_res.tg->funcs->set_vtg_params( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); + + if (hws->funcs.setup_vupdate_interrupt) + hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); +} + +static void dcn401_program_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context) +{ + struct dce_hwseq *hws = dc->hwseq; + + /* Only need to unblank on top pipe */ + if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) { + if (pipe_ctx->update_flags.bits.enable || + pipe_ctx->update_flags.bits.odm || + pipe_ctx->stream->update_flags.bits.abm_level) + hws->funcs.blank_pixel_data(dc, pipe_ctx, + !pipe_ctx->plane_state || + !pipe_ctx->plane_state->visible); + } + + /* Only update TG on top pipe */ + if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe + && !pipe_ctx->prev_odm_pipe) + dcn401_program_tg(dc, pipe_ctx, context, hws); + + if (pipe_ctx->update_flags.bits.odm) + hws->funcs.update_odm(dc, context, pipe_ctx); + + if (pipe_ctx->update_flags.bits.enable) { + if (hws->funcs.enable_plane) + hws->funcs.enable_plane(dc, pipe_ctx, context); + else + dc->hwss.enable_plane(dc, pipe_ctx, context); + + if (dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes) + 
dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes(dc->res_pool->hubbub); + } + + if (pipe_ctx->update_flags.bits.det_size) { + if (dc->res_pool->hubbub->funcs->program_det_size) + dc->res_pool->hubbub->funcs->program_det_size( + dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->det_buffer_size_kb); + if (dc->res_pool->hubbub->funcs->program_det_segments) + dc->res_pool->hubbub->funcs->program_det_segments( + dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); + } + + if (pipe_ctx->update_flags.raw || + (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || + pipe_ctx->stream->update_flags.raw) + dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context); + + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) + hws->funcs.set_hdr_multiplier(pipe_ctx); + + if (hws->funcs.populate_mcm_luts) { + if (pipe_ctx->plane_state) { + hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, + pipe_ctx->plane_state->lut_bank_a); + pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + } + } + + if (pipe_ctx->plane_state && + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change || + pipe_ctx->plane_state->update_flags.bits.lut_3d || + pipe_ctx->update_flags.bits.enable)) + hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); + + /* dcn10_translate_regamma_to_hw_format takes 750us to finish; + * only do gamma programming for powering on, with an internal memcmp to avoid + * updating on slave planes + */ + if (pipe_ctx->update_flags.bits.enable || + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf || + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); + + /* If the pipe has been enabled or has a different opp, we + * should reprogram the fmt. This deals with cases where + * interaction between mpc and odm combine on different streams + * causes a different pipe to be chosen to odm combine with.
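Nearly every step in dcn401_program_pipe is gated on this update_flags scheme: individual bits are set during change detection, and a single test of .raw asks whether anything changed at all. A minimal sketch of the union pattern, using an invented subset of the bits for illustration:

#include <stdint.h>
#include <stdio.h>

/* Invented subset of the pipe update flags: each bit records one kind
 * of detected change, and .raw aliases the whole set at once. */
union update_flags {
	struct {
		unsigned int enable : 1;
		unsigned int odm : 1;
		unsigned int mpcc : 1;
		unsigned int scaler : 1;
		unsigned int viewport : 1;
	} bits;
	uint32_t raw;
};

int main(void)
{
	union update_flags flags = { .raw = 0 };

	flags.bits.scaler = 1;   /* detect step found a scaler change */
	flags.bits.viewport = 1; /* ... and a viewport change */

	/* Cheap "anything to do?" gate, as in dcn401_program_pipe */
	if (flags.raw)
		printf("reprogram pipe (raw=0x%x)\n", (unsigned int)flags.raw);

	/* Fine-grained gates for individual programming steps */
	if (flags.bits.scaler)
		printf("program scaler\n");
	if (flags.bits.enable)
		printf("run full enable sequence\n");
	return 0;
}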
+ */ + if (pipe_ctx->update_flags.bits.enable + || pipe_ctx->update_flags.bits.opp_changed) { + + pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( + pipe_ctx->stream_res.opp, + COLOR_SPACE_YCBCR601, + pipe_ctx->stream->timing.display_color_depth, + pipe_ctx->stream->signal); + + pipe_ctx->stream_res.opp->funcs->opp_program_fmt( + pipe_ctx->stream_res.opp, + &pipe_ctx->stream->bit_depth_params, + &pipe_ctx->stream->clamping); + } + + /* Set ABM pipe after other pipe configurations done */ + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) { + if (pipe_ctx->stream_res.abm) { + dc->hwss.set_pipe(pipe_ctx); + pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm, + pipe_ctx->stream->abm_level); + } + } + + if (pipe_ctx->update_flags.bits.test_pattern_changed) { + struct output_pixel_processor *odm_opp = pipe_ctx->stream_res.opp; + struct bit_depth_reduction_params params; + + memset(¶ms, 0, sizeof(params)); + odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, ¶ms); + dc->hwss.set_disp_pattern_generator(dc, + pipe_ctx, + pipe_ctx->stream_res.test_pattern_params.test_pattern, + pipe_ctx->stream_res.test_pattern_params.color_space, + pipe_ctx->stream_res.test_pattern_params.color_depth, + NULL, + pipe_ctx->stream_res.test_pattern_params.width, + pipe_ctx->stream_res.test_pattern_params.height, + pipe_ctx->stream_res.test_pattern_params.offset); + } +} + +void dcn401_program_front_end_for_ctx( + struct dc *dc, + struct dc_state *context) +{ + int i; + unsigned int prev_hubp_count = 0; + unsigned int hubp_count = 0; + struct dce_hwseq *hws = dc->hwseq; + struct pipe_ctx *pipe = NULL; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (resource_is_pipe_topology_changed(dc->current_state, context)) + resource_log_pipe_topology_update(dc, context); + + if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->top_pipe && !pipe->prev_odm_pipe && pipe->plane_state) { + if (pipe->plane_state->triplebuffer_flips) + BREAK_TO_DEBUGGER(); + + /*turn off triple buffer for full update*/ + dc->hwss.program_triplebuffer( + dc, pipe, pipe->plane_state->triplebuffer_flips); + } + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (dc->current_state->res_ctx.pipe_ctx[i].plane_state) + prev_hubp_count++; + if (context->res_ctx.pipe_ctx[i].plane_state) + hubp_count++; + } + + if (prev_hubp_count == 0 && hubp_count > 0) { + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, true, false); + udelay(500); + } + + /* Set pipe update flags and lock pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) + dc->hwss.detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i], + &context->res_ctx.pipe_ctx[i]); + + /* When disabling phantom pipes, turn on phantom OTG first (so we can get double + * buffer updates properly) + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream; + + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream && + dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { + struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; + + if (tg->funcs->enable_crtc) { + if (dc->hwseq->funcs.blank_pixel_data) + 
dc->hwseq->funcs.blank_pixel_data(dc, pipe, true); + + tg->funcs->enable_crtc(tg); + } + } + } + /* OTG blank before disabling all front ends */ + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable + && !context->res_ctx.pipe_ctx[i].top_pipe + && !context->res_ctx.pipe_ctx[i].prev_odm_pipe + && context->res_ctx.pipe_ctx[i].stream) + hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); + + + /* Disconnect mpcc */ + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable + || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom + * then we want to do the programming here (effectively it's being disabled). If we do + * the programming later the DET won't be updated until the OTG for the phantom pipe is + * turned on (i.e. in an MCLK switch) which can come in too late and cause issues with + * DET allocation. + */ + if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable || + (context->res_ctx.pipe_ctx[i].plane_state && + dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == + SUBVP_PHANTOM))) { + if (hubbub->funcs->program_det_size) + hubbub->funcs->program_det_size(hubbub, + dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); + if (dc->res_pool->hubbub->funcs->program_det_segments) + dc->res_pool->hubbub->funcs->program_det_segments( + hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); + } + hws->funcs.plane_atomic_disconnect(dc, dc->current_state, + &dc->current_state->res_ctx.pipe_ctx[i]); + DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); + } + + /* update ODM for blanked OTG master pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + if (resource_is_pipe_type(pipe, OTG_MASTER) && + !resource_is_pipe_type(pipe, DPP_PIPE) && + pipe->update_flags.bits.odm && + hws->funcs.update_odm) + hws->funcs.update_odm(dc, context, pipe); + } + + /* + * Program all updated pipes, order matters for mpcc setup. Start with + * top pipe and program all pipes that follow in order + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state && !pipe->top_pipe) { + while (pipe) { + if (hws->funcs.program_pipe) + hws->funcs.program_pipe(dc, pipe, context); + else { + /* Don't program phantom pipes in the regular front end programming sequence. + * There is an MPO transition case where a pipe being used by a video plane is + * transitioned directly to be a phantom pipe when closing the MPO video. + * However the phantom pipe will program a new HUBP_VTG_SEL (update takes place + * right away) but the MPO still exists until the double buffered update of the + * main pipe so we will get a frame of underflow if the phantom pipe is + * programmed here. 
+ */ + if (pipe->stream && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) + dcn401_program_pipe(dc, pipe, context); + } + + pipe = pipe->bottom_pipe; + } + } + + /* Program secondary blending tree and writeback pipes */ + pipe = &context->res_ctx.pipe_ctx[i]; + if (!pipe->top_pipe && !pipe->prev_odm_pipe + && pipe->stream && pipe->stream->num_wb_info > 0 + && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) + || pipe->stream->update_flags.raw) + && hws->funcs.program_all_writeback_pipes_in_tree) + hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context); + + /* Avoid underflow by checking the pipe line read when adding a 2nd plane. */ + if (hws->wa.wait_hubpret_read_start_during_mpo_transition && + !pipe->top_pipe && + pipe->stream && + pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start && + dc->current_state->stream_status[0].plane_count == 1 && + context->stream_status[0].plane_count > 1) { + pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp); + } + } +} + +void dcn401_post_unlock_program_front_end( + struct dc *dc, + struct dc_state *context) +{ + // Timeout for pipe enable + unsigned int timeout_us = 100000; + unsigned int polling_interval_us = 1; + struct dce_hwseq *hwseq = dc->hwseq; + int i; + + DC_LOGGER_INIT(dc->ctx->logger); + + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) && + !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD)) + dc->hwss.post_unlock_reset_opp(dc, + &dc->current_state->res_ctx.pipe_ctx[i]); + + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) + dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); + + /* + * If we are enabling a pipe, we need to wait for pending clear as this is a critical + * part of the enable operation; otherwise, DM may request an immediate flip which + * will cause HW to perform an "immediate enable" (as opposed to "vsync enable") which + * is unsupported on DCN.
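The flip-pending wait that follows this comment, and the OPTC double-buffer wait after it, share one bounded-poll shape: divide the timeout budget by the poll interval and give up once the budget is spent. A standalone sketch under that assumption, with udelay and the hardware status query stubbed out:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for udelay() and a hardware status query;
 * this fake "hardware" clears after five polls. */
static int pending_polls = 5;

static void udelay_stub(unsigned int us) { (void)us; }

static bool flip_pending(void)
{
	return pending_polls-- > 0;
}

/* Same bounded-poll shape as the waits in
 * dcn401_post_unlock_program_front_end: poll at a fixed interval and
 * stop once the timeout budget is spent, rather than spinning forever
 * on wedged hardware. */
static bool wait_until_clear(unsigned int timeout_us, unsigned int interval_us)
{
	for (unsigned int i = 0; i < timeout_us / interval_us; i++) {
		if (!flip_pending())
			return true; /* condition cleared in time */
		udelay_stub(interval_us);
	}
	return false; /* timed out; the caller decides how to recover */
}

int main(void)
{
	/* 100 ms budget at a 1 us poll interval, as in the code above */
	printf("flip cleared: %s\n", wait_until_clear(100000, 1) ? "yes" : "no");
	return 0;
}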
+ */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + // Don't check flip pending on phantom pipes + if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { + struct hubp *hubp = pipe->plane_res.hubp; + int j = 0; + + for (j = 0; j < timeout_us / polling_interval_us + && hubp->funcs->hubp_is_flip_pending(hubp); j++) + udelay(polling_interval_us); + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + /* When going from a smaller ODM slice count to larger, we must ensure double + * buffer update completes before we return to ensure we don't reduce DISPCLK + * before we've transitioned to 2:1 or 4:1 + */ + if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) && + resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) && + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { + int j = 0; + struct timing_generator *tg = pipe->stream_res.tg; + + if (tg->funcs->get_optc_double_buffer_pending) { + for (j = 0; j < timeout_us / polling_interval_us + && tg->funcs->get_optc_double_buffer_pending(tg); j++) + udelay(polling_interval_us); + } + } + } + + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, false, false); + + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state && !pipe->top_pipe) { + /* Program phantom pipe here to prevent a frame of underflow in the MPO transition + * case (if a pipe being used for a video plane transitions to a phantom pipe, it + * can underflow due to HUBP_VTG_SEL programming if done in the regular front end + * programming sequence). + */ + while (pipe) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + /* When turning on the phantom pipe we want to run through the + * entire enable sequence, so apply all the "enable" flags. + */ + if (dc->hwss.apply_update_flags_for_phantom) + dc->hwss.apply_update_flags_for_phantom(pipe); + if (dc->hwss.update_phantom_vp_position) + dc->hwss.update_phantom_vp_position(dc, context, pipe); + dcn401_program_pipe(dc, pipe, context); + } + pipe = pipe->bottom_pipe; + } + } + } + + if (!hwseq) + return; + + /* P-State support transitions: + * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe + * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) + * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe + * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe + * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes + */ + if (hwseq->funcs.update_force_pstate) + dc->hwseq->funcs.update_force_pstate(dc, context); + + /* Only program the MALL registers after all the main and phantom pipes + * are done programming. 
+ */ + if (hwseq->funcs.program_mall_pipe_config) + hwseq->funcs.program_mall_pipe_config(dc, context); + + /* WA to apply WM setting*/ + if (hwseq->wa.DEGVIDCN21) + dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub); + + + /* WA for stutter underflow during MPO transitions when adding 2nd plane */ + if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) { + + if (dc->current_state->stream_status[0].plane_count == 1 && + context->stream_status[0].plane_count > 1) { + + struct timing_generator *tg = dc->res_pool->timing_generators[0]; + + dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, false); + + hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied = true; + hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied_on_frame = + tg->funcs->get_frame_count(tg); + } + } +} + +bool dcn401_update_bandwidth( + struct dc *dc, + struct dc_state *context) +{ + int i; + struct dce_hwseq *hws = dc->hwseq; + + /* recalculate DML parameters */ + if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) + return false; + + /* apply updated bandwidth parameters */ + dc->hwss.prepare_bandwidth(dc, context); + + /* update hubp configs for all pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->plane_state == NULL) + continue; + + if (pipe_ctx->top_pipe == NULL) { + bool blank = !is_pipe_tree_visible(pipe_ctx); + + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, + dcn401_calculate_vready_offset_for_group(pipe_ctx), + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines); + + pipe_ctx->stream_res.tg->funcs->set_vtg_params( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false); + + if (pipe_ctx->prev_odm_pipe == NULL) + hws->funcs.blank_pixel_data(dc, pipe_ctx, blank); + + if (hws->funcs.setup_vupdate_interrupt) + hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); + } + + if (pipe_ctx->plane_res.hubp->funcs->hubp_setup2) + pipe_ctx->plane_res.hubp->funcs->hubp_setup2( + pipe_ctx->plane_res.hubp, + &pipe_ctx->hubp_regs, + &pipe_ctx->global_sync, + &pipe_ctx->stream->timing); + } + + return true; +} + +void dcn401_detect_pipe_changes(struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe) +{ + bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM; + bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM; + + unsigned int old_pipe_vready_offset_pixels = old_pipe->global_sync.dcn4x.vready_offset_pixels; + unsigned int new_pipe_vready_offset_pixels = new_pipe->global_sync.dcn4x.vready_offset_pixels; + unsigned int old_pipe_vstartup_lines = old_pipe->global_sync.dcn4x.vstartup_lines; + unsigned int new_pipe_vstartup_lines = new_pipe->global_sync.dcn4x.vstartup_lines; + unsigned int old_pipe_vupdate_offset_pixels = old_pipe->global_sync.dcn4x.vupdate_offset_pixels; + unsigned int new_pipe_vupdate_offset_pixels = new_pipe->global_sync.dcn4x.vupdate_offset_pixels; + unsigned int old_pipe_vupdate_width_pixels = old_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels; + unsigned int new_pipe_vupdate_width_pixels = 
new_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels; + + new_pipe->update_flags.raw = 0; + + /* If a non-phantom pipe is being transitioned to a phantom pipe, + * set disable and return immediately. This is because the pipe + * that was previously in use must be fully disabled before we + * can "enable" it as a phantom pipe (since the OTG will certainly + * be different). The post_unlock sequence will set the correct + * update flags to enable the phantom pipe. + */ + if (old_pipe->plane_state && !old_is_phantom && + new_pipe->plane_state && new_is_phantom) { + new_pipe->update_flags.bits.disable = 1; + return; + } + + if (resource_is_pipe_type(new_pipe, OTG_MASTER) && + resource_is_odm_topology_changed(new_pipe, old_pipe)) + /* Detect odm changes */ + new_pipe->update_flags.bits.odm = 1; + + /* Exit on unchanged, unused pipe */ + if (!old_pipe->plane_state && !new_pipe->plane_state) + return; + /* Detect pipe enable/disable */ + if (!old_pipe->plane_state && new_pipe->plane_state) { + new_pipe->update_flags.bits.enable = 1; + new_pipe->update_flags.bits.mpcc = 1; + new_pipe->update_flags.bits.dppclk = 1; + new_pipe->update_flags.bits.hubp_interdependent = 1; + new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; + new_pipe->update_flags.bits.unbounded_req = 1; + new_pipe->update_flags.bits.gamut_remap = 1; + new_pipe->update_flags.bits.scaler = 1; + new_pipe->update_flags.bits.viewport = 1; + new_pipe->update_flags.bits.det_size = 1; + if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE && + new_pipe->stream_res.test_pattern_params.width != 0 && + new_pipe->stream_res.test_pattern_params.height != 0) + new_pipe->update_flags.bits.test_pattern_changed = 1; + if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) { + new_pipe->update_flags.bits.odm = 1; + new_pipe->update_flags.bits.global_sync = 1; + } + return; + } + + /* For SubVP we need to unconditionally enable because any phantom pipes are + * always removed then newly added on every full update whenever SubVP is in use. + * The remove-add sequence of the phantom pipe always results in the pipe + * being blanked in enable_stream_timing (DPG). + */ + if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM) + new_pipe->update_flags.bits.enable = 1; + + /* Phantom pipes are effectively disabled; if the pipe was previously phantom, + * we have to enable it + */ + if (old_pipe->plane_state && old_is_phantom && + new_pipe->plane_state && !new_is_phantom) + new_pipe->update_flags.bits.enable = 1; + + if (old_pipe->plane_state && !new_pipe->plane_state) { + new_pipe->update_flags.bits.disable = 1; + return; + } + + /* Detect plane change */ + if (old_pipe->plane_state != new_pipe->plane_state) + new_pipe->update_flags.bits.plane_changed = true; + + /* Detect top pipe only changes */ + if (resource_is_pipe_type(new_pipe, OTG_MASTER)) { + /* Detect global sync changes */ + if ((old_pipe_vready_offset_pixels != new_pipe_vready_offset_pixels) + || (old_pipe_vstartup_lines != new_pipe_vstartup_lines) + || (old_pipe_vupdate_offset_pixels != new_pipe_vupdate_offset_pixels) + || (old_pipe_vupdate_width_pixels != new_pipe_vupdate_width_pixels)) + new_pipe->update_flags.bits.global_sync = 1; + } + + if (old_pipe->det_buffer_size_kb != new_pipe->det_buffer_size_kb) + new_pipe->update_flags.bits.det_size = 1; + + /* + * Detect opp / tg change, only set on change, not on enable + * Assume mpcc inst = pipe index; if not, this code needs to be updated, + * since mpcc is what is affected by these.
In fact all of our sequence + * makes this assumption at the moment with how hubp reset is matched to + * same index mpcc reset. + */ + if (old_pipe->stream_res.opp != new_pipe->stream_res.opp) + new_pipe->update_flags.bits.opp_changed = 1; + if (old_pipe->stream_res.tg != new_pipe->stream_res.tg) + new_pipe->update_flags.bits.tg_changed = 1; + + /* + * Detect mpcc blending changes, only dpp inst and opp matter here, + * mpccs getting removed/inserted update connected ones during their own + * programming + */ + if (old_pipe->plane_res.dpp != new_pipe->plane_res.dpp + || old_pipe->stream_res.opp != new_pipe->stream_res.opp) + new_pipe->update_flags.bits.mpcc = 1; + + /* Detect dppclk change */ + if (old_pipe->plane_res.bw.dppclk_khz != new_pipe->plane_res.bw.dppclk_khz) + new_pipe->update_flags.bits.dppclk = 1; + + /* Check for scl update */ + if (memcmp(&old_pipe->plane_res.scl_data, &new_pipe->plane_res.scl_data, sizeof(struct scaler_data))) + new_pipe->update_flags.bits.scaler = 1; + /* Check for vp update */ + if (memcmp(&old_pipe->plane_res.scl_data.viewport, &new_pipe->plane_res.scl_data.viewport, sizeof(struct rect)) + || memcmp(&old_pipe->plane_res.scl_data.viewport_c, + &new_pipe->plane_res.scl_data.viewport_c, sizeof(struct rect))) + new_pipe->update_flags.bits.viewport = 1; + + /* Detect dlg/ttu/rq updates */ + { + struct dml2_display_dlg_regs old_dlg_regs = old_pipe->hubp_regs.dlg_regs; + struct dml2_display_ttu_regs old_ttu_regs = old_pipe->hubp_regs.ttu_regs; + struct dml2_display_rq_regs old_rq_regs = old_pipe->hubp_regs.rq_regs; + struct dml2_display_dlg_regs *new_dlg_regs = &new_pipe->hubp_regs.dlg_regs; + struct dml2_display_ttu_regs *new_ttu_regs = &new_pipe->hubp_regs.ttu_regs; + struct dml2_display_rq_regs *new_rq_regs = &new_pipe->hubp_regs.rq_regs; + + /* Detect pipe interdependent updates */ + if ((old_dlg_regs.dst_y_prefetch != new_dlg_regs->dst_y_prefetch) + || (old_dlg_regs.vratio_prefetch != new_dlg_regs->vratio_prefetch) + || (old_dlg_regs.vratio_prefetch_c != new_dlg_regs->vratio_prefetch_c) + || (old_dlg_regs.dst_y_per_vm_vblank != new_dlg_regs->dst_y_per_vm_vblank) + || (old_dlg_regs.dst_y_per_row_vblank != new_dlg_regs->dst_y_per_row_vblank) + || (old_dlg_regs.dst_y_per_vm_flip != new_dlg_regs->dst_y_per_vm_flip) + || (old_dlg_regs.dst_y_per_row_flip != new_dlg_regs->dst_y_per_row_flip) + || (old_dlg_regs.refcyc_per_meta_chunk_vblank_l != new_dlg_regs->refcyc_per_meta_chunk_vblank_l) + || (old_dlg_regs.refcyc_per_meta_chunk_vblank_c != new_dlg_regs->refcyc_per_meta_chunk_vblank_c) + || (old_dlg_regs.refcyc_per_meta_chunk_flip_l != new_dlg_regs->refcyc_per_meta_chunk_flip_l) + || (old_dlg_regs.refcyc_per_line_delivery_pre_l != new_dlg_regs->refcyc_per_line_delivery_pre_l) + || (old_dlg_regs.refcyc_per_line_delivery_pre_c != new_dlg_regs->refcyc_per_line_delivery_pre_c) + || (old_ttu_regs.refcyc_per_req_delivery_pre_l != new_ttu_regs->refcyc_per_req_delivery_pre_l) + || (old_ttu_regs.refcyc_per_req_delivery_pre_c != new_ttu_regs->refcyc_per_req_delivery_pre_c) + || (old_ttu_regs.refcyc_per_req_delivery_pre_cur0 != + new_ttu_regs->refcyc_per_req_delivery_pre_cur0) + || (old_ttu_regs.min_ttu_vblank != new_ttu_regs->min_ttu_vblank) + || (old_ttu_regs.qos_level_flip != new_ttu_regs->qos_level_flip)) { + old_dlg_regs.dst_y_prefetch = new_dlg_regs->dst_y_prefetch; + old_dlg_regs.vratio_prefetch = new_dlg_regs->vratio_prefetch; + old_dlg_regs.vratio_prefetch_c = new_dlg_regs->vratio_prefetch_c; + old_dlg_regs.dst_y_per_vm_vblank = new_dlg_regs->dst_y_per_vm_vblank; 
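The copy-back assignments running through this block implement a two-stage comparison: after the field-by-field check of the interdependent dlg/ttu subset, each compared field is copied from the new register copy into the local old copy, so the later whole-struct memcmp can only fire for fields outside that subset. A reduced sketch of the trick, using a hypothetical three-field register block:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical reduced register block: two fields belong to the
 * "interdependent" subset, one does not. */
struct dlg_regs {
	unsigned int dst_y_prefetch;  /* interdependent */
	unsigned int vratio_prefetch; /* interdependent */
	unsigned int other_field;     /* everything else */
};

int main(void)
{
	struct dlg_regs old_regs = { 10, 20, 30 };
	const struct dlg_regs new_regs = { 11, 20, 30 };
	bool interdependent = false, rq_dlg_ttu = false;

	/* Stage 1: field-by-field check of the interdependent subset;
	 * on a hit, copy the new values into the local old copy ... */
	if (old_regs.dst_y_prefetch != new_regs.dst_y_prefetch ||
	    old_regs.vratio_prefetch != new_regs.vratio_prefetch) {
		old_regs.dst_y_prefetch = new_regs.dst_y_prefetch;
		old_regs.vratio_prefetch = new_regs.vratio_prefetch;
		interdependent = true;
	}

	/* Stage 2: ... so this whole-struct memcmp can only fire for
	 * changes outside the interdependent subset. */
	if (memcmp(&old_regs, &new_regs, sizeof(old_regs)))
		rq_dlg_ttu = true;

	/* Prints interdependent=1 rq_dlg_ttu=0 for the values above */
	printf("interdependent=%d rq_dlg_ttu=%d\n", interdependent, rq_dlg_ttu);
	return 0;
}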
+ old_dlg_regs.dst_y_per_row_vblank = new_dlg_regs->dst_y_per_row_vblank; + old_dlg_regs.dst_y_per_vm_flip = new_dlg_regs->dst_y_per_vm_flip; + old_dlg_regs.dst_y_per_row_flip = new_dlg_regs->dst_y_per_row_flip; + old_dlg_regs.refcyc_per_meta_chunk_vblank_l = new_dlg_regs->refcyc_per_meta_chunk_vblank_l; + old_dlg_regs.refcyc_per_meta_chunk_vblank_c = new_dlg_regs->refcyc_per_meta_chunk_vblank_c; + old_dlg_regs.refcyc_per_meta_chunk_flip_l = new_dlg_regs->refcyc_per_meta_chunk_flip_l; + old_dlg_regs.refcyc_per_line_delivery_pre_l = new_dlg_regs->refcyc_per_line_delivery_pre_l; + old_dlg_regs.refcyc_per_line_delivery_pre_c = new_dlg_regs->refcyc_per_line_delivery_pre_c; + old_ttu_regs.refcyc_per_req_delivery_pre_l = new_ttu_regs->refcyc_per_req_delivery_pre_l; + old_ttu_regs.refcyc_per_req_delivery_pre_c = new_ttu_regs->refcyc_per_req_delivery_pre_c; + old_ttu_regs.refcyc_per_req_delivery_pre_cur0 = new_ttu_regs->refcyc_per_req_delivery_pre_cur0; + old_ttu_regs.min_ttu_vblank = new_ttu_regs->min_ttu_vblank; + old_ttu_regs.qos_level_flip = new_ttu_regs->qos_level_flip; + new_pipe->update_flags.bits.hubp_interdependent = 1; + } + /* Detect any other updates to ttu/rq/dlg */ + if (memcmp(&old_dlg_regs, new_dlg_regs, sizeof(old_dlg_regs)) || + memcmp(&old_ttu_regs, new_ttu_regs, sizeof(old_ttu_regs)) || + memcmp(&old_rq_regs, new_rq_regs, sizeof(old_rq_regs))) + new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; + } + + if (old_pipe->unbounded_req != new_pipe->unbounded_req) + new_pipe->update_flags.bits.unbounded_req = 1; + + if (memcmp(&old_pipe->stream_res.test_pattern_params, + &new_pipe->stream_res.test_pattern_params, sizeof(struct test_pattern_params))) { + new_pipe->update_flags.bits.test_pattern_changed = 1; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 28a513dfc005..17cea748789e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -63,8 +63,6 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx); bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable); -struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, - uint8_t region); void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc, const struct pipe_ctx *top_pipe_to_program); @@ -96,5 +94,12 @@ void dcn401_reset_hw_ctx_wrap( struct dc *dc, struct dc_state *context); void dcn401_perform_3dlut_wa_unlock(struct pipe_ctx *pipe_ctx); - +void dcn401_program_front_end_for_ctx(struct dc *dc, struct dc_state *context); +void dcn401_post_unlock_program_front_end(struct dc *dc, struct dc_state *context); +bool dcn401_update_bandwidth(struct dc *dc, struct dc_state *context); +void dcn401_detect_pipe_changes( + struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index b30f665d98a6..44cb376f97c1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -17,9 +17,9 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .init_hw = dcn401_init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .program_front_end_for_ctx = 
dcn401_program_front_end_for_ctx, .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .post_unlock_program_front_end = dcn401_post_unlock_program_front_end, .update_plane_addr = dcn20_update_plane_addr, .update_dchub = dcn10_update_dchub, .update_pending_status = dcn10_update_pending_status, @@ -42,7 +42,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .cursor_lock = dcn10_cursor_lock, .prepare_bandwidth = dcn401_prepare_bandwidth, .optimize_bandwidth = dcn401_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, + .update_bandwidth = dcn401_update_bandwidth, .set_drr = dcn10_set_drr, .get_position = dcn10_get_position, .set_static_screen_control = dcn31_set_static_screen_control, @@ -99,6 +99,10 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, .program_outstanding_updates = dcn401_program_outstanding_updates, .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, + .detect_pipe_changes = dcn401_detect_pipe_changes, + .enable_plane = dcn20_enable_plane, + .update_dchubp_dpp = dcn20_update_dchubp_dpp, + .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, }; static const struct hwseq_private_funcs dcn401_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index a5bb10d7b160..a7d66cfd93c9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -194,7 +194,6 @@ enum block_sequence_func { DMUB_SUBVP_SAVE_SURF_ADDR, HUBP_WAIT_FOR_DCC_META_PROP, DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST, - }; struct block_sequence { @@ -458,6 +457,18 @@ struct hw_sequencer_funcs { struct dc_state *context); void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable); void (*wait_for_all_pending_updates)(const struct pipe_ctx *pipe_ctx); + void (*detect_pipe_changes)(struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe); + void (*enable_plane)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); + void (*update_dchubp_dpp)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); + void (*post_unlock_reset_opp)(struct dc *dc, + struct pipe_ctx *opp_head); }; void color_space_to_black_color( @@ -485,11 +496,12 @@ void get_hdr_visual_confirm_color( void get_mpctree_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color); - +void get_vabc_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color); void get_subvp_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color); - void get_fams2_visual_confirm_color( struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 06a420154888..d558efc6e12f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -217,6 +217,7 @@ struct resource_funcs { */ int (*get_power_profile)(const struct dc_state *context); unsigned int (*get_det_buffer_size)(const struct dc_state *context); + unsigned int (*get_vstartup_for_pipe)(struct pipe_ctx *pipe_ctx); }; struct audio_support{ @@ -465,6 +466,7 @@ struct pipe_ctx { unsigned int surface_size_in_mall_bytes; struct dml2_dchub_per_pipe_register_set hubp_regs; struct 
dml2_hubp_pipe_mcache_regs mcache_regs; + union dml2_global_sync_programming global_sync; struct dwbc *dwbc; struct mcif_wb *mcif_wb; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 2d06067ff36d..c14d64687a3d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -306,6 +306,9 @@ struct clk_mgr_funcs { */ void (*set_hard_min_memclk)(struct clk_mgr *clk_mgr, bool current_mode); + int (*get_hard_min_memclk)(struct clk_mgr *clk_mgr); + int (*get_hard_min_fclk)(struct clk_mgr *clk_mgr); + /* Send message to PMFW to set hard max memclk frequency to highest DPM */ void (*set_hard_max_memclk)(struct clk_mgr *clk_mgr); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index d0878fc0cc94..b610beb075d5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -42,6 +42,7 @@ #include "cursor_reg_cache.h" #include "dml2/dml21/inc/dml_top_dchub_registers.h" +#include "dml2/dml21/inc/dml_top_types.h" #define OPP_ID_INVALID 0xf #define MAX_TTU 0xffffff @@ -144,14 +145,26 @@ struct hubp_funcs { struct _vcs_dpi_display_rq_regs_st *rq_regs, struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); + void (*hubp_setup2)( + struct hubp *hubp, + struct dml2_dchub_per_pipe_register_set *pipe_regs, + union dml2_global_sync_programming *pipe_global_sync, + struct dc_crtc_timing *timing); + void (*hubp_setup_interdependent)( struct hubp *hubp, struct _vcs_dpi_display_dlg_regs_st *dlg_regs, struct _vcs_dpi_display_ttu_regs_st *ttu_regs); + void (*hubp_setup_interdependent2)( + struct hubp *hubp, + struct dml2_dchub_per_pipe_register_set *pipe_regs); + void (*dcc_control)(struct hubp *hubp, bool enable, enum hubp_ind_block_size blk_size); + void (*hubp_reset)(struct hubp *hubp); + void (*mem_program_viewport)( struct hubp *hubp, const struct rect *viewport, @@ -165,7 +178,7 @@ struct hubp_funcs { void (*hubp_program_pte_vm)( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum dc_rotation_angle rotation); void (*hubp_set_vm_system_aperture_settings)( @@ -179,7 +192,7 @@ struct hubp_funcs { void (*hubp_program_surface_config)( struct hubp *hubp, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index af9183f5d69b..08c16ba52a51 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -168,6 +168,14 @@ struct link_encoder_funcs { struct link_encoder *enc, enum encoder_type_select sel, uint32_t hpo_inst); + void (*enable_dpia_output)(struct link_encoder *enc, + const struct dc_link_settings *link_settings, + uint8_t dpia_id, + uint8_t digmode, + uint8_t fec_rdy); + void (*disable_dpia_output)(struct link_encoder *link_enc, + uint8_t dpia_id, + uint8_t digmode); }; /* diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 4f5d102455ca..42fbc70f7056 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -150,7 +150,7 @@ struct 
mem_input_funcs { void (*mem_input_program_pte_vm)( struct mem_input *mem_input, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, enum dc_rotation_angle rotation); void (*mem_input_set_vm_system_aperture_settings)( @@ -164,7 +164,7 @@ struct mem_input_funcs { void (*mem_input_program_surface_config)( struct mem_input *mem_input, enum surface_pixel_format format, - union dc_tiling_info *tiling_info, + struct dc_tiling_info *tiling_info, struct plane_size *plane_size, enum dc_rotation_angle rotation, struct dc_plane_dcc_param *dcc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h index 03cbcbb36f1c..6fdc9809280c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -210,7 +210,7 @@ void optc1_enable_crtc_reset(struct timing_generator *optc, bool optc1_configure_crc(struct timing_generator *optc, const struct crc_params *params); -bool optc1_get_crc(struct timing_generator *optc, +bool optc1_get_crc(struct timing_generator *optc, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index b74e18cc1e66..9885cb3c310f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -141,6 +141,9 @@ struct crc_params { bool continuous_mode; bool enable; + + uint8_t crc_eng_inst; + bool reset; }; /** @@ -291,7 +294,7 @@ struct timing_generator_funcs { * @get_crc: Get CRCs for the given timing generator. Return false if * CRCs are not enabled (via configure_crc). */ - bool (*get_crc)(struct timing_generator *tg, + bool (*get_crc)(struct timing_generator *tg, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); void (*program_manual_trigger)(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index f04292086c08..fd1f9d3db039 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -148,6 +148,10 @@ struct link_service { const struct dc_stream_state *stream, const unsigned int num_streams); + uint32_t (*dp_required_hblank_size_bytes)( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params); + /*************************** DPMS *************************************/ void (*set_dpms_on)(struct dc_state *state, struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index ff8fe1a94965..96febabf464a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -251,7 +251,7 @@ static void dp_test_send_phy_test_pattern(struct dc_link *link) link_training_settings.lttpr_mode = dp_decide_lttpr_mode(link, &link->cur_link_settings); - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && link_training_settings.lttpr_mode == LTTPR_MODE_TRANSPARENT) dp_fixed_vs_pe_read_lane_adjust( link, @@ -646,7 +646,7 @@ bool dp_set_test_pattern( if (IS_DP_PHY_PATTERN(test_pattern)) { /* Set DPCD Lane Settings before running test pattern */ if (p_link_settings != NULL) { - if 
((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && p_link_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT) { dp_fixed_vs_pe_set_retimer_lane_settings( link, diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index 3e47a6735912..06faa461067b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -164,7 +164,9 @@ void disable_dio_link_output(struct dc_link *link, { struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); - link_enc->funcs->disable_output(link_enc, signal); + if (link_enc != NULL) + link_enc->funcs->disable_output(link_enc, signal); + link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); } diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c index 348ea4cb832d..a6d1d7641ab4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c @@ -187,7 +187,7 @@ static const struct link_hwss dio_fixed_vs_pe_retimer_link_hwss = { bool requires_fixed_vs_pe_retimer_dio_link_hwss(const struct dc_link *link) { - return (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN); + return ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN); } const struct link_hwss *get_dio_fixed_vs_pe_retimer_link_hwss(void) diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c index 6499807af72a..36adf95744fe 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c @@ -77,17 +77,74 @@ static void set_dio_dpia_lane_settings(struct dc_link *link, { } +static void enable_dpia_link_output(struct dc_link *link, + const struct link_resource *link_res, + enum signal_type signal, + enum clock_source_id clock_source, + const struct dc_link_settings *link_settings) +{ + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + + if (link_enc != NULL) { + if (link->dc->config.enable_dpia_pre_training && link_enc->funcs->enable_dpia_output) { + uint8_t fec_rdy = link->dc->link_srv->dp_should_enable_fec(link); + uint8_t digmode = dc_is_dp_sst_signal(signal) ? DIG_SST_MODE : DIG_MST_MODE; + + link_enc->funcs->enable_dpia_output( + link_enc, + link_settings, + link->ddc_hw_inst, + digmode, + fec_rdy); + } else { + if (dc_is_dp_sst_signal(signal)) + link_enc->funcs->enable_dp_output( + link_enc, + link_settings, + clock_source); + else + link_enc->funcs->enable_dp_mst_output( + link_enc, + link_settings, + clock_source); + } + + } + + link->dc->link_srv->dp_trace_source_sequence(link, + DPCD_SOURCE_SEQ_AFTER_ENABLE_LINK_PHY); +} + +static void disable_dpia_link_output(struct dc_link *link, + const struct link_resource *link_res, + enum signal_type signal) +{ + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + + if (link_enc != NULL) { + if (link->dc->config.enable_dpia_pre_training && link_enc->funcs->disable_dpia_output) { + uint8_t digmode = dc_is_dp_sst_signal(signal) ? 
DIG_SST_MODE : DIG_MST_MODE; + + link_enc->funcs->disable_dpia_output(link_enc, link->ddc_hw_inst, digmode); + } else + link_enc->funcs->disable_output(link_enc, signal); + } + + link->dc->link_srv->dp_trace_source_sequence(link, + DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); +} + static const struct link_hwss dpia_link_hwss = { .setup_stream_encoder = setup_dio_stream_encoder, .reset_stream_encoder = reset_dio_stream_encoder, .setup_stream_attribute = setup_dio_stream_attribute, - .disable_link_output = disable_dio_link_output, + .disable_link_output = disable_dpia_link_output, .setup_audio_output = setup_dio_audio_output, .enable_audio_packet = enable_dio_audio_packet, .disable_audio_packet = disable_dio_audio_packet, .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, - .enable_dp_link_output = enable_dio_dp_link_output, + .enable_dp_link_output = enable_dpia_link_output, .set_dp_link_test_pattern = set_dio_dpia_link_test_pattern, .set_dp_lane_settings = set_dio_dpia_lane_settings, .update_stream_allocation_table = update_dpia_stream_allocation_table, diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h index ad16ec5d9bb7..259e0f4775e1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h @@ -27,6 +27,9 @@ #include "link_hwss.h" +#define DIG_SST_MODE 0 +#define DIG_MST_MODE 5 + const struct link_hwss *get_dpia_link_hwss(void); bool can_use_dpia_link_hwss(const struct dc_link *link, const struct link_resource *link_res); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index e026c728042a..550e1a098fa2 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -829,7 +829,8 @@ static bool should_verify_link_capability_destructively(struct dc_link *link, if (link->dc->debug.skip_detection_link_training || dc_is_embedded_signal(link->local_sink->sink_signal) || - link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + !link->dc->config.enable_dpia_pre_training)) { destrictive = false; } else if (link_dp_get_encoding_format(&max_link_cap) == DP_8b_10b_ENCODING) { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 60e64e0138a3..ec7de9c01fab 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -772,6 +772,20 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable) return result; } +static bool dp_set_hblank_reduction_on_rx(struct pipe_ctx *pipe_ctx) +{ + struct dc *dc = pipe_ctx->stream->ctx->dc; + struct dc_stream_state *stream = pipe_ctx->stream; + bool result = false; + + if (dc_is_virtual_signal(stream->signal)) + result = true; + else + result = dm_helpers_dp_write_hblank_reduction(dc->ctx, stream); + return result; +} + + /* The stream with these settings can be sent (unblanked) only after DSC was enabled on RX first, * i.e. 
after dp_enable_dsc_on_rx() had been called */ @@ -1960,8 +1974,8 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) { unsigned short masked_chip_caps = pipe_ctx->stream->link->chip_caps & - EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; - if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { + AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; + if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { /* DP159, Retimer settings */ eng_id = pipe_ctx->stream_res.stream_enc->id; @@ -1972,7 +1986,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) write_i2c_default_retimer_setting(pipe_ctx, is_vga_mode, is_over_340mhz); } - } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { + } else if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { /* PI3EQX1204, Redriver settings */ write_i2c_redriver_setting(pipe_ctx, is_over_340mhz); } @@ -2028,7 +2042,7 @@ static enum dc_status enable_link_dp(struct dc_state *state, int lt_attempts = LINK_TRAINING_ATTEMPTS; // Increase retry count if attempting DP1.x on FIXED_VS link - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) lt_attempts = 10; @@ -2043,7 +2057,8 @@ static enum dc_status enable_link_dp(struct dc_state *state, /* Train with fallback when enabling DPIA link. Conventional links are * trained with fallback during sink detection. */ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + !link->dc->config.enable_dpia_pre_training) do_fallback = true; /* @@ -2379,13 +2394,13 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id; unsigned short masked_chip_caps = link->chip_caps & - EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; + AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; //Need to inform that sink is going to use legacy HDMI mode. write_scdc_data( link->ddc, 165000,//vbios only handles 165Mhz. false); - if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { + if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { /* DP159, Retimer settings */ if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings)) write_i2c_retimer_setting(pipe_ctx, @@ -2393,7 +2408,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) else write_i2c_default_retimer_setting(pipe_ctx, false, false); - } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { + } else if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { /* PI3EQX1204, Redriver settings */ write_i2c_redriver_setting(pipe_ctx, false); } @@ -2533,6 +2548,15 @@ void link_set_dpms_on( if (pipe_ctx->stream->dpms_off) return; + /* For a DP tunneling link, a pending HPD means that we have a race condition between processing the + * current link and processing the pending HPD. If we enable the link now, we may end up with a + * link that is not actually connected to a sink. So we skip enabling the link in this case. + */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->is_hpd_pending) { + DC_LOG_DEBUG("%s, Link%d HPD is pending, not enabling it.\n", __func__, link->link_index); + return; + } + + /* Have to setup DSC before DIG FE and BE are connected (which happens before the
This is to make sure the bandwidth sent to DIG BE won't be * bigger than what the link and/or DIG BE can handle. VBID[6]/CompressedStream_flag @@ -2598,6 +2622,9 @@ void link_set_dpms_on( } } + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + dp_set_hblank_reduction_on_rx(pipe_ctx); + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) allocate_usb4_bandwidth(pipe_ctx->stream); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 5e1b5ab9fbc6..a7877d57a00f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -101,6 +101,7 @@ static void construct_link_service_validation(struct link_service *link_srv) link_srv->validate_mode_timing = link_validate_mode_timing; link_srv->dp_link_bandwidth_kbps = dp_link_bandwidth_kbps; link_srv->validate_dpia_bandwidth = link_validate_dpia_bandwidth; + link_srv->dp_required_hblank_size_bytes = dp_required_hblank_size_bytes; } /* link dpms owns the programming sequence of stream's dpms state associated @@ -698,7 +699,7 @@ static bool construct_phy(struct dc_link *link, link->chip_caps); } - if (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) { + if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) { link->bios_forced_drive_settings.VOLTAGE_SWING = (bios->integrated_info->ext_disp_conn_info.fixdpvoltageswing & 0x3); link->bios_forced_drive_settings.PRE_EMPHASIS = diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 60f15a9ba7a5..29606fda029d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -409,3 +409,182 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias); } + +struct dp_audio_layout_config { + uint8_t layouts_per_sample_denom; + uint8_t symbols_per_layout; + uint8_t max_layouts_per_audio_sdp; +}; + +static void get_audio_layout_config( + uint32_t channel_count, + enum dp_link_encoding encoding, + struct dp_audio_layout_config *output) +{ + memset(output, 0, sizeof(struct dp_audio_layout_config)); + + /* Assuming L-PCM audio. Current implementation uses max 1 layout per SDP, + * with each layout being the same size (8ch layout). 
+ */ + if (encoding == DP_8b_10b_ENCODING) { + if (channel_count == 2) { + output->layouts_per_sample_denom = 4; + output->symbols_per_layout = 40; + output->max_layouts_per_audio_sdp = 1; + } else if (channel_count == 8 || channel_count == 6) { + output->layouts_per_sample_denom = 1; + output->symbols_per_layout = 40; + output->max_layouts_per_audio_sdp = 1; + } + } else if (encoding == DP_128b_132b_ENCODING) { + if (channel_count == 2) { + output->layouts_per_sample_denom = 4; + output->symbols_per_layout = 10; + output->max_layouts_per_audio_sdp = 1; + } else if (channel_count == 8 || channel_count == 6) { + output->layouts_per_sample_denom = 1; + output->symbols_per_layout = 10; + output->max_layouts_per_audio_sdp = 1; + } + } +} + +static uint32_t get_av_stream_map_lane_count( + enum dp_link_encoding encoding, + enum dc_lane_count lane_count, + bool is_mst) +{ + uint32_t av_stream_map_lane_count = 0; + + if (encoding == DP_8b_10b_ENCODING) { + if (!is_mst) + av_stream_map_lane_count = lane_count; + else + av_stream_map_lane_count = 4; + } else if (encoding == DP_128b_132b_ENCODING) { + av_stream_map_lane_count = 4; + } + + ASSERT(av_stream_map_lane_count != 0); + + return av_stream_map_lane_count; +} + +static uint32_t get_audio_sdp_overhead( + enum dp_link_encoding encoding, + enum dc_lane_count lane_count, + bool is_mst) +{ + uint32_t audio_sdp_overhead = 0; + + if (encoding == DP_8b_10b_ENCODING) { + if (is_mst) + audio_sdp_overhead = 16; /* 4 * 2 + 8 */ + else + audio_sdp_overhead = lane_count * 2 + 8; + } else if (encoding == DP_128b_132b_ENCODING) { + audio_sdp_overhead = 10; /* 4 x 2.5 */ + } + + ASSERT(audio_sdp_overhead != 0); + + return audio_sdp_overhead; +} + +/* Current calculation only applicable for 8b/10b MST and 128b/132b SST/MST. + */ +static uint32_t calculate_overhead_hblank_bw_in_symbols( + uint32_t max_slice_h) +{ + uint32_t overhead_hblank_bw = 0; /* in stream symbols */ + + overhead_hblank_bw += max_slice_h * 4; /* EOC overhead */ + overhead_hblank_bw += 12; /* Main link overhead (VBID, BS/BE) */ + + return overhead_hblank_bw; +} + +uint32_t dp_required_hblank_size_bytes( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params) +{ + /* Main logic from dce_audio is duplicated here, with the main + * difference being: + * - Pre-determined lane count of 4 + * - Assumed 16 dsc slices for worst case + * - Assumed SDP split disabled for worst case + * TODO: Unify logic from dce_audio to prevent duplicated logic. + */ + + const struct dc_crtc_timing *timing = audio_params->crtc_timing; + const uint32_t channel_count = audio_params->channel_count; + const uint32_t sample_rate_hz = audio_params->sample_rate_hz; + const enum dp_link_encoding link_encoding = audio_params->link_encoding; + + // 8b/10b MST and 128b/132b are always 4 logical lanes. 
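+	// As a worked example (hypothetical mode): 48 kHz 2ch LPCM over 8b/10b MST
+	// with a 148.5 MHz pixel clock and h_total of 2200 gives 67.5 kHz lines and
+	// ~0.71 samples per line, which rounds up to one 40-symbol layout per line.
+	// One SDP of 40 + 16 overhead symbols, scaled by the 1.1 margin and padded
+	// to the 4-lane map, is 64 symbols; adding 16 * 4 + 12 = 76 overhead symbols
+	// yields 140 symbols, i.e. 140 bytes of hblank at 8 bits per symbol.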
+ const uint32_t lane_count = 4; + const bool is_mst = (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT); + // Maximum slice count is with ODM 4:1, 4 slices per DSC + const uint32_t max_slices_h = 16; + + const uint32_t av_stream_map_lane_count = get_av_stream_map_lane_count( + link_encoding, lane_count, is_mst); + const uint32_t audio_sdp_overhead = get_audio_sdp_overhead( + link_encoding, lane_count, is_mst); + struct dp_audio_layout_config layout_config; + + if (link_encoding == DP_8b_10b_ENCODING && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) + return 0; + + get_audio_layout_config( + channel_count, link_encoding, &layout_config); + + /* DP spec recommends between 1.05 to 1.1 safety margin to prevent sample under-run */ + struct fixed31_32 audio_sdp_margin = dc_fixpt_from_fraction(110, 100); + struct fixed31_32 horizontal_line_freq_khz = dc_fixpt_from_fraction( + timing->pix_clk_100hz, (long long)timing->h_total * 10); + struct fixed31_32 samples_per_line; + struct fixed31_32 layouts_per_line; + struct fixed31_32 symbols_per_sdp_max_layout; + struct fixed31_32 remainder; + uint32_t num_sdp_with_max_layouts; + uint32_t required_symbols_per_hblank; + uint32_t required_bytes_per_hblank = 0; + + samples_per_line = dc_fixpt_from_fraction(sample_rate_hz, 1000); + samples_per_line = dc_fixpt_div(samples_per_line, horizontal_line_freq_khz); + layouts_per_line = dc_fixpt_div_int(samples_per_line, layout_config.layouts_per_sample_denom); + // HBlank expansion usage assumes SDP split disabled to allow for worst case. + layouts_per_line = dc_fixpt_from_int(dc_fixpt_ceil(layouts_per_line)); + + num_sdp_with_max_layouts = dc_fixpt_floor( + dc_fixpt_div_int(layouts_per_line, layout_config.max_layouts_per_audio_sdp)); + symbols_per_sdp_max_layout = dc_fixpt_from_int( + layout_config.max_layouts_per_audio_sdp * layout_config.symbols_per_layout); + symbols_per_sdp_max_layout = dc_fixpt_add_int(symbols_per_sdp_max_layout, audio_sdp_overhead); + symbols_per_sdp_max_layout = dc_fixpt_mul(symbols_per_sdp_max_layout, audio_sdp_margin); + required_symbols_per_hblank = num_sdp_with_max_layouts; + required_symbols_per_hblank *= ((dc_fixpt_ceil(symbols_per_sdp_max_layout) + av_stream_map_lane_count) / + av_stream_map_lane_count) * av_stream_map_lane_count; + + if (num_sdp_with_max_layouts != dc_fixpt_ceil( + dc_fixpt_div_int(layouts_per_line, layout_config.max_layouts_per_audio_sdp))) { + remainder = dc_fixpt_sub_int(layouts_per_line, + num_sdp_with_max_layouts * layout_config.max_layouts_per_audio_sdp); + remainder = dc_fixpt_mul_int(remainder, layout_config.symbols_per_layout); + remainder = dc_fixpt_add_int(remainder, audio_sdp_overhead); + remainder = dc_fixpt_mul(remainder, audio_sdp_margin); + required_symbols_per_hblank += ((dc_fixpt_ceil(remainder) + av_stream_map_lane_count) / + av_stream_map_lane_count) * av_stream_map_lane_count; + } + + required_symbols_per_hblank += calculate_overhead_hblank_bw_in_symbols(max_slices_h); + + if (link_encoding == DP_8b_10b_ENCODING) + required_bytes_per_hblank = required_symbols_per_hblank; // 8 bits per 8b/10b symbol + else if (link_encoding == DP_128b_132b_ENCODING) + required_bytes_per_hblank = required_symbols_per_hblank * 4; // 32 bits per 128b/132b symbol + + return required_bytes_per_hblank; +} + diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h index 595fb05946e9..bf398c49c3e8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h +++ 
b/drivers/gpu/drm/amd/display/dc/link/link_validation.h @@ -37,4 +37,9 @@ uint32_t dp_link_bandwidth_kbps( const struct dc_link *link, const struct dc_link_settings *link_settings); + +uint32_t dp_required_hblank_size_bytes( + const struct dc_link *link, + struct dp_audio_bandwidth_params *audio_params); + #endif /* __LINK_VALIDATION_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c index d6d5bbf2108c..267180e7bc48 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c @@ -505,7 +505,7 @@ bool try_to_configure_aux_timeout(struct ddc_service *ddc, bool result = false; struct ddc *ddc_pin = ddc->ddc_pin; - if ((ddc->link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((ddc->link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && !ddc->link->dc->debug.disable_fixed_vs_aux_timeout_wa && ddc->ctx->dce_version == DCN_VERSION_3_1) { /* Fixed VS workaround for AUX timeout */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index ce9d799d2386..44c3023a7731 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1554,7 +1554,7 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) /* If this chip cap is set, at least one retimer must exist in the chain * Override count to 1 if we receive a known bad count (0 or an invalid value) */ - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { /* If you see this message consistently, either the host platform has the FIXED_VS flag * incorrectly configured or the sink device is returning an invalid count. @@ -1779,6 +1779,11 @@ static bool retrieve_link_cap(struct dc_link *link) link->test_pattern_enabled = false; link->compliance_test_state.raw = 0; + link->dpcd_caps.receive_port0_cap.raw[0] = + dpcd_data[DP_RECEIVE_PORT_0_CAP_0 - DP_DPCD_REV]; + link->dpcd_caps.receive_port0_cap.raw[1] = + dpcd_data[DP_RECEIVE_PORT_0_BUFFER_SIZE - DP_DPCD_REV]; + /* read sink count */ core_link_read_dpcd(link, DP_SINK_COUNT, @@ -2307,6 +2312,14 @@ bool dp_verify_link_cap_with_retries( } else { link->verified_link_cap = last_verified_link_cap; } + + /* For a DP tunneling link, a pending HPD means that we have a race condition between processing + * the current link and processing the pending HPD. Since the training failed, we should just break + * the loop so that we have a chance to process the pending HPD.
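+	 * Breaking out also avoids spending the remaining verification retries
+	 * (each followed by the 10 ms sleep below) on a link that is about to be
+	 * reprocessed anyway.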
+ */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->is_hpd_pending) + break; + fsleep(10 * 1000); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 48abeaa88678..a08403c022ea 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -226,6 +226,8 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) replay_configuration.bits.STATE_TRANSITION_ERROR_STATUS) { bool allow_active; + link->replay_settings.config.replay_error_status.raw |= replay_error_status.raw; + if (link->replay_settings.config.force_disable_desync_error_check) return; @@ -237,6 +239,9 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) &replay_configuration.raw, sizeof(replay_configuration.raw)); + /* Update desync error counter */ + link->replay_settings.replay_desync_error_fail_count++; + /* Acknowledge and clear error bits */ dm_helpers_dp_write_dpcd( link->ctx, @@ -408,7 +413,8 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link, if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) { // Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + !link->dc->config.enable_dpia_pre_training) link->skip_fallback_on_link_loss = true; device_service_clear.bits.AUTOMATED_TEST = 1; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c index bafa52a0165a..2c73ac87cd66 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c @@ -104,7 +104,7 @@ void dp_set_hw_lane_settings( // Don't return here if using FIXED_VS link HWSS and encoding is 128b/132b if ((link_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) && !is_immediate_downstream(link, offset) && - (!(link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) || + (!((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) || link_dp_get_encoding_format(&link_settings->link_settings) == DP_8b_10b_ENCODING)) return; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 754c895e1bfb..88d4288cde0f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -739,7 +739,7 @@ void override_training_settings( if (overrides->ffe_preset != NULL) lt_settings->ffe_preset = overrides->ffe_preset; /* Override HW lane settings with BIOS forced values if present */ - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && lt_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT) { lt_settings->voltage_swing = &link->bios_forced_drive_settings.VOLTAGE_SWING; lt_settings->pre_emphasis = &link->bios_forced_drive_settings.PRE_EMPHASIS; @@ -1574,7 +1574,7 @@ enum link_training_result dp_perform_link_training( * Per DP specs starting from here, DPTX device shall not issue * Non-LT AUX transactions inside training mode. 
*/ - if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING) + if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING) status = dp_perform_fixed_vs_pe_training_sequence(link, link_res, &lt_settings); else if (encoding == DP_8b_10b_ENCODING) status = dp_perform_8b_10b_link_training(link, link_res, &lt_settings); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index 097d06023e64..19d5ebc6763c 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -302,7 +302,6 @@ void optc1_program_timing( /* Enable stereo - only when we need to pack 3D frame. Other types * of stereo handled in explicit call */ - if (optc->funcs->is_two_pixels_per_container(&patched_crtc_timing) || optc1->opp_count == 2) h_div = H_TIMING_DIV_BY2; @@ -1471,37 +1470,71 @@ bool optc1_configure_crc(struct timing_generator *optc, if (!optc1_is_tg_enabled(optc)) return false; - REG_WRITE(OTG_CRC_CNTL, 0); + if (!params->enable || params->reset) + REG_WRITE(OTG_CRC_CNTL, 0); if (!params->enable) return true; /* Program frame boundaries */ - /* Window A x axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, - OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, - OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); - - /* Window A y axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, - OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, - OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); - - /* Window B x axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, - OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, - OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); - - /* Window B y axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, - OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, - OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); - - /* Set crc mode and selection, and enable. Only using CRC0*/ - REG_UPDATE_3(OTG_CRC_CNTL, - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, - OTG_CRC0_SELECT, params->selection, - OTG_CRC_EN, 1); + switch (params->crc_eng_inst) { + case 0: + /* Window A x axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, + OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, + OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, + OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, + OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, + OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, + OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, + OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, + OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode and selection, and enable.*/ + REG_UPDATE_3(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC0_SELECT, params->selection, + OTG_CRC_EN, 1); + break; + case 1: + /* Window A x axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL, + OTG_CRC1_WINDOWA_X_START, params->windowa_x_start, + OTG_CRC1_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end.
*/ + REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL, + OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start, + OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL, + OTG_CRC1_WINDOWB_X_START, params->windowb_x_start, + OTG_CRC1_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL, + OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start, + OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode and selection, and enable.*/ + REG_UPDATE_3(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC1_SELECT, params->selection, + OTG_CRC_EN, 1); + break; + default: + return false; + } return true; } @@ -1510,6 +1543,7 @@ bool optc1_configure_crc(struct timing_generator *optc, * optc1_get_crc - Capture CRC result per component * * @optc: timing_generator instance. + * @idx: index of crc engine to get CRC from * @r_cr: 16-bit primary CRC signature for red data. * @g_y: 16-bit primary CRC signature for green data. * @b_cb: 16-bit primary CRC signature for blue data. @@ -1521,7 +1555,7 @@ bool optc1_configure_crc(struct timing_generator *optc, * If CRC is disabled, return false; otherwise, return true, and the CRC * results in the parameters. */ -bool optc1_get_crc(struct timing_generator *optc, +bool optc1_get_crc(struct timing_generator *optc, uint8_t idx, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { uint32_t field = 0; @@ -1533,14 +1567,30 @@ bool optc1_get_crc(struct timing_generator *optc, if (!field) return false; - /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */ - REG_GET_2(OTG_CRC0_DATA_RG, - CRC0_R_CR, r_cr, - CRC0_G_Y, g_y); + switch (idx) { + case 0: + /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */ + REG_GET_2(OTG_CRC0_DATA_RG, + CRC0_R_CR, r_cr, + CRC0_G_Y, g_y); - /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ - REG_GET(OTG_CRC0_DATA_B, - CRC0_B_CB, b_cb); + /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ + REG_GET(OTG_CRC0_DATA_B, + CRC0_B_CB, b_cb); + break; + case 1: + /* OTG_CRC1_DATA_RG has the CRC16 results for the red and green component */ + REG_GET_2(OTG_CRC1_DATA_RG, + CRC1_R_CR, r_cr, + CRC1_G_Y, g_y); + + /* OTG_CRC1_DATA_B has the CRC16 results for the blue component */ + REG_GET(OTG_CRC1_DATA_B, + CRC1_B_CB, b_cb); + break; + default: + return false; + } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h index 40757f20d73f..159172178d51 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h @@ -86,6 +86,12 @@ SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ + SRI(OTG_CRC1_DATA_RG, OTG, inst),\ + SRI(OTG_CRC1_DATA_B, OTG, inst),\ + SRI(OTG_CRC1_WINDOWA_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC1_WINDOWA_Y_CONTROL, OTG, inst),\ + SRI(OTG_CRC1_WINDOWB_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC1_WINDOWB_Y_CONTROL, OTG, inst),\ SR(GSL_SOURCE_SELECT),\ SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst) @@ -315,6 +321,7 @@ struct dcn_optc_registers { SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC1_SELECT, mask_sh),\ 
SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ @@ -327,6 +334,17 @@ struct dcn_optc_registers { SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC1_DATA_RG, CRC1_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC1_DATA_RG, CRC1_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC1_DATA_B, CRC1_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_END, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_END, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_END, mask_sh),\ SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ @@ -482,6 +500,7 @@ struct dcn_optc_registers { type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN;\ type OTG_CRC_CONT_EN;\ type OTG_CRC0_SELECT;\ + type OTG_CRC1_SELECT;\ type OTG_CRC_EN;\ type CRC0_R_CR;\ type CRC0_G_Y;\ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index dfa9364fe5a6..d21e82b927d0 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -183,34 +183,87 @@ static bool optc35_configure_crc(struct timing_generator *optc, { struct optc *optc1 = DCN10TG_FROM_TG(optc); + /* Cannot configure crc on a CRTC that is disabled */ if (!optc1_is_tg_enabled(optc)) return false; - REG_WRITE(OTG_CRC_CNTL, 0); + + if (!params->enable || params->reset) + REG_WRITE(OTG_CRC_CNTL, 0); + if (!params->enable) return true; - REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, - OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, - OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); - REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, - OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, - OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); - REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, - OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, - OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); - REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, - OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, - OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); - if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) { - REG_UPDATE_4(OTG_CRC_CNTL, - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, - OTG_CRC0_SELECT, params->selection, - OTG_CRC_EN, 1, - OTG_CRC_WINDOW_DB_EN, 1); - } else - REG_UPDATE_3(OTG_CRC_CNTL, - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, - OTG_CRC0_SELECT, params->selection, - OTG_CRC_EN, 1); + + /* Program frame boundaries */ + switch (params->crc_eng_inst) { + case 0: + /* Window A x axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, + OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, + OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. 
*/ + REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, + OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, + OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, + OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, + OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, + OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, + OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); + + if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) + REG_UPDATE_4(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC0_SELECT, params->selection, + OTG_CRC_EN, 1, + OTG_CRC_WINDOW_DB_EN, 1); + else + REG_UPDATE_3(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC0_SELECT, params->selection, + OTG_CRC_EN, 1); + break; + case 1: + /* Window A x axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL, + OTG_CRC1_WINDOWA_X_START, params->windowa_x_start, + OTG_CRC1_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL, + OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start, + OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL, + OTG_CRC1_WINDOWB_X_START, params->windowb_x_start, + OTG_CRC1_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL, + OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start, + OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end); + + if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) + REG_UPDATE_4(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC1_SELECT, params->selection, + OTG_CRC_EN, 1, + OTG_CRC_WINDOW_DB_EN, 1); + else + REG_UPDATE_3(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 
1 : 0, + OTG_CRC1_SELECT, params->selection, + OTG_CRC_EN, 1); + break; + default: + return false; + } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index 770a380cc03d..e92f14d50adb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -1258,6 +1258,11 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( return NULL; } +unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx) +{ + return pipe_ctx->pipe_dlg_param.vstartup_start; +} + static const struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn10_get_dcc_compression_cap }; @@ -1272,7 +1277,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = { .validate_global = dcn10_validate_global, .add_stream_to_ctx = dcn10_add_stream_to_ctx, .patch_unknown_plane_state = dcn10_patch_unknown_plane_state, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h index bf8e33cd8147..7bc1be53e800 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h @@ -51,6 +51,7 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( const struct resource_pool *pool, struct dc_stream_state *stream); +unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx); #endif /* __DC_RESOURCE_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 5c616b1f7bf7..5c6dc710e96c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2229,7 +2229,8 @@ static const struct resource_funcs dcn20_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .set_mcif_arb_params = dcn20_set_mcif_arb_params, .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 9f37f0097feb..43fa2cb117f3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -1079,7 +1079,8 @@ static struct resource_funcs dcn201_res_pool_funcs = { .populate_dml_writeback_from_context = dcn201_populate_dml_writeback_from_context, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .set_mcif_arb_params = dcn20_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link + .find_first_free_match_stream_enc_for_link = 
dcn10_find_first_free_match_stream_enc_for_link, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn201_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 021ba8ac5c8c..2615c36d5ffe 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -1378,6 +1378,7 @@ static const struct resource_funcs dcn21_res_pool_funcs = { .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, .update_bw_bounding_box = dcn21_update_bw_bounding_box, .get_panel_config_defaults = dcn21_get_panel_config_defaults, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn21_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index bfd0eccbed28..13202ce30d66 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -2250,6 +2250,7 @@ static const struct resource_funcs dcn30_res_pool_funcs = { .update_bw_bounding_box = dcn30_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn30_get_panel_config_defaults, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; #define CTX ctx diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index a9816affd312..121a86a59833 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -671,9 +671,9 @@ static const struct dc_plane_cap plane_cap = { /* 6:1 downscaling ratio: 1000/6 = 166.666 */ .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 + .argb8888 = 358, + .nv12 = 358, + .fp16 = 358 }, 64, 64 @@ -693,7 +693,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ + .max_downscale_src_width = 4096,/*upto true 4k*/ .scl_reset_length10 = true, .sanity_checks = false, .underflow_assert_delay_us = 0xFFFFFFFF, @@ -1400,7 +1400,8 @@ static struct resource_funcs dcn301_res_pool_funcs = { .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn301_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn301_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 7baefc910a3d..012c5fd52cb1 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -1151,6 +1151,7 @@ static struct resource_funcs dcn302_res_pool_funcs = { .update_bw_bounding_box = dcn302_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn302_get_panel_config_defaults, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static 
struct dc_cap_funcs cap_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index 8a57d46ad15f..a8d0b4686f9a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -1096,6 +1096,7 @@ static struct resource_funcs dcn303_res_pool_funcs = { .update_bw_bounding_box = dcn303_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn303_get_panel_config_defaults, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static struct dc_cap_funcs cap_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 54ec3d8e920c..911bd60d4fbc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1849,6 +1849,7 @@ static struct resource_funcs dcn31_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn31_get_panel_config_defaults, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static struct clock_source *dcn30_clock_source_create( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 2794473f2aff..e3ba105034f8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1778,6 +1778,7 @@ static struct resource_funcs dcn314_res_pool_funcs = { .get_panel_config_defaults = dcn314_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static struct clock_source *dcn30_clock_source_create( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 4ee33eb3381d..14acef036b5a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1846,6 +1846,7 @@ static struct resource_funcs dcn315_res_pool_funcs = { .get_panel_config_defaults = dcn315_get_panel_config_defaults, .get_power_profile = dcn315_get_power_profile, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn315_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 79eddbafe3c2..568094827212 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -1720,6 +1720,7 @@ static struct resource_funcs dcn316_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn316_get_panel_config_defaults, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn316_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 12d247a7ec45..664302876019 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2066,6 +2066,7 @@ static struct resource_funcs dcn32_res_pool_funcs = { .add_phantom_pipes = dcn32_add_phantom_pipes, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 06b9479c8bd3..38d76434683e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1624,6 +1624,7 @@ static struct resource_funcs dcn321_res_pool_funcs = { .add_phantom_pipes = dcn32_add_phantom_pipes, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 89e2adcf2a28..8ee3d99ea2aa 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1752,6 +1752,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc, return out; } +enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state) +{ + plane_state->tiling_info.gfxversion = DcGfxVersion9; + dcn20_patch_unknown_plane_state(plane_state); + return DC_OK; +} + static struct resource_funcs dcn35_res_pool_funcs = { .destroy = dcn35_destroy_resource_pool, @@ -1775,10 +1782,11 @@ static struct resource_funcs dcn35_res_pool_funcs = { .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn35_update_bw_bounding_box_fpu, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .patch_unknown_plane_state = dcn35_patch_unknown_plane_state, .get_panel_config_defaults = dcn35_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn35_get_preferred_eng_id_dpia, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn35_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h index f97bb4cb3761..9d03a55d90cf 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h @@ -35,6 +35,7 @@ extern struct _vcs_dpi_ip_params_st dcn3_5_ip; extern struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc; +enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state); struct dcn35_resource_pool { struct resource_pool base; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 263a37c1cd3a..14f7c3acdc96 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ 
b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1754,10 +1754,11 @@ static struct resource_funcs dcn351_res_pool_funcs = { .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn351_update_bw_bounding_box_fpu, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .patch_unknown_plane_state = dcn35_patch_unknown_plane_state, .get_panel_config_defaults = dcn35_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn351_get_preferred_eng_id_dpia, .get_det_buffer_size = dcn31_get_det_buffer_size, + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe }; static bool dcn351_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 09f5b8b40791..c1ebc6b1c937 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -737,7 +737,7 @@ static const struct dc_debug_options debug_defaults_drv = { .enable_stall_recovery = true, } }, - .force_cositing = CHROMA_COSITING_TOPLEFT + 1, + .force_cositing = CHROMA_COSITING_NONE + 1, }; static struct dce_aux *dcn401_aux_engine_create( @@ -1297,6 +1297,29 @@ static struct hpo_dp_link_encoder *dcn401_hpo_dp_link_encoder_create( return &hpo_dp_enc31->base; } +static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans) +{ + unsigned int num_available_chans = 1; + + /* channels for MALL must be a power of 2 */ + while (num_chans > 1) { + num_available_chans = (num_available_chans << 1); + num_chans = (num_chans >> 1); + } + + /* cannot be odd */ + num_available_chans &= ~1; + + /* clamp to max available channels for MALL per ASIC */ + if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) { + num_available_chans = num_available_chans > 16 ? 16 : num_available_chans; + } else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) { + num_available_chans = num_available_chans > 8 ? 
8 : num_available_chans; + } + + return num_available_chans; +} + static struct dce_hwseq *dcn401_hwseq_create( struct dc_context *ctx) { @@ -1592,6 +1615,14 @@ static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b memcpy(dml2_opt, &dc->dml2_options, sizeof(dc->dml2_options)); + /* re-calculate the available MALL size if required */ + if (bw_params->num_channels > 0) { + dc->caps.max_cab_allocation_bytes = dcn401_calc_num_avail_chans_for_mall( + dc, bw_params->num_channels) * + dc->caps.mall_size_per_mem_channel * 1024 * 1024; + dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes; + } + DC_FP_START(); dcn401_update_bw_bounding_box_fpu(dc, bw_params); @@ -1609,6 +1640,7 @@ static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state) { + plane_state->tiling_info.gfxversion = DcGfxAddr3; plane_state->tiling_info.gfx_addr3.swizzle = DC_ADDR3_SW_64KB_2D; return DC_OK; } @@ -1708,27 +1740,9 @@ static int dcn401_get_power_profile(const struct dc_state *context) return dpm_level; } -static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans) +static unsigned int dcn401_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx) { - unsigned int num_available_chans = 1; - - /* channels for MALL must be a power of 2 */ - while (num_chans > 1) { - num_available_chans = (num_available_chans << 1); - num_chans = (num_chans >> 1); - } - - /* cannot be odd */ - num_available_chans &= ~1; - - /* clamp to max available channels for MALL per ASIC */ - if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) { - num_available_chans = num_available_chans > 16 ? 16 : num_available_chans; - } else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) { - num_available_chans = num_available_chans > 8 ? 
8 : num_available_chans; - } - - return num_available_chans; + return pipe_ctx->global_sync.dcn4x.vstartup_lines; } static struct resource_funcs dcn401_res_pool_funcs = { @@ -1758,6 +1772,7 @@ static struct resource_funcs dcn401_res_pool_funcs = { .build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, .get_power_profile = dcn401_get_power_profile, + .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 1306ce0321e2..38a9a0d68058 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -11,6 +11,41 @@ #define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19)) #define MIN_VIEWPORT_SIZE 12 +static bool spl_is_yuv420(enum spl_pixel_format format) +{ + if ((format >= SPL_PIXEL_FORMAT_420BPP8) && + (format <= SPL_PIXEL_FORMAT_420BPP10)) + return true; + + return false; +} + +static bool spl_is_rgb8(enum spl_pixel_format format) +{ + if (format == SPL_PIXEL_FORMAT_ARGB8888) + return true; + + return false; +} + +static bool spl_is_video_format(enum spl_pixel_format format) +{ + if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN + && format <= SPL_PIXEL_FORMAT_VIDEO_END) + return true; + else + return false; +} + +static bool spl_is_subsampled_format(enum spl_pixel_format format) +{ + if (format >= SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN + && format <= SPL_PIXEL_FORMAT_SUBSAMPLED_END) + return true; + else + return false; +} + static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1) { struct spl_rect rec; @@ -408,8 +443,7 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz; spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert; - if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 - || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) { + if (spl_is_yuv420(spl_in->basic_in.format)) { spl_scratch->scl_data.ratios.horz_c.value /= 2; spl_scratch->scl_data.ratios.vert_c.value /= 2; } @@ -546,41 +580,6 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, *vp_offset = src_size - *vp_offset - *vp_size; } -static bool spl_is_yuv420(enum spl_pixel_format format) -{ - if ((format >= SPL_PIXEL_FORMAT_420BPP8) && - (format <= SPL_PIXEL_FORMAT_420BPP10)) - return true; - - return false; -} - -static bool spl_is_rgb8(enum spl_pixel_format format) -{ - if (format == SPL_PIXEL_FORMAT_ARGB8888) - return true; - - return false; -} - -static bool spl_is_video_format(enum spl_pixel_format format) -{ - if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN - && format <= SPL_PIXEL_FORMAT_VIDEO_END) - return true; - else - return false; -} - -static bool spl_is_subsampled_format(enum spl_pixel_format format) -{ - if (format >= SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN - && format <= SPL_PIXEL_FORMAT_SUBSAMPLED_END) - return true; - else - return false; -} - /*Calculate inits and viewport */ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_scratch *spl_scratch) @@ -591,8 +590,7 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_rect recout_clip_in_recout_dst; struct spl_rect overlap_in_active_timing; struct spl_rect odm_slice = calculate_odm_slice_in_timing_active(spl_in); - int vpc_div = (spl_in->basic_in.format == 
SPL_PIXEL_FORMAT_420BPP8 - || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 2 : 1; + int vpc_div = spl_is_subsampled_format(spl_in->basic_in.format) ? 2 : 1; bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; struct spl_fixed31_32 init_adj_h = spl_fixpt_zero; struct spl_fixed31_32 init_adj_v = spl_fixpt_zero; @@ -620,11 +618,6 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, &flip_vert_scan_dir, &flip_horz_scan_dir); - if (orthogonal_rotation) { - spl_swap(src.width, src.height); - spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); - } - if (spl_is_subsampled_format(spl_in->basic_in.format)) { /* this gives the direction of the cositing (negative will move * left, right otherwise) @@ -633,15 +626,15 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, switch (spl_in->basic_in.cositing) { - case CHROMA_COSITING_LEFT: - init_adj_h = spl_fixpt_zero; + case CHROMA_COSITING_TOPLEFT: + init_adj_h = spl_fixpt_from_fraction(sign, 4); init_adj_v = spl_fixpt_from_fraction(sign, 4); break; - case CHROMA_COSITING_NONE: + case CHROMA_COSITING_LEFT: init_adj_h = spl_fixpt_from_fraction(sign, 4); - init_adj_v = spl_fixpt_from_fraction(sign, 4); + init_adj_v = spl_fixpt_zero; break; - case CHROMA_COSITING_TOPLEFT: + case CHROMA_COSITING_NONE: default: init_adj_h = spl_fixpt_zero; init_adj_v = spl_fixpt_zero; @@ -649,6 +642,12 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, } } + if (orthogonal_rotation) { + spl_swap(src.width, src.height); + spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); + spl_swap(init_adj_h, init_adj_v); + } + spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, @@ -1599,7 +1598,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s 0x0; // fp1.5.10, C3 coefficient } - if (spl_is_video_format(format)) { /* TODO: 0 = RGB, 1 = YUV */ + if (spl_is_subsampled_format(format)) { /* TODO: 0 = RGB, 1 = YUV */ dscl_prog_data->easf_matrix_mode = 1; /* * 2-bit, BF3 chroma mode correction calculation mode diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 59990929e44e..d0fe324cb537 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -431,7 +431,68 @@ union replay_debug_flags { */ uint32_t enable_ips_residency_profiling : 1; - uint32_t reserved : 20; + /** + * 0x1000 (bit 12) + * @enable_coasting_vtotal_check: Enable Coasting_vtotal_check + */ + uint32_t enable_coasting_vtotal_check : 1; + /** + * 0x2000 (bit 13) + * @enable_visual_confirm_debug: Enable Visual Confirm Debug + */ + uint32_t enable_visual_confirm_debug : 1; + + uint32_t reserved : 18; + } bitfields; + + uint32_t u32All; +}; + +/** + * Flags record error state. + */ +union replay_visual_confirm_error_state_flags { + struct { + /** + * 0x1 (bit 0) - Desync Error flag. + */ + uint32_t desync_error : 1; + + /** + * 0x2 (bit 1) - State Transition Error flag. + */ + uint32_t state_transition_error : 1; + + /** + * 0x4 (bit 2) - Crc Error flag + */ + uint32_t crc_error : 1; + + /** + * 0x8 (bit 3) - Reserved + */ + uint32_t reserved_3 : 1; + + /** + * 0x10 (bit 4) - Incorrect Coasting vtotal checking --> use debug flag to control DPCD write. + * Added new debug flag to control DPCD. + */ + uint32_t incorrect_vtotal_in_static_screen : 1; + + /** + * 0x20 (bit 5) - No doubled Refresh Rate. 
+ */ + uint32_t no_double_rr : 1; + + /** + * Reserved bit 6-7 + */ + uint32_t reserved_6_7 : 2; + + /** + * Reserved bit 9-31 + */ + uint32_t reserved_9_31 : 24; } bitfields; uint32_t u32All; @@ -3644,6 +3705,8 @@ enum dmub_cmd_replay_general_subtype { */ REPLAY_GENERAL_CMD_DISABLED_ADAPTIVE_SYNC_SDP, REPLAY_GENERAL_CMD_DISABLED_DESYNC_ERROR_DETECTION, + REPLAY_GENERAL_CMD_UPDATE_ERROR_STATUS, + REPLAY_GENERAL_CMD_SET_LOW_RR_ACTIVATE, }; /** diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 95838c7ab054..29ccd3532d13 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -996,9 +996,9 @@ void set_replay_coasting_vtotal(struct dc_link *link, link->replay_settings.coasting_vtotal_table[type] = vtotal; } -void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal) +void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal) { - link->replay_settings.abm_with_ips_on_full_screen_video_pseudo_vtotal = vtotal; + link->replay_settings.low_rr_full_screen_video_pseudo_vtotal = vtotal; } void calculate_replay_link_off_frame_count(struct dc_link *link, @@ -1039,3 +1039,8 @@ bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_back memcpy(caps->data_points, custom_backlight_profiles[config_no].data_points, data_points_size); return true; } + +void reset_replay_dsync_error_count(struct dc_link *link) +{ + link->replay_settings.replay_desync_error_fail_count = 0; +} diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index cac302e8fa10..758a8aa31fbe 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -62,7 +62,7 @@ void set_replay_defer_update_coasting_vtotal(struct dc_link *link, uint32_t vtotal); void update_replay_coasting_vtotal_from_defer(struct dc_link *link, enum replay_coasting_vtotal_type type); -void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); +void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); void calculate_replay_link_off_frame_count(struct dc_link *link, uint16_t vtotal, uint16_t htotal); @@ -78,4 +78,5 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link, bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps); +void reset_replay_dsync_error_count(struct dc_link *link); #endif /* MODULES_POWER_POWER_HELPERS_H_ */ diff --git a/drivers/gpu/drm/amd/include/amd_pcie.h b/drivers/gpu/drm/amd/include/amd_pcie.h index a1ece3eecdf5..a08611cb8041 100644 --- a/drivers/gpu/drm/amd/include/amd_pcie.h +++ b/drivers/gpu/drm/amd/include/amd_pcie.h @@ -49,6 +49,17 @@ | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) /* Following flags shows PCIe lane width switch supported in driver which are decided by chipset and ASIC */ + +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1 0x00000001 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 0x00000002 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 0x00000004 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 0x00000008 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 0x00000010 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 0x00000020 +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 0x00000040 +#define 
CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_MASK 0x0000FFFF +#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_SHIFT 0 + #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X1 0x00010000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 0x00020000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 0x00040000 @@ -56,6 +67,7 @@ #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 0x00100000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 0x00200000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 0x00400000 +#define CAIL_PCIE_LINK_WIDTH_SUPPORT_MASK 0xFFFF0000 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT 16 /* 1/2/4/8/16 lanes */ @@ -65,4 +77,10 @@ | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \ | CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) +#define AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1 \ + | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \ + | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \ + | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \ + | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16) + #endif diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 98d9e840b0e2..6dccee403a3d 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -344,6 +344,16 @@ enum DC_DEBUG_MASK { * eDP display from ACPI _DDC method. */ DC_DISABLE_ACPI_EDID = 0x8000, + + /** + * @DC_DISABLE_HDMI_CEC: If set, disable HDMI-CEC feature in amdgpu driver. + */ + DC_DISABLE_HDMI_CEC = 0x10000, + + /** + * @DC_DISABLE_SUBVP: If set, disable DCN Sub-Viewport feature in amdgpu driver. + */ + DC_DISABLE_SUBVP = 0x20000, }; enum amd_dpm_forced_level; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index 0f96b8c59a0e..274b3e1cc4fb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -34,6 +34,8 @@ #define NUM_PCIE_BITRATES 4 #define NUM_XGMI_BITRATES 4 #define NUM_XGMI_WIDTHS 3 +#define NUM_SOC_P2S_TABLES 3 +#define NUM_TDP_GROUPS 4 typedef enum { /*0*/ FEATURE_DATA_CALCULATION = 0, @@ -80,8 +82,10 @@ typedef enum { /*41*/ FEATURE_CXL_QOS = 41, /*42*/ FEATURE_SOC_DC_RTC = 42, /*43*/ FEATURE_GFX_DC_RTC = 43, +/*44*/ FEATURE_DVM_MIN_PSM = 44, +/*45*/ FEATURE_PRC = 45, -/*44*/ NUM_FEATURES = 44 +/*46*/ NUM_FEATURES = 46 } FEATURE_LIST_e; //enum for MPIO PCIe gen speed msgs @@ -123,7 +127,7 @@ typedef enum { VOLTAGE_GUARDBAND_COUNT } GFX_GUARDBAND_e; -#define SMU_METRICS_TABLE_VERSION 0xE +#define SMU_METRICS_TABLE_VERSION 0xF typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; @@ -234,6 +238,9 @@ typedef struct __attribute__((packed, aligned(4))) { //PCIE BW Data and error count uint32_t PCIeOtherEndRecoveryAcc; // The Pcie counter itself is accumulated + + //Total App Clock Counter + uint64_t GfxclkBelowHostLimitAcc[8]; } MetricsTableX_t; typedef struct __attribute__((packed, aligned(4))) { @@ -328,13 +335,14 @@ typedef struct __attribute__((packed, aligned(4))) { uint32_t JpegBusy[32]; } MetricsTableA_t; -#define SMU_VF_METRICS_TABLE_VERSION 0x3 +#define SMU_VF_METRICS_TABLE_VERSION 0x5 typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; uint32_t InstGfxclk_TargFreq; uint64_t AccGfxclk_TargFreq; uint64_t AccGfxRsmuDpm_Busy; + uint64_t AccGfxclkBelowHostLimit; } VfMetricsTable_t; #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index ae3563d71fa0..8d4a96e23326 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -107,6 +107,7 @@ struct smu_13_0_dpm_context { struct smu_13_0_dpm_tables dpm_tables; uint32_t workload_policy_mask; uint32_t dcef_min_ds_clk; + uint64_t caps; }; enum smu_13_0_power_state { @@ -303,5 +304,7 @@ int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu, int smu_v13_0_get_boot_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *value); + +void smu_v13_0_interrupt_work(struct smu_context *smu); #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index f6b029354327..83163d7c7f00 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1732,7 +1732,6 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; gpu_metrics->average_umc_activity = metrics.AverageUclkActivity; - gpu_metrics->average_mm_activity = 0; /* Valid power data is available only from primary die */ if (aldebaran_is_primary(smu)) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 7bb45ff6d5c8..fbbdfa54f6a2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1321,11 +1321,11 @@ static int smu_v13_0_set_irq_state(struct amdgpu_device *adev, return 0; } -static int smu_v13_0_ack_ac_dc_interrupt(struct smu_context *smu) +void smu_v13_0_interrupt_work(struct smu_context *smu) { - return smu_cmn_send_smc_msg(smu, - SMU_MSG_ReenableAcDcInterrupt, - NULL); + smu_cmn_send_smc_msg(smu, + SMU_MSG_ReenableAcDcInterrupt, + NULL); } #define THM_11_0__SRCID__THM_DIG_THERM_L2H 0 /* ASIC_TEMP > CG_THERMAL_INT.DIG_THERM_INTH */ @@ -1378,12 +1378,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, switch (ctxid) { case SMU_IH_INTERRUPT_CONTEXT_ID_AC: dev_dbg(adev->dev, "Switched to AC mode!\n"); - smu_v13_0_ack_ac_dc_interrupt(smu); + schedule_work(&smu->interrupt_work); adev->pm.ac_power = true; break; case SMU_IH_INTERRUPT_CONTEXT_ID_DC: dev_dbg(adev->dev, "Switched to DC mode!\n"); - smu_v13_0_ack_ac_dc_interrupt(smu); + schedule_work(&smu->interrupt_work); adev->pm.ac_power = false; break; case SMU_IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 3aa705aae4c0..0551a3311217 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2643,11 +2643,12 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, &backend_workload_mask); /* Add optimizations for SMU13.0.0/10. 
Reuse the power saving profile */ - if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && - ((smu->adev->pm.fw_version == 0x004e6601) || - (smu->adev->pm.fw_version >= 0x004e7300))) || - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && - smu->adev->pm.fw_version >= 0x00504500)) { + if ((workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE)) && + ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && + ((smu->adev->pm.fw_version == 0x004e6601) || + (smu->adev->pm.fw_version >= 0x004e7300))) || + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + smu->adev->pm.fw_version >= 0x00504500))) { workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, PP_SMC_POWER_PROFILE_POWERSAVING); @@ -3219,6 +3220,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .is_asic_wbrf_supported = smu_v13_0_0_wbrf_support_check, .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, + .interrupt_work = smu_v13_0_interrupt_work, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 5b86df0c8536..da7bd9227afe 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -101,23 +101,24 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } -static inline bool smu_v13_0_6_is_unified_metrics(struct smu_context *smu) -{ - return (smu->adev->flags & AMD_IS_APU) && - smu->smc_fw_version <= 0x4556900; -} - -static inline bool smu_v13_0_6_is_other_end_count_available(struct smu_context *smu) -{ - switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 6): - return smu->smc_fw_version >= 0x557600; - case IP_VERSION(13, 0, 14): - return smu->smc_fw_version >= 0x05550E00; - default: - return false; - } -} +#define SMU_CAP(x) SMU_13_0_6_CAPS_##x + +enum smu_v13_0_6_caps { + SMU_CAP(DPM), + SMU_CAP(UNI_METRICS), + SMU_CAP(DPM_POLICY), + SMU_CAP(OTHER_END_METRICS), + SMU_CAP(SET_UCLK_MAX), + SMU_CAP(PCIE_METRICS), + SMU_CAP(HST_LIMIT_METRICS), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(PER_INST_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND), + SMU_CAP(SDMA_RESET), + SMU_CAP(ALL), +}; struct mca_bank_ipid { enum amdgpu_mca_ip ip; @@ -281,6 +282,162 @@ struct smu_v13_0_6_dpm_map { uint32_t *freq_table; }; +static inline void smu_v13_0_6_cap_set(struct smu_context *smu, + enum smu_v13_0_6_caps cap) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + + dpm_context->caps |= BIT_ULL(cap); +} + +static inline void smu_v13_0_6_cap_clear(struct smu_context *smu, + enum smu_v13_0_6_caps cap) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + + dpm_context->caps &= ~BIT_ULL(cap); +} + +static inline bool smu_v13_0_6_cap_supported(struct smu_context *smu, + enum smu_v13_0_6_caps cap) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + + return !!(dpm_context->caps & BIT_ULL(cap)); +} + +static void smu_v13_0_14_init_caps(struct smu_context *smu) +{ + enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), + SMU_CAP(UNI_METRICS), + SMU_CAP(SET_UCLK_MAX), + SMU_CAP(DPM_POLICY), + SMU_CAP(PCIE_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(RMA_MSG), 
+ SMU_CAP(ACA_SYND) }; + uint32_t fw_ver = smu->smc_fw_version; + + for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++) + smu_v13_0_6_cap_set(smu, default_cap_list[i]); + + if (fw_ver >= 0x05550E00) + smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS)); + if (fw_ver >= 0x05551000) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); + if (fw_ver >= 0x05550B00) + smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); + if (fw_ver >= 0x5551200) + smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); +} + +static void smu_v13_0_12_init_caps(struct smu_context *smu) +{ + enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), + SMU_CAP(UNI_METRICS), + SMU_CAP(PCIE_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND) }; + uint32_t fw_ver = smu->smc_fw_version; + + for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++) + smu_v13_0_6_cap_set(smu, default_cap_list[i]); + + if (fw_ver < 0x00561900) + smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM)); + + if (fw_ver >= 0x00561700) + smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); +} + +static void smu_v13_0_6_init_caps(struct smu_context *smu) +{ + enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), + SMU_CAP(UNI_METRICS), + SMU_CAP(SET_UCLK_MAX), + SMU_CAP(DPM_POLICY), + SMU_CAP(PCIE_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND) }; + struct amdgpu_device *adev = smu->adev; + uint32_t fw_ver = smu->smc_fw_version; + uint32_t pgm = (fw_ver >> 24) & 0xFF; + + for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++) + smu_v13_0_6_cap_set(smu, default_cap_list[i]); + + if (fw_ver < 0x552F00) + smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM)); + if (fw_ver < 0x554500) + smu_v13_0_6_cap_clear(smu, SMU_CAP(CTF_LIMIT)); + + if (adev->flags & AMD_IS_APU) { + smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS)); + smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY)); + smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); + smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); + + if (fw_ver <= 0x4556900) + smu_v13_0_6_cap_clear(smu, SMU_CAP(UNI_METRICS)); + if (fw_ver >= 0x04556F00) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); + if (fw_ver >= 0x04556A00) + smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); + } else { + if (fw_ver >= 0x557600) + smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS)); + if (fw_ver < 0x00556000) + smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY)); + if (amdgpu_sriov_vf(adev) && (fw_ver < 0x556600)) + smu_v13_0_6_cap_clear(smu, SMU_CAP(SET_UCLK_MAX)); + if (fw_ver < 0x556300) + smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS)); + if (fw_ver < 0x554800) + smu_v13_0_6_cap_clear(smu, SMU_CAP(MCA_DEBUG_MODE)); + if (fw_ver >= 0x556F00) + smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); + if (fw_ver < 0x00555a00) + smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); + if (fw_ver < 0x00555600) + smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); + if (pgm == 0 && fw_ver >= 0x557900) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); + } + if (((pgm == 7) && (fw_ver >= 0x7550700)) || + ((pgm == 0) && (fw_ver >= 0x00557900)) || + ((pgm == 4) && (fw_ver >= 0x4557000))) + smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); +} + +static void smu_v13_0_x_init_caps(struct smu_context *smu) +{ + switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) { + case IP_VERSION(13, 0, 12): + return smu_v13_0_12_init_caps(smu); + case IP_VERSION(13, 0, 14): + return smu_v13_0_14_init_caps(smu); + default: + return smu_v13_0_6_init_caps(smu); + } +} + +static int 
smu_v13_0_6_check_fw_version(struct smu_context *smu) +{ + int r; + + r = smu_v13_0_check_fw_version(smu); + /* Initialize caps flags once fw version is fetched */ + if (!r) + smu_v13_0_x_init_caps(smu); + + return r; +} + static int smu_v13_0_6_init_microcode(struct smu_context *smu) { const struct smc_firmware_header_v2_1 *v2_1; @@ -602,7 +759,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; - bool flag = smu_v13_0_6_is_unified_metrics(smu); + bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); int ret, i, retry = 100; uint32_t table_version; @@ -798,8 +955,7 @@ static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu) smu_v13_0_6_setup_driver_pptable(smu); /* DPM policy not supported in older firmwares */ - if (!(smu->adev->flags & AMD_IS_APU) && - (smu->smc_fw_version < 0x00556000)) { + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM_POLICY))) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; smu_dpm->dpm_policies->policy_mask &= @@ -976,7 +1132,7 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, struct smu_table_context *smu_table = &smu->smu_table; MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; - bool flag = smu_v13_0_6_is_unified_metrics(smu); + bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); struct amdgpu_device *adev = smu->adev; int ret = 0; int xcc_id; @@ -989,7 +1145,7 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, switch (member) { case METRICS_CURR_GFXCLK: case METRICS_AVERAGE_GFXCLK: - if (smu->smc_fw_version >= 0x552F00) { + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) { xcc_id = GET_INST(GC, 0); *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, flag)[xcc_id]); } else { @@ -1676,7 +1832,7 @@ static int smu_v13_0_6_notify_unload(struct smu_context *smu) static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable) { /* NOTE: this ClearMcaOnRead message is only supported for smu version 85.72.0 or higher */ - if (smu->smc_fw_version < 0x554800) + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(MCA_DEBUG_MODE))) return 0; return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead, @@ -1821,9 +1977,8 @@ static int smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu, if (max == pstate_table->uclk_pstate.curr.max) return 0; /* For VF, only allowed in FW versions 85.102 or greater */ - if (amdgpu_sriov_vf(adev) && - ((smu->smc_fw_version < 0x556600) || - (adev->flags & AMD_IS_APU))) + if (!smu_v13_0_6_cap_supported(smu, + SMU_CAP(SET_UCLK_MAX))) return -EOPNOTSUPP; /* Only max clock limiting is allowed for UCLK */ ret = smu_v13_0_set_soft_freq_limited_range( @@ -2027,7 +2182,7 @@ static int smu_v13_0_6_get_enabled_mask(struct smu_context *smu, ret = smu_cmn_get_enabled_mask(smu, feature_mask); - if (ret == -EIO && smu->smc_fw_version < 0x552F00) { + if (ret == -EIO && !smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) { *feature_mask = 0; ret = 0; } @@ -2320,11 +2475,10 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) { - bool per_inst, smu_13_0_6_per_inst, smu_13_0_14_per_inst, apu_per_inst; struct smu_table_context *smu_table = &smu->smu_table; struct 
gpu_metrics_v1_7 *gpu_metrics = (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; - bool flag = smu_v13_0_6_is_unified_metrics(smu); + bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; MetricsTableX_t *metrics_x; @@ -2332,6 +2486,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table struct amdgpu_xcp *xcp; u16 link_width_level; u32 inst_mask; + bool per_inst; metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL); ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true); @@ -2358,6 +2513,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->average_umc_activity = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, flag)); + gpu_metrics->mem_max_bandwidth = + SMUQ10_ROUND(GET_METRIC_FIELD(MaxDramBandwidth, flag)); + gpu_metrics->curr_socket_power = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, flag)); /* Energy counter reported in 15.259uJ (2^-16) units */ @@ -2402,7 +2560,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table * table for both pf & one vf for smu version 85.99.0 or higher else report only * for pf from registers */ - if (smu->smc_fw_version >= 0x556300) { + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PCIE_METRICS))) { gpu_metrics->pcie_link_width = metrics_x->PCIeLinkWidth; gpu_metrics->pcie_link_speed = pcie_gen_to_speed(metrics_x->PCIeLinkSpeed); @@ -2431,7 +2589,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_x->PCIeNAKSentCountAcc; gpu_metrics->pcie_nak_rcvd_count_acc = metrics_x->PCIeNAKReceivedCountAcc; - if (smu_v13_0_6_is_other_end_count_available(smu)) + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(OTHER_END_METRICS))) gpu_metrics->pcie_lc_perf_other_end_recovery = metrics_x->PCIeOtherEndRecoveryAcc; @@ -2456,17 +2614,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->num_partition = adev->xcp_mgr->num_xcps; - apu_per_inst = (adev->flags & AMD_IS_APU) && (smu->smc_fw_version >= 0x04556A00); - smu_13_0_6_per_inst = !(adev->flags & AMD_IS_APU) && - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) - == IP_VERSION(13, 0, 6)) && - (smu->smc_fw_version >= 0x556F00); - smu_13_0_14_per_inst = !(adev->flags & AMD_IS_APU) && - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) - == IP_VERSION(13, 0, 14)) && - (smu->smc_fw_version >= 0x05550B00); - - per_inst = apu_per_inst || smu_13_0_6_per_inst || smu_13_0_14_per_inst; + per_inst = smu_v13_0_6_cap_supported(smu, SMU_CAP(PER_INST_METRICS)); for_each_xcp(adev->xcp_mgr, xcp, i) { amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask); @@ -2496,6 +2644,12 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table SMUQ10_ROUND(metrics_x->GfxBusy[inst]); gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = SMUQ10_ROUND(metrics_x->GfxBusyAcc[inst]); + + if (smu_v13_0_6_cap_supported( + smu, SMU_CAP(HST_LIMIT_METRICS))) + gpu_metrics->xcp_stats[i].gfx_below_host_limit_acc[idx] = + SMUQ10_ROUND(metrics_x->GfxclkBelowHostLimitAcc + [inst]); idx++; } } @@ -2600,7 +2754,7 @@ static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu, return -EINVAL; /*Check smu version, GetCtfLimit message only supported for smu version 85.69 or higher */ - if (smu->smc_fw_version < 0x554500) + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(CTF_LIMIT))) return 0; /* Get SOC Max operating 
temperature */ @@ -2702,11 +2856,10 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, static int smu_v13_0_6_send_rma_reason(struct smu_context *smu) { - struct amdgpu_device *adev = smu->adev; int ret; /* NOTE: the message is only valid on dGPU with pmfw 85.90.0 and above */ - if ((adev->flags & AMD_IS_APU) || smu->smc_fw_version < 0x00555a00) + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(RMA_MSG))) return 0; ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RmaDueToBadPageThreshold, NULL); @@ -2720,17 +2873,13 @@ static int smu_v13_0_6_send_rma_reason(struct smu_context *smu) static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) { - struct amdgpu_device *adev = smu->adev; int ret = 0; - /* the message is only valid on SMU 13.0.6 with pmfw 85.121.00 and above */ - if ((adev->flags & AMD_IS_APU) || - amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6) || - smu->smc_fw_version < 0x00557900) - return 0; + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET))) + return -EOPNOTSUPP; ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_ResetSDMA, inst_mask, NULL); + SMU_MSG_ResetSDMA, inst_mask, NULL); if (ret) dev_err(smu->adev->dev, "failed to send ResetSDMA event with mask 0x%x\n", @@ -3049,7 +3198,7 @@ static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amd if (instlo != 0x03b30400) return false; - if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) { + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND))) { errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]); errcode &= 0xff; } else { @@ -3335,9 +3484,10 @@ static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev, static int aca_smu_parse_error_code(struct amdgpu_device *adev, struct aca_bank *bank) { + struct smu_context *smu = adev->powerplay.pp_handle; int error_code; - if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND))) error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]); else error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]); @@ -3375,7 +3525,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .fini_power = smu_v13_0_fini_power, .check_fw_status = smu_v13_0_6_check_fw_status, /* pptable related */ - .check_fw_version = smu_v13_0_check_fw_version, + .check_fw_version = smu_v13_0_6_check_fw_version, .set_driver_table_location = smu_v13_0_set_driver_table_location, .set_tool_table_location = smu_v13_0_set_tool_table_location, .notify_memory_pool_location = smu_v13_0_notify_memory_pool_location, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index aabb94796005..55ef18517b0f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2797,6 +2797,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check, .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, + .interrupt_work = smu_v13_0_interrupt_work, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c index 61f4a38e7d2b..657bc3dd18df 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c @@ 
-153,7 +153,16 @@ static int adv7511_hdmi_hw_params(struct device *dev, void *data, ADV7511_AUDIO_CFG3_LEN_MASK, len); regmap_update_bits(adv7511->regmap, ADV7511_REG_I2C_FREQ_ID_CFG, ADV7511_I2C_FREQ_ID_CFG_RATE_MASK, rate << 4); - regmap_write(adv7511->regmap, 0x73, 0x1); + + /* send current Audio infoframe values while updating */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), BIT(5)); + + regmap_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME(0), 0x1); + + /* use Audio infoframe updated info */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), 0); return 0; } @@ -184,8 +193,9 @@ static int audio_startup(struct device *dev, void *data) regmap_update_bits(adv7511->regmap, ADV7511_REG_GC(0), BIT(7) | BIT(6), BIT(7)); /* use Audio infoframe updated info */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_GC(1), + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, BIT(5), 0); + /* enable SPDIF receiver */ if (adv7511->audio_source == ADV7511_AUDIO_SOURCE_SPDIF) regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CONFIG, @@ -204,7 +214,8 @@ static void audio_shutdown(struct device *dev, void *data) } static int adv7511_hdmi_i2s_get_dai_id(struct snd_soc_component *component, - struct device_node *endpoint) + struct device_node *endpoint, + void *data) { struct of_endpoint of_ep; int ret; diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index eb5919b38263..a13b3d8ab6ac 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1241,8 +1241,10 @@ static int adv7511_probe(struct i2c_client *i2c) return ret; ret = adv7511_init_regulators(adv7511); - if (ret) - return dev_err_probe(dev, ret, "failed to init regulators\n"); + if (ret) { + dev_err_probe(dev, ret, "failed to init regulators\n"); + goto err_of_node_put; + } /* * The power down GPIO is optional. 
If present, toggle it from active to @@ -1363,6 +1365,8 @@ err_i2c_unregister_edid: i2c_unregister_device(adv7511->i2c_edid); uninit_regulators: adv7511_uninit_regulators(adv7511); +err_of_node_put: + of_node_put(adv7511->host_node); return ret; } @@ -1371,6 +1375,8 @@ static void adv7511_remove(struct i2c_client *i2c) { struct adv7511 *adv7511 = i2c_get_clientdata(i2c); + of_node_put(adv7511->host_node); + adv7511_uninit_regulators(adv7511); drm_bridge_remove(&adv7511->bridge); diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c index 4481489aaf5e..122ad91e8a32 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7533.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c @@ -172,7 +172,7 @@ int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv) of_property_read_u32(np, "adi,dsi-lanes", &num_lanes); - if (num_lanes < 1 || num_lanes > 4) + if (num_lanes < 2 || num_lanes > 4) return -EINVAL; adv->num_dsi_lanes = num_lanes; @@ -181,8 +181,6 @@ int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv) if (!adv->host_node) return -ENODEV; - of_node_put(adv->host_node); - adv->use_timing_gen = !of_property_read_bool(np, "adi,disable-timing-generator"); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 6238eabd2328..4be34d5c7a3b 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1952,7 +1952,8 @@ static void anx7625_audio_shutdown(struct device *dev, void *data) } static int anx7625_hdmi_i2s_get_dai_id(struct snd_soc_component *component, - struct device_node *endpoint) + struct device_node *endpoint, + void *data) { struct of_endpoint of_ep; int ret; @@ -2139,49 +2140,6 @@ static void hdcp_check_work_func(struct work_struct *work) drm_modeset_unlock(&drm_dev->mode_config.connection_mutex); } -static int anx7625_connector_atomic_check(struct anx7625_data *ctx, - struct drm_connector_state *state) -{ - struct device *dev = ctx->dev; - int cp; - - dev_dbg(dev, "hdcp state check\n"); - cp = state->content_protection; - - if (cp == ctx->hdcp_cp) - return 0; - - if (cp == DRM_MODE_CONTENT_PROTECTION_DESIRED) { - if (ctx->dp_en) { - dev_dbg(dev, "enable HDCP\n"); - anx7625_hdcp_enable(ctx); - - queue_delayed_work(ctx->hdcp_workqueue, - &ctx->hdcp_work, - msecs_to_jiffies(2000)); - } - } - - if (cp == DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { - if (ctx->hdcp_cp != DRM_MODE_CONTENT_PROTECTION_ENABLED) { - dev_err(dev, "current CP is not ENABLED\n"); - return -EINVAL; - } - anx7625_hdcp_disable(ctx); - ctx->hdcp_cp = DRM_MODE_CONTENT_PROTECTION_UNDESIRED; - drm_hdcp_update_content_protection(ctx->connector, - ctx->hdcp_cp); - dev_dbg(dev, "update CP to UNDESIRE\n"); - } - - if (cp == DRM_MODE_CONTENT_PROTECTION_ENABLED) { - dev_err(dev, "Userspace illegal set to PROTECTION ENABLE\n"); - return -EINVAL; - } - - return 0; -} - static int anx7625_bridge_attach(struct drm_bridge *bridge, enum drm_bridge_attach_flags flags) { @@ -2418,7 +2376,7 @@ static int anx7625_bridge_atomic_check(struct drm_bridge *bridge, anx7625_bridge_mode_fixup(bridge, &crtc_state->mode, &crtc_state->adjusted_mode); - return anx7625_connector_atomic_check(ctx, conn_state); + return 0; } static void anx7625_bridge_atomic_enable(struct drm_bridge *bridge, @@ -2427,6 +2385,7 @@ static void anx7625_bridge_atomic_enable(struct drm_bridge *bridge, struct anx7625_data *ctx = bridge_to_anx7625(bridge); struct device *dev = ctx->dev; struct drm_connector *connector; + struct 
drm_connector_state *conn_state; dev_dbg(dev, "drm atomic enable\n"); @@ -2441,6 +2400,22 @@ static void anx7625_bridge_atomic_enable(struct drm_bridge *bridge, _anx7625_hpd_polling(ctx, 5000 * 100); anx7625_dp_start(ctx); + + conn_state = drm_atomic_get_new_connector_state(state->base.state, connector); + + if (WARN_ON(!conn_state)) + return; + + if (conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) { + if (ctx->dp_en) { + dev_dbg(dev, "enable HDCP\n"); + anx7625_hdcp_enable(ctx); + + queue_delayed_work(ctx->hdcp_workqueue, + &ctx->hdcp_work, + msecs_to_jiffies(2000)); + } + } } static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge, @@ -2451,6 +2426,17 @@ static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge, dev_dbg(dev, "drm atomic disable\n"); + flush_workqueue(ctx->hdcp_workqueue); + + if (ctx->connector && + ctx->hdcp_cp == DRM_MODE_CONTENT_PROTECTION_ENABLED) { + anx7625_hdcp_disable(ctx); + ctx->hdcp_cp = DRM_MODE_CONTENT_PROTECTION_DESIRED; + drm_hdcp_update_content_protection(ctx->connector, + ctx->hdcp_cp); + dev_dbg(dev, "update CP to DESIRED\n"); + } + ctx->connector = NULL; anx7625_dp_stop(ctx); diff --git a/drivers/gpu/drm/bridge/ite-it6263.c b/drivers/gpu/drm/bridge/ite-it6263.c index 44af1f25034a..306b5e374b9e 100644 --- a/drivers/gpu/drm/bridge/ite-it6263.c +++ b/drivers/gpu/drm/bridge/ite-it6263.c @@ -569,15 +569,6 @@ static int it6263_read_edid(void *data, u8 *buf, unsigned int block, size_t len) return 0; } -static int it6263_bridge_atomic_check(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - return drm_atomic_helper_connector_hdmi_check(conn_state->connector, - conn_state->state); -} - static void it6263_bridge_atomic_disable(struct drm_bridge *bridge, struct drm_bridge_state *old_bridge_state) @@ -812,7 +803,6 @@ static const struct drm_bridge_funcs it6263_bridge_funcs = { .mode_valid = it6263_bridge_mode_valid, .atomic_disable = it6263_bridge_atomic_disable, .atomic_enable = it6263_bridge_atomic_enable, - .atomic_check = it6263_bridge_atomic_check, .detect = it6263_bridge_detect, .edid_read = it6263_bridge_edid_read, .atomic_get_input_bus_fmts = it6263_bridge_atomic_get_input_bus_fmts, @@ -864,8 +854,8 @@ static int it6263_probe(struct i2c_client *client) it->lvds_i2c = devm_i2c_new_dummy_device(dev, client->adapter, LVDS_INPUT_CTRL_I2C_ADDR); if (IS_ERR(it->lvds_i2c)) - dev_err_probe(it->dev, PTR_ERR(it->lvds_i2c), - "failed to allocate I2C device for LVDS\n"); + return dev_err_probe(it->dev, PTR_ERR(it->lvds_i2c), + "failed to allocate I2C device for LVDS\n"); it->lvds_regmap = devm_regmap_init_i2c(it->lvds_i2c, &it6263_lvds_regmap_config); diff --git a/drivers/gpu/drm/bridge/ite-it6505.c b/drivers/gpu/drm/bridge/ite-it6505.c index 0faad10ba8e4..88ef76a37fe6 100644 --- a/drivers/gpu/drm/bridge/ite-it6505.c +++ b/drivers/gpu/drm/bridge/ite-it6505.c @@ -19,6 +19,7 @@ #include <linux/regulator/consumer.h> #include <linux/types.h> #include <linux/wait.h> +#include <linux/bitfield.h> #include <crypto/hash.h> @@ -126,6 +127,7 @@ #define REG_AUX_OUT_DATA0 0x27 #define REG_AUX_CMD_REQ 0x2B +#define M_AUX_REQ_CMD 0x0F #define AUX_BUSY BIT(5) #define REG_AUX_DATA_0_7 0x2C @@ -266,6 +268,18 @@ #define REG_SSC_CTRL1 0x189 #define REG_SSC_CTRL2 0x18A +#define REG_AUX_USER_CTRL 0x190 +#define EN_USER_AUX BIT(0) +#define USER_AUX_DONE BIT(1) +#define AUX_EVENT BIT(4) + +#define REG_AUX_USER_DATA_REC 0x191 +#define M_AUX_IN_REC 
0xF0 +#define M_AUX_OUT_REC 0x0F + +#define REG_AUX_USER_REPLY 0x19A +#define REG_AUX_USER_RXB(n) (n + 0x19B) + #define RBR DP_LINK_BW_1_62 #define HBR DP_LINK_BW_2_7 #define HBR2 DP_LINK_BW_5_4 @@ -296,11 +310,13 @@ #define MAX_LANE_COUNT 4 #define MAX_LINK_RATE HBR #define AUTO_TRAIN_RETRY 3 -#define MAX_HDCP_DOWN_STREAM_COUNT 10 +#define MAX_HDCP_DOWN_STREAM_COUNT 127 #define MAX_CR_LEVEL 0x03 #define MAX_EQ_LEVEL 0x03 #define AUX_WAIT_TIMEOUT_MS 15 -#define AUX_FIFO_MAX_SIZE 32 +#define AUX_FIFO_MAX_SIZE 16 +#define AUX_I2C_MAX_SIZE 4 +#define AUX_I2C_DEFER_RETRY 4 #define PIXEL_CLK_DELAY 1 #define PIXEL_CLK_INVERSE 0 #define ADJUST_PHASE_THRESHOLD 80000 @@ -323,7 +339,15 @@ enum aux_cmd_type { CMD_AUX_NATIVE_READ = 0x0, CMD_AUX_NATIVE_WRITE = 0x5, + CMD_AUX_GI2C_ADR = 0x08, + CMD_AUX_GI2C_READ = 0x09, + CMD_AUX_GI2C_WRITE = 0x0A, CMD_AUX_I2C_EDID_READ = 0xB, + CMD_AUX_I2C_READ = 0x0D, + CMD_AUX_I2C_WRITE = 0x0C, + + /* KSV read over AUX FIFO, extended from CMD_AUX_NATIVE_READ */ + CMD_AUX_GET_KSV_LIST = 0x10, }; enum aux_cmd_reply { @@ -965,7 +989,8 @@ static ssize_t it6505_aux_operation(struct it6505 *it6505, it6505_set_bits(it6505, REG_AUX_CTRL, AUX_USER_MODE, AUX_USER_MODE); aux_op_start: - if (cmd == CMD_AUX_I2C_EDID_READ) { + /* HW AUX FIFO supports only EDID and DPCD KSV FIFO area */ + if (cmd == CMD_AUX_I2C_EDID_READ || cmd == CMD_AUX_GET_KSV_LIST) { /* AUX EDID FIFO has max length of AUX_FIFO_MAX_SIZE bytes. */ size = min_t(size_t, size, AUX_FIFO_MAX_SIZE); /* Enable AUX FIFO read back and clear FIFO */ @@ -996,7 +1021,7 @@ aux_op_start: size); /* Aux Fire */ - it6505_write(it6505, REG_AUX_CMD_REQ, cmd); + it6505_write(it6505, REG_AUX_CMD_REQ, FIELD_GET(M_AUX_REQ_CMD, cmd)); ret = it6505_aux_wait(it6505); if (ret < 0) @@ -1030,7 +1055,7 @@ aux_op_start: goto aux_op_start; } - if (cmd == CMD_AUX_I2C_EDID_READ) { + if (cmd == CMD_AUX_I2C_EDID_READ || cmd == CMD_AUX_GET_KSV_LIST) { for (i = 0; i < size; i++) { ret = it6505_read(it6505, REG_AUX_DATA_FIFO); if (ret < 0) @@ -1055,7 +1080,7 @@ aux_op_start: ret = i; aux_op_err: - if (cmd == CMD_AUX_I2C_EDID_READ) { + if (cmd == CMD_AUX_I2C_EDID_READ || cmd == CMD_AUX_GET_KSV_LIST) { /* clear AUX FIFO */ it6505_set_bits(it6505, REG_AUX_CTRL, AUX_EN_FIFO_READ | CLR_EDID_FIFO, @@ -1076,10 +1101,14 @@ static ssize_t it6505_aux_do_transfer(struct it6505 *it6505, size_t size, enum aux_cmd_reply *reply) { int i, ret_size, ret = 0, request_size; + int fifo_max_size = (cmd == CMD_AUX_I2C_EDID_READ || cmd == CMD_AUX_GET_KSV_LIST) ? 
+ AUX_FIFO_MAX_SIZE : 4; mutex_lock(&it6505->aux_lock); - for (i = 0; i < size; i += 4) { - request_size = min((int)size - i, 4); + i = 0; + do { + request_size = min_t(int, (int)size - i, fifo_max_size); + ret_size = it6505_aux_operation(it6505, cmd, address + i, buffer + i, request_size, reply); @@ -1088,14 +1117,170 @@ static ssize_t it6505_aux_do_transfer(struct it6505 *it6505, goto aux_op_err; } + i += request_size; ret += ret_size; - } + } while (i < size); aux_op_err: mutex_unlock(&it6505->aux_lock); return ret; } +static bool it6505_aux_i2c_reply_defer(u8 reply) +{ + if (reply == DP_AUX_NATIVE_REPLY_DEFER || reply == DP_AUX_I2C_REPLY_DEFER) + return true; + return false; +} + +static bool it6505_aux_i2c_reply_nack(u8 reply) +{ + if (reply == DP_AUX_NATIVE_REPLY_NACK || reply == DP_AUX_I2C_REPLY_NACK) + return true; + return false; +} + +static int it6505_aux_i2c_wait(struct it6505 *it6505, u8 *reply) +{ + int err = 0; + unsigned long timeout; + struct device *dev = it6505->dev; + + timeout = jiffies + msecs_to_jiffies(AUX_WAIT_TIMEOUT_MS) + 1; + + do { + if (it6505_read(it6505, REG_AUX_USER_CTRL) & AUX_EVENT) + break; + if (time_after(jiffies, timeout)) { + dev_err(dev, "Timed out waiting for AUX I2C, BUSY = %X\n", + it6505_aux_op_finished(it6505)); + err = -ETIMEDOUT; + goto end_aux_i2c_wait; + } + usleep_range(300, 800); + } while (!it6505_aux_op_finished(it6505)); + + *reply = it6505_read(it6505, REG_AUX_USER_REPLY) >> 4; + + if (*reply == 0) + goto end_aux_i2c_wait; + + if (it6505_aux_i2c_reply_defer(*reply)) + err = -EBUSY; + else if (it6505_aux_i2c_reply_nack(*reply)) + err = -ENXIO; + +end_aux_i2c_wait: + it6505_set_bits(it6505, REG_AUX_USER_CTRL, USER_AUX_DONE, USER_AUX_DONE); + return err; +} + +static int it6505_aux_i2c_readb(struct it6505 *it6505, u8 *buf, size_t size, u8 *reply) +{ + int ret, i; + int retry; + + for (retry = 0; retry < AUX_I2C_DEFER_RETRY; retry++) { + it6505_write(it6505, REG_AUX_CMD_REQ, CMD_AUX_GI2C_READ); + + ret = it6505_aux_i2c_wait(it6505, reply); + if (it6505_aux_i2c_reply_defer(*reply)) + continue; + if (ret >= 0) + break; + } + + for (i = 0; i < size; i++) + buf[i] = it6505_read(it6505, REG_AUX_USER_RXB(0 + i)); + + return size; +} + +static int it6505_aux_i2c_writeb(struct it6505 *it6505, u8 *buf, size_t size, u8 *reply) +{ + int i, ret; + int retry; + + for (i = 0; i < size; i++) + it6505_write(it6505, REG_AUX_OUT_DATA0 + i, buf[i]); + + for (retry = 0; retry < AUX_I2C_DEFER_RETRY; retry++) { + it6505_write(it6505, REG_AUX_CMD_REQ, CMD_AUX_GI2C_WRITE); + + ret = it6505_aux_i2c_wait(it6505, reply); + if (it6505_aux_i2c_reply_defer(*reply)) + continue; + if (ret >= 0) + break; + } + return size; +} + +static ssize_t it6505_aux_i2c_operation(struct it6505 *it6505, + struct drm_dp_aux_msg *msg) +{ + int ret; + ssize_t request_size, data_cnt = 0; + u8 *buffer = msg->buffer; + + /* set AUX user mode */ + it6505_set_bits(it6505, REG_AUX_CTRL, + AUX_USER_MODE | AUX_NO_SEGMENT_WR, AUX_USER_MODE); + it6505_set_bits(it6505, REG_AUX_USER_CTRL, EN_USER_AUX, EN_USER_AUX); + /* clear AUX FIFO */ + it6505_set_bits(it6505, REG_AUX_CTRL, + AUX_EN_FIFO_READ | CLR_EDID_FIFO, + AUX_EN_FIFO_READ | CLR_EDID_FIFO); + + it6505_set_bits(it6505, REG_AUX_CTRL, + AUX_EN_FIFO_READ | CLR_EDID_FIFO, 0x00); + + it6505_write(it6505, REG_AUX_ADR_0_7, 0x00); + it6505_write(it6505, REG_AUX_ADR_8_15, msg->address << 1); + + if (msg->size == 0) { + /* I2C start/stop dummy write */ + it6505_write(it6505, REG_AUX_ADR_16_19, msg->request); + it6505_write(it6505, REG_AUX_CMD_REQ, 
CMD_AUX_GI2C_ADR); + ret = it6505_aux_i2c_wait(it6505, &msg->reply); + goto end_aux_i2c_transfer; + } + + /* I2C data transfer */ + data_cnt = 0; + do { + request_size = min_t(ssize_t, msg->size - data_cnt, AUX_I2C_MAX_SIZE); + it6505_write(it6505, REG_AUX_ADR_16_19, + msg->request | ((request_size - 1) << 4)); + if ((msg->request & DP_AUX_I2C_READ) == DP_AUX_I2C_READ) + ret = it6505_aux_i2c_readb(it6505, &buffer[data_cnt], + request_size, &msg->reply); + else + ret = it6505_aux_i2c_writeb(it6505, &buffer[data_cnt], + request_size, &msg->reply); + + if (ret < 0) + goto end_aux_i2c_transfer; + + data_cnt += request_size; + } while (data_cnt < msg->size); + ret = data_cnt; +end_aux_i2c_transfer: + + it6505_set_bits(it6505, REG_AUX_USER_CTRL, EN_USER_AUX, 0); + it6505_set_bits(it6505, REG_AUX_CTRL, AUX_USER_MODE, 0); + return ret; +} + +static ssize_t it6505_aux_i2c_transfer(struct drm_dp_aux *aux, + struct drm_dp_aux_msg *msg) +{ + struct it6505 *it6505 = container_of(aux, struct it6505, aux); + + guard(mutex)(&it6505->aux_lock); + return it6505_aux_i2c_operation(it6505, msg); +} + static ssize_t it6505_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -1105,9 +1290,8 @@ static ssize_t it6505_aux_transfer(struct drm_dp_aux *aux, int ret; enum aux_cmd_reply reply; - /* IT6505 doesn't support arbitrary I2C read / write. */ if (is_i2c) - return -EINVAL; + return it6505_aux_i2c_transfer(aux, msg); switch (msg->request) { case DP_AUX_NATIVE_READ: @@ -1178,6 +1362,37 @@ static int it6505_get_edid_block(void *data, u8 *buf, unsigned int block, return 0; } +static int it6505_get_ksvlist(struct it6505 *it6505, u8 *buf, size_t len) +{ + struct device *dev = it6505->dev; + enum aux_cmd_reply reply; + int request_size, ret; + int i = 0; + + do { + request_size = min_t(int, (int)len - i, 15); + + ret = it6505_aux_do_transfer(it6505, CMD_AUX_GET_KSV_LIST, + DP_AUX_HDCP_KSV_FIFO, + buf + i, request_size, &reply); + + DRM_DEV_DEBUG_DRIVER(dev, "request_size = %d, ret = %d", request_size, ret); + if (ret < 0) + return ret; + + i += request_size; + } while (i < len); + + DRM_DEV_DEBUG_DRIVER(dev, "ksv read cnt = %d, down_stream_cnt = %d", i, i / 5); + + for (i = 0; i < len; i += 5) { + DRM_DEV_DEBUG_DRIVER(dev, "ksv[%d] = %02X%02X%02X%02X%02X", + i / 5, buf[i], buf[i + 1], buf[i + 2], buf[i + 3], buf[i + 4]); + } + + return len; +} + static void it6505_variable_config(struct it6505 *it6505) { it6505->link_rate_bw_code = HBR; @@ -1959,7 +2174,7 @@ static int it6505_setup_sha1_input(struct it6505 *it6505, u8 *sha1_input) { struct device *dev = it6505->dev; u8 binfo[2]; - int down_stream_count, i, err, msg_count = 0; + int down_stream_count, err, msg_count = 0; err = it6505_get_dpcd(it6505, DP_AUX_HDCP_BINFO, binfo, ARRAY_SIZE(binfo)); @@ -1984,18 +2199,11 @@ static int it6505_setup_sha1_input(struct it6505 *it6505, u8 *sha1_input) down_stream_count); return 0; } + err = it6505_get_ksvlist(it6505, sha1_input, down_stream_count * 5); + if (err < 0) + return err; - for (i = 0; i < down_stream_count; i++) { - err = it6505_get_dpcd(it6505, DP_AUX_HDCP_KSV_FIFO + - (i % 3) * DRM_HDCP_KSV_LEN, - sha1_input + msg_count, - DRM_HDCP_KSV_LEN); - - if (err < 0) - return err; - - msg_count += 5; - } + msg_count += down_stream_count * 5; it6505->hdcp_down_stream_count = down_stream_count; sha1_input[msg_count++] = binfo[0]; @@ -2023,7 +2231,7 @@ static bool it6505_hdcp_part2_ksvlist_check(struct it6505 *it6505) { struct device *dev = it6505->dev; u8 av[5][4], bv[5][4]; - int i, err; + int i, err, retry; i = 
it6505_setup_sha1_input(it6505, it6505->sha1_input); if (i <= 0) { @@ -2032,22 +2240,28 @@ static bool it6505_hdcp_part2_ksvlist_check(struct it6505 *it6505) } it6505_sha1_digest(it6505, it6505->sha1_input, i, (u8 *)av); + /* 1B-05: retry the V' check up to 3 times */ + for (retry = 0; retry < 3; retry++) { + err = it6505_get_dpcd(it6505, DP_AUX_HDCP_V_PRIME(0), (u8 *)bv, + sizeof(bv)); - err = it6505_get_dpcd(it6505, DP_AUX_HDCP_V_PRIME(0), (u8 *)bv, - sizeof(bv)); + if (err < 0) { + dev_err(dev, "Failed to read V' value, retry %d", retry); + continue; + } - if (err < 0) { - dev_err(dev, "Read V' value Fail"); - return false; - } + for (i = 0; i < 5; i++) { + if (bv[i][3] != av[i][0] || bv[i][2] != av[i][1] || + bv[i][1] != av[i][2] || bv[i][0] != av[i][3]) + break; + } - for (i = 0; i < 5; i++) - if (bv[i][3] != av[i][0] || bv[i][2] != av[i][1] || - bv[i][1] != av[i][2] || bv[i][0] != av[i][3]) - return false; + + /* all five V' words must match (1B-05) */ + if (i == 5) { + DRM_DEV_DEBUG_DRIVER(dev, "V' all match!! %d", retry); + return true; + } + } - DRM_DEV_DEBUG_DRIVER(dev, "V' all match!!"); - return true; + DRM_DEV_DEBUG_DRIVER(dev, "V' NOT match!! %d", retry); + return false; } static void it6505_hdcp_wait_ksv_list(struct work_struct *work) @@ -2055,12 +2269,13 @@ struct it6505 *it6505 = container_of(work, struct it6505, hdcp_wait_ksv_list); struct device *dev = it6505->dev; - unsigned int timeout = 5000; - u8 bstatus = 0; + u8 bstatus; bool ksv_list_check; + /* 1B-04: wait up to 5s for the KSV list */ + unsigned long timeout = jiffies + + msecs_to_jiffies(5000) + 1; - timeout /= 20; - while (timeout > 0) { + for (;;) { if (!it6505_get_sink_hpd_status(it6505)) return; @@ -2069,27 +2284,23 @@ static void it6505_hdcp_wait_ksv_list(struct work_struct *work) if (bstatus & DP_BSTATUS_READY) break; - msleep(20); - timeout--; - } + if (time_after(jiffies, timeout)) { + DRM_DEV_DEBUG_DRIVER(dev, "KSV list wait timeout"); + goto timeout; + } - if (timeout == 0) { - DRM_DEV_DEBUG_DRIVER(dev, "timeout and ksv list wait failed"); - goto timeout; + msleep(20); } ksv_list_check = it6505_hdcp_part2_ksvlist_check(it6505); DRM_DEV_DEBUG_DRIVER(dev, "ksv list ready, ksv list check %s", ksv_list_check ? 
"pass" : "fail"); - if (ksv_list_check) { - it6505_set_bits(it6505, REG_HDCP_TRIGGER, - HDCP_TRIGGER_KSV_DONE, HDCP_TRIGGER_KSV_DONE); + + if (ksv_list_check) return; - } + timeout: - it6505_set_bits(it6505, REG_HDCP_TRIGGER, - HDCP_TRIGGER_KSV_DONE | HDCP_TRIGGER_KSV_FAIL, - HDCP_TRIGGER_KSV_DONE | HDCP_TRIGGER_KSV_FAIL); + it6505_start_hdcp(it6505); } static void it6505_hdcp_work(struct work_struct *work) @@ -2312,14 +2523,20 @@ static int it6505_process_hpd_irq(struct it6505 *it6505) DRM_DEV_DEBUG_DRIVER(dev, "dp_irq_vector = 0x%02x", dp_irq_vector); if (dp_irq_vector & DP_CP_IRQ) { - it6505_set_bits(it6505, REG_HDCP_TRIGGER, HDCP_TRIGGER_CPIRQ, - HDCP_TRIGGER_CPIRQ); - bstatus = it6505_dpcd_read(it6505, DP_AUX_HDCP_BSTATUS); if (bstatus < 0) return bstatus; DRM_DEV_DEBUG_DRIVER(dev, "Bstatus = 0x%02x", bstatus); + + /* Check BSTATUS when receiving CP_IRQ */ + if (bstatus & DP_BSTATUS_R0_PRIME_READY && + it6505->hdcp_status == HDCP_AUTH_GOING) + it6505_set_bits(it6505, REG_HDCP_TRIGGER, HDCP_TRIGGER_CPIRQ, + HDCP_TRIGGER_CPIRQ); + else if (bstatus & (DP_BSTATUS_REAUTH_REQ | DP_BSTATUS_LINK_FAILURE) && + it6505->hdcp_status == HDCP_AUTH_DONE) + it6505_start_hdcp(it6505); } ret = drm_dp_dpcd_read_link_status(&it6505->aux, link_status); @@ -2456,7 +2673,11 @@ static void it6505_irq_hdcp_ksv_check(struct it6505 *it6505) { struct device *dev = it6505->dev; - DRM_DEV_DEBUG_DRIVER(dev, "HDCP event Interrupt"); + DRM_DEV_DEBUG_DRIVER(dev, "HDCP repeater R0 event Interrupt"); + /* 1B-01: HDCP encryption should start when R0' is ready */ + it6505_set_bits(it6505, REG_HDCP_TRIGGER, + HDCP_TRIGGER_KSV_DONE, HDCP_TRIGGER_KSV_DONE); + + schedule_work(&it6505->hdcp_wait_ksv_list); } diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 940083e5d2dd..23edcde6b9a7 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -1466,7 +1466,6 @@ static const struct hdmi_codec_ops it66121_audio_codec_ops = { .audio_shutdown = it66121_audio_shutdown, .mute_stream = it66121_audio_mute, .get_eld = it66121_audio_get_eld, - .no_capture_mute = 1, }; static int it66121_audio_codec_init(struct it66121_ctx *ctx, struct device *dev) @@ -1476,11 +1475,12 @@ static int it66121_audio_codec_init(struct it66121_ctx *ctx, struct device *dev) .i2s = 1, /* Only i2s support for now */ .spdif = 0, .max_i2s_channels = 8, + .no_capture_mute = 1, }; dev_dbg(dev, "%s\n", __func__); - if (!of_property_read_bool(dev->of_node, "#sound-dai-cells")) { + if (!of_property_present(dev->of_node, "#sound-dai-cells")) { dev_info(dev, "No \"#sound-dai-cells\", no audio\n"); return 0; } diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index 74f726efc746..e650cd83fc8d 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -45,7 +45,6 @@ struct lt9611 { struct device_node *dsi1_node; struct mipi_dsi_device *dsi0; struct mipi_dsi_device *dsi1; - struct platform_device *audio_pdev; bool ac_mode; @@ -767,15 +766,6 @@ static enum drm_mode_status lt9611_bridge_mode_valid(struct drm_bridge *bridge, return MODE_OK; } -static int lt9611_bridge_atomic_check(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - return drm_atomic_helper_connector_hdmi_check(conn_state->connector, - conn_state->state); -} - static void lt9611_bridge_atomic_pre_enable(struct drm_bridge *bridge, struct drm_bridge_state 
*old_bridge_state) { @@ -864,6 +854,10 @@ static int lt9611_hdmi_clear_infoframe(struct drm_bridge *bridge, unsigned int mask; switch (type) { + case HDMI_INFOFRAME_TYPE_AUDIO: + mask = LT9611_INFOFRAME_AUDIO; + break; + case HDMI_INFOFRAME_TYPE_AVI: mask = LT9611_INFOFRAME_AVI; break; @@ -897,6 +891,11 @@ static int lt9611_hdmi_write_infoframe(struct drm_bridge *bridge, int i; switch (type) { + case HDMI_INFOFRAME_TYPE_AUDIO: + mask = LT9611_INFOFRAME_AUDIO; + addr = 0x84b2; + break; + case HDMI_INFOFRAME_TYPE_AVI: mask = LT9611_INFOFRAME_AVI; addr = 0x8440; @@ -940,6 +939,55 @@ lt9611_hdmi_tmds_char_rate_valid(const struct drm_bridge *bridge, return MODE_OK; } +static int lt9611_hdmi_audio_startup(struct drm_connector *connector, + struct drm_bridge *bridge) +{ + struct lt9611 *lt9611 = bridge_to_lt9611(bridge); + + regmap_write(lt9611->regmap, 0x82d6, 0x8c); + regmap_write(lt9611->regmap, 0x82d7, 0x04); + + regmap_write(lt9611->regmap, 0x8406, 0x08); + regmap_write(lt9611->regmap, 0x8407, 0x10); + + regmap_write(lt9611->regmap, 0x8434, 0xd5); + + return 0; +} + +static int lt9611_hdmi_audio_prepare(struct drm_connector *connector, + struct drm_bridge *bridge, + struct hdmi_codec_daifmt *fmt, + struct hdmi_codec_params *hparms) +{ + struct lt9611 *lt9611 = bridge_to_lt9611(bridge); + + if (hparms->sample_rate == 48000) + regmap_write(lt9611->regmap, 0x840f, 0x2b); + else if (hparms->sample_rate == 96000) + regmap_write(lt9611->regmap, 0x840f, 0xab); + else + return -EINVAL; + + regmap_write(lt9611->regmap, 0x8435, 0x00); + regmap_write(lt9611->regmap, 0x8436, 0x18); + regmap_write(lt9611->regmap, 0x8437, 0x00); + + return drm_atomic_helper_connector_hdmi_update_audio_infoframe(connector, + &hparms->cea); +} + +static void lt9611_hdmi_audio_shutdown(struct drm_connector *connector, + struct drm_bridge *bridge) +{ + struct lt9611 *lt9611 = bridge_to_lt9611(bridge); + + drm_atomic_helper_connector_hdmi_clear_audio_infoframe(connector); + + regmap_write(lt9611->regmap, 0x8406, 0x00); + regmap_write(lt9611->regmap, 0x8407, 0x00); +} + static const struct drm_bridge_funcs lt9611_bridge_funcs = { .attach = lt9611_bridge_attach, .mode_valid = lt9611_bridge_mode_valid, @@ -947,7 +995,6 @@ static const struct drm_bridge_funcs lt9611_bridge_funcs = { .edid_read = lt9611_bridge_edid_read, .hpd_enable = lt9611_bridge_hpd_enable, - .atomic_check = lt9611_bridge_atomic_check, .atomic_pre_enable = lt9611_bridge_atomic_pre_enable, .atomic_enable = lt9611_bridge_atomic_enable, .atomic_disable = lt9611_bridge_atomic_disable, @@ -960,6 +1007,10 @@ static const struct drm_bridge_funcs lt9611_bridge_funcs = { .hdmi_tmds_char_rate_valid = lt9611_hdmi_tmds_char_rate_valid, .hdmi_write_infoframe = lt9611_hdmi_write_infoframe, .hdmi_clear_infoframe = lt9611_hdmi_clear_infoframe, + + .hdmi_audio_startup = lt9611_hdmi_audio_startup, + .hdmi_audio_prepare = lt9611_hdmi_audio_prepare, + .hdmi_audio_shutdown = lt9611_hdmi_audio_shutdown, }; static int lt9611_parse_dt(struct device *dev, @@ -1013,101 +1064,6 @@ static int lt9611_read_device_rev(struct lt9611 *lt9611) return ret; } -static int lt9611_hdmi_hw_params(struct device *dev, void *data, - struct hdmi_codec_daifmt *fmt, - struct hdmi_codec_params *hparms) -{ - struct lt9611 *lt9611 = data; - - if (hparms->sample_rate == 48000) - regmap_write(lt9611->regmap, 0x840f, 0x2b); - else if (hparms->sample_rate == 96000) - regmap_write(lt9611->regmap, 0x840f, 0xab); - else - return -EINVAL; - - regmap_write(lt9611->regmap, 0x8435, 0x00); - regmap_write(lt9611->regmap, 
0x8436, 0x18); - regmap_write(lt9611->regmap, 0x8437, 0x00); - - return 0; -} - -static int lt9611_audio_startup(struct device *dev, void *data) -{ - struct lt9611 *lt9611 = data; - - regmap_write(lt9611->regmap, 0x82d6, 0x8c); - regmap_write(lt9611->regmap, 0x82d7, 0x04); - - regmap_write(lt9611->regmap, 0x8406, 0x08); - regmap_write(lt9611->regmap, 0x8407, 0x10); - - regmap_write(lt9611->regmap, 0x8434, 0xd5); - - return 0; -} - -static void lt9611_audio_shutdown(struct device *dev, void *data) -{ - struct lt9611 *lt9611 = data; - - regmap_write(lt9611->regmap, 0x8406, 0x00); - regmap_write(lt9611->regmap, 0x8407, 0x00); -} - -static int lt9611_hdmi_i2s_get_dai_id(struct snd_soc_component *component, - struct device_node *endpoint) -{ - struct of_endpoint of_ep; - int ret; - - ret = of_graph_parse_endpoint(endpoint, &of_ep); - if (ret < 0) - return ret; - - /* - * HDMI sound should be located as reg = <2> - * Then, it is sound port 0 - */ - if (of_ep.port == 2) - return 0; - - return -EINVAL; -} - -static const struct hdmi_codec_ops lt9611_codec_ops = { - .hw_params = lt9611_hdmi_hw_params, - .audio_shutdown = lt9611_audio_shutdown, - .audio_startup = lt9611_audio_startup, - .get_dai_id = lt9611_hdmi_i2s_get_dai_id, -}; - -static struct hdmi_codec_pdata codec_data = { - .ops = <9611_codec_ops, - .max_i2s_channels = 8, - .i2s = 1, -}; - -static int lt9611_audio_init(struct device *dev, struct lt9611 *lt9611) -{ - codec_data.data = lt9611; - lt9611->audio_pdev = - platform_device_register_data(dev, HDMI_CODEC_DRV_NAME, - PLATFORM_DEVID_AUTO, - &codec_data, sizeof(codec_data)); - - return PTR_ERR_OR_ZERO(lt9611->audio_pdev); -} - -static void lt9611_audio_exit(struct lt9611 *lt9611) -{ - if (lt9611->audio_pdev) { - platform_device_unregister(lt9611->audio_pdev); - lt9611->audio_pdev = NULL; - } -} - static int lt9611_probe(struct i2c_client *client) { struct lt9611 *lt9611; @@ -1171,6 +1127,9 @@ static int lt9611_probe(struct i2c_client *client) i2c_set_clientdata(client, lt9611); + /* Disable Audio InfoFrame, enabled by default */ + regmap_update_bits(lt9611->regmap, 0x843d, LT9611_INFOFRAME_AUDIO, 0); + lt9611->bridge.funcs = <9611_bridge_funcs; lt9611->bridge.of_node = client->dev.of_node; lt9611->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID | @@ -1179,6 +1138,9 @@ static int lt9611_probe(struct i2c_client *client) lt9611->bridge.type = DRM_MODE_CONNECTOR_HDMIA; lt9611->bridge.vendor = "Lontium"; lt9611->bridge.product = "LT9611"; + lt9611->bridge.hdmi_audio_dev = dev; + lt9611->bridge.hdmi_audio_max_i2s_playback_channels = 8; + lt9611->bridge.hdmi_audio_dai_port = 2; drm_bridge_add(<9611->bridge); @@ -1200,10 +1162,6 @@ static int lt9611_probe(struct i2c_client *client) lt9611_enable_hpd_interrupts(lt9611); - ret = lt9611_audio_init(dev, lt9611); - if (ret) - goto err_remove_bridge; - return 0; err_remove_bridge: @@ -1224,7 +1182,6 @@ static void lt9611_remove(struct i2c_client *client) struct lt9611 *lt9611 = i2c_get_clientdata(client); disable_irq(client->irq); - lt9611_audio_exit(lt9611); drm_bridge_remove(<9611->bridge); regulator_bulk_disable(ARRAY_SIZE(lt9611->supplies), lt9611->supplies); diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index db9a5466060b..f4c3ff1fdc69 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -522,7 +522,8 @@ static void lt9611uxc_audio_shutdown(struct device *dev, void *data) } static int lt9611uxc_hdmi_i2s_get_dai_id(struct 
snd_soc_component *component, - struct device_node *endpoint) + struct device_node *endpoint, + void *data) { struct of_endpoint of_ep; int ret; diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 127da22011b3..bf2d1632b020 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -815,7 +815,8 @@ static int sii902x_audio_get_eld(struct device *dev, void *data, } static int sii902x_audio_get_dai_id(struct snd_soc_component *component, - struct device_node *endpoint) + struct device_node *endpoint, + void *data) { struct of_endpoint of_ep; int ret; @@ -840,7 +841,6 @@ static const struct hdmi_codec_ops sii902x_audio_codec_ops = { .mute_stream = sii902x_audio_mute, .get_eld = sii902x_audio_get_eld, .get_dai_id = sii902x_audio_get_dai_id, - .no_capture_mute = 1, }; static int sii902x_audio_codec_init(struct sii902x *sii902x, @@ -863,11 +863,12 @@ static int sii902x_audio_codec_init(struct sii902x *sii902x, .i2s = 1, /* Only i2s support for now. */ .spdif = 0, .max_i2s_channels = 0, + .no_capture_mute = 1, }; u8 lanes[4]; int num_lanes, i; - if (!of_property_read_bool(dev->of_node, "#sound-dai-cells")) { + if (!of_property_present(dev->of_node, "#sound-dai-cells")) { dev_dbg(dev, "%s: No \"#sound-dai-cells\", no audio\n", __func__); return 0; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c index f1c5a8d0fa90..2c903c9fe805 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c @@ -148,7 +148,8 @@ static int dw_hdmi_i2s_get_eld(struct device *dev, void *data, uint8_t *buf, } static int dw_hdmi_i2s_get_dai_id(struct snd_soc_component *component, - struct device_node *endpoint) + struct device_node *endpoint, + void *data) { struct of_endpoint of_ep; int ret; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c index c686671e4850..b281cabfe992 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c @@ -361,22 +361,6 @@ static int dw_hdmi_qp_config_drm_infoframe(struct dw_hdmi_qp *hdmi, return 0; } -static int dw_hdmi_qp_bridge_atomic_check(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - struct dw_hdmi_qp *hdmi = bridge->driver_private; - int ret; - - ret = drm_atomic_helper_connector_hdmi_check(conn_state->connector, - conn_state->state); - if (ret) - dev_dbg(hdmi->dev, "%s failed: %d\n", __func__, ret); - - return ret; -} - static void dw_hdmi_qp_bridge_atomic_enable(struct drm_bridge *bridge, struct drm_bridge_state *old_state) { @@ -503,7 +487,6 @@ static const struct drm_bridge_funcs dw_hdmi_qp_bridge_funcs = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .atomic_reset = drm_atomic_helper_bridge_reset, - .atomic_check = dw_hdmi_qp_bridge_atomic_check, .atomic_enable = dw_hdmi_qp_bridge_atomic_enable, .atomic_disable = dw_hdmi_qp_bridge_atomic_disable, .detect = dw_hdmi_qp_bridge_detect, diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index 6a4e892afcf8..8d22b7627d41 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -15,6 +15,7 @@ if DRM_DISPLAY_HELPER config DRM_BRIDGE_CONNECTOR bool + select DRM_DISPLAY_HDMI_AUDIO_HELPER select 
DRM_DISPLAY_HDMI_STATE_HELPER help DRM connector implementation terminating DRM bridge chains. @@ -75,6 +76,12 @@ config DRM_DISPLAY_HDCP_HELPER help DRM display helpers for HDCP. +config DRM_DISPLAY_HDMI_AUDIO_HELPER + bool + help + DRM display helpers for HDMI Audio functionality (generic HDMI Codec + implementation). + config DRM_DISPLAY_HDMI_HELPER bool help @@ -82,6 +89,7 @@ config DRM_DISPLAY_HDMI_STATE_HELPER bool + select DRM_DISPLAY_HDMI_AUDIO_HELPER select DRM_DISPLAY_HDMI_HELPER help DRM KMS state helpers for HDMI. diff --git a/drivers/gpu/drm/display/Makefile b/drivers/gpu/drm/display/Makefile index 629c834c3192..b17879b957d5 100644 --- a/drivers/gpu/drm/display/Makefile +++ b/drivers/gpu/drm/display/Makefile @@ -14,6 +14,8 @@ drm_display_helper-$(CONFIG_DRM_DISPLAY_DP_TUNNEL) += \ drm_display_helper-$(CONFIG_DRM_DISPLAY_DSC_HELPER) += \ drm_dsc_helper.o drm_display_helper-$(CONFIG_DRM_DISPLAY_HDCP_HELPER) += drm_hdcp_helper.o +drm_display_helper-$(CONFIG_DRM_DISPLAY_HDMI_AUDIO_HELPER) += \ + drm_hdmi_audio_helper.o drm_display_helper-$(CONFIG_DRM_DISPLAY_HDMI_HELPER) += \ drm_hdmi_helper.o \ drm_scdc_helper.o diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 512ced87ea18..56f977bbe62d 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -17,7 +17,9 @@ #include <drm/drm_edid.h> #include <drm/drm_managed.h> #include <drm/drm_modeset_helper_vtables.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> +#include <drm/display/drm_hdmi_audio_helper.h> #include <drm/display/drm_hdmi_helper.h> #include <drm/display/drm_hdmi_state_helper.h> @@ -180,11 +182,15 @@ drm_bridge_connector_detect(struct drm_connector *connector, bool force) struct drm_bridge_connector *bridge_connector = to_drm_bridge_connector(connector); struct drm_bridge *detect = bridge_connector->bridge_detect; + struct drm_bridge *hdmi = bridge_connector->bridge_hdmi; enum drm_connector_status status; if (detect) { status = detect->funcs->detect(detect); + if (hdmi) + drm_atomic_helper_connector_hdmi_hotplug(connector, status); + drm_bridge_connector_hpd_notify(connector, status); } else { switch (connector->connector_type) { @@ -203,6 +209,16 @@ drm_bridge_connector_detect(struct drm_connector *connector, bool force) return status; } +static void drm_bridge_connector_force(struct drm_connector *connector) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_bridge *hdmi = bridge_connector->bridge_hdmi; + + if (hdmi) + drm_atomic_helper_connector_hdmi_force(connector); +} + static void drm_bridge_connector_debugfs_init(struct drm_connector *connector, struct dentry *root) { @@ -231,6 +247,7 @@ static void drm_bridge_connector_reset(struct drm_connector *connector) static const struct drm_connector_funcs drm_bridge_connector_funcs = { .reset = drm_bridge_connector_reset, .detect = drm_bridge_connector_detect, + .force = drm_bridge_connector_force, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, @@ -277,6 +294,14 @@ static int drm_bridge_connector_get_modes(struct drm_connector *connector) struct drm_bridge *bridge; /* + * If there is an HDMI bridge, the EDID has already been updated as part of + * .detect(). Just update the modes here. 
+ */ + bridge = bridge_connector->bridge_hdmi; + if (bridge) + return drm_edid_connector_add_modes(connector); + + /* * If display exposes EDID, then we parse that in the normal way to * build table of supported modes. */ @@ -313,11 +338,24 @@ drm_bridge_connector_mode_valid(struct drm_connector *connector, return MODE_OK; } +static int drm_bridge_connector_atomic_check(struct drm_connector *connector, + struct drm_atomic_state *state) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + + if (bridge_connector->bridge_hdmi) + return drm_atomic_helper_connector_hdmi_check(connector, state); + + return 0; +} + static const struct drm_connector_helper_funcs drm_bridge_connector_helper_funcs = { .get_modes = drm_bridge_connector_get_modes, .mode_valid = drm_bridge_connector_mode_valid, .enable_hpd = drm_bridge_connector_enable_hpd, .disable_hpd = drm_bridge_connector_disable_hpd, + .atomic_check = drm_bridge_connector_atomic_check, }; static enum drm_mode_status @@ -368,10 +406,94 @@ static int drm_bridge_connector_write_infoframe(struct drm_connector *connector, return bridge->funcs->hdmi_write_infoframe(bridge, type, buffer, len); } +static const struct drm_edid * +drm_bridge_connector_read_edid(struct drm_connector *connector) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_bridge *bridge; + + bridge = bridge_connector->bridge_edid; + if (!bridge) + return NULL; + + return drm_bridge_edid_read(bridge, connector); +} + static const struct drm_connector_hdmi_funcs drm_bridge_connector_hdmi_funcs = { .tmds_char_rate_valid = drm_bridge_connector_tmds_char_rate_valid, .clear_infoframe = drm_bridge_connector_clear_infoframe, .write_infoframe = drm_bridge_connector_write_infoframe, + .read_edid = drm_bridge_connector_read_edid, +}; + +static int drm_bridge_connector_audio_startup(struct drm_connector *connector) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_bridge *bridge; + + bridge = bridge_connector->bridge_hdmi; + if (!bridge) + return -EINVAL; + + if (!bridge->funcs->hdmi_audio_startup) + return 0; + + return bridge->funcs->hdmi_audio_startup(connector, bridge); +} + +static int drm_bridge_connector_audio_prepare(struct drm_connector *connector, + struct hdmi_codec_daifmt *fmt, + struct hdmi_codec_params *hparms) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_bridge *bridge; + + bridge = bridge_connector->bridge_hdmi; + if (!bridge) + return -EINVAL; + + return bridge->funcs->hdmi_audio_prepare(connector, bridge, fmt, hparms); +} + +static void drm_bridge_connector_audio_shutdown(struct drm_connector *connector) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_bridge *bridge; + + bridge = bridge_connector->bridge_hdmi; + if (!bridge) + return; + + bridge->funcs->hdmi_audio_shutdown(connector, bridge); +} + +static int drm_bridge_connector_audio_mute_stream(struct drm_connector *connector, + bool enable, int direction) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_bridge *bridge; + + bridge = bridge_connector->bridge_hdmi; + if (!bridge) + return -EINVAL; + + if (bridge->funcs->hdmi_audio_mute_stream) + return bridge->funcs->hdmi_audio_mute_stream(connector, bridge, + enable, direction); + else + return -ENOTSUPP; +} + +static const struct drm_connector_hdmi_audio_funcs 
drm_bridge_connector_hdmi_audio_funcs = { + .startup = drm_bridge_connector_audio_startup, + .prepare = drm_bridge_connector_audio_prepare, + .shutdown = drm_bridge_connector_audio_shutdown, + .mute_stream = drm_bridge_connector_audio_mute_stream, }; /* ----------------------------------------------------------------------------- @@ -473,7 +595,12 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (connector_type == DRM_MODE_CONNECTOR_Unknown) return ERR_PTR(-EINVAL); - if (bridge_connector->bridge_hdmi) + if (bridge_connector->bridge_hdmi) { + if (!connector->ycbcr_420_allowed) + supported_formats &= ~BIT(HDMI_COLORSPACE_YUV420); + + bridge = bridge_connector->bridge_hdmi; + ret = drmm_connector_hdmi_init(drm, connector, bridge_connector->bridge_hdmi->vendor, bridge_connector->bridge_hdmi->product, @@ -482,12 +609,31 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, connector_type, ddc, supported_formats, max_bpc); - else + if (ret) + return ERR_PTR(ret); + + if (bridge->hdmi_audio_max_i2s_playback_channels || + bridge->hdmi_audio_spdif_playback) { + if (!bridge->funcs->hdmi_audio_prepare || + !bridge->funcs->hdmi_audio_shutdown) + return ERR_PTR(-EINVAL); + + ret = drm_connector_hdmi_audio_init(connector, + bridge->hdmi_audio_dev, + &drm_bridge_connector_hdmi_audio_funcs, + bridge->hdmi_audio_max_i2s_playback_channels, + bridge->hdmi_audio_spdif_playback, + bridge->hdmi_audio_dai_port); + if (ret) + return ERR_PTR(ret); + } + } else { ret = drmm_connector_init(drm, connector, &drm_bridge_connector_funcs, connector_type, ddc); - if (ret) - return ERR_PTR(ret); + if (ret) + return ERR_PTR(ret); + } drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs); diff --git a/drivers/gpu/drm/display/drm_dp_tunnel.c b/drivers/gpu/drm/display/drm_dp_tunnel.c index 48b2df120086..90fe07a89260 100644 --- a/drivers/gpu/drm/display/drm_dp_tunnel.c +++ b/drivers/gpu/drm/display/drm_dp_tunnel.c @@ -1896,8 +1896,8 @@ static void destroy_mgr(struct drm_dp_tunnel_mgr *mgr) * * Creates a DP tunnel manager for @dev. * - * Returns a pointer to the tunnel manager if created successfully or NULL in - * case of an error. + * Returns a pointer to the tunnel manager if created successfully or error + * pointer in case of failure. 
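With this change callers can no longer test the result against NULL and must use IS_ERR() instead. A minimal caller-side sketch of the new contract (variable names illustrative, not from this series):

	mgr = drm_dp_tunnel_mgr_create(dev, max_group_count);
	if (IS_ERR(mgr))
		return PTR_ERR(mgr);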
*/ struct drm_dp_tunnel_mgr * drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) @@ -1907,7 +1907,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); if (!mgr) - return NULL; + return ERR_PTR(-ENOMEM); mgr->dev = dev; init_waitqueue_head(&mgr->bw_req_queue); @@ -1916,7 +1916,7 @@ if (!mgr->groups) { kfree(mgr); - return NULL; + return ERR_PTR(-ENOMEM); } #ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG @@ -1927,7 +1927,7 @@ if (!init_group(mgr, &mgr->groups[i])) { destroy_mgr(mgr); - return NULL; + return ERR_PTR(-ENOMEM); } mgr->group_count++; diff --git a/drivers/gpu/drm/display/drm_hdmi_audio_helper.c b/drivers/gpu/drm/display/drm_hdmi_audio_helper.c new file mode 100644 index 000000000000..05afc9f0bdd6 --- /dev/null +++ b/drivers/gpu/drm/display/drm_hdmi_audio_helper.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (c) 2024 Linaro Ltd + */ + +#include <linux/mutex.h> +#include <linux/of_graph.h> +#include <linux/platform_device.h> + +#include <drm/drm_connector.h> +#include <drm/drm_device.h> +#include <drm/display/drm_hdmi_audio_helper.h> + +#include <sound/hdmi-codec.h> + +static int drm_connector_hdmi_audio_startup(struct device *dev, void *data) +{ + struct drm_connector *connector = data; + const struct drm_connector_hdmi_audio_funcs *funcs = + connector->hdmi_audio.funcs; + + if (funcs->startup) + return funcs->startup(connector); + + return 0; +} + +static int drm_connector_hdmi_audio_prepare(struct device *dev, void *data, + struct hdmi_codec_daifmt *fmt, + struct hdmi_codec_params *hparms) +{ + struct drm_connector *connector = data; + const struct drm_connector_hdmi_audio_funcs *funcs = + connector->hdmi_audio.funcs; + + return funcs->prepare(connector, fmt, hparms); +} + +static void drm_connector_hdmi_audio_shutdown(struct device *dev, void *data) +{ + struct drm_connector *connector = data; + const struct drm_connector_hdmi_audio_funcs *funcs = + connector->hdmi_audio.funcs; + + return funcs->shutdown(connector); +} + +static int drm_connector_hdmi_audio_mute_stream(struct device *dev, void *data, + bool enable, int direction) +{ + struct drm_connector *connector = data; + const struct drm_connector_hdmi_audio_funcs *funcs = + connector->hdmi_audio.funcs; + + if (funcs->mute_stream) + return funcs->mute_stream(connector, enable, direction); + + return -ENOTSUPP; +} + +static int drm_connector_hdmi_audio_get_dai_id(struct snd_soc_component *component, + struct device_node *endpoint, + void *data) +{ + struct drm_connector *connector = data; + struct of_endpoint of_ep; + int ret; + + if (connector->hdmi_audio.dai_port < 0) + return -ENOTSUPP; + + ret = of_graph_parse_endpoint(endpoint, &of_ep); + if (ret < 0) + return ret; + + if (of_ep.port == connector->hdmi_audio.dai_port) + return 0; + + return -EINVAL; +} + +static int drm_connector_hdmi_audio_get_eld(struct device *dev, void *data, + uint8_t *buf, size_t len) +{ + struct drm_connector *connector = data; + + mutex_lock(&connector->eld_mutex); + memcpy(buf, connector->eld, min(sizeof(connector->eld), len)); + mutex_unlock(&connector->eld_mutex); + + return 0; +} + +static int drm_connector_hdmi_audio_hook_plugged_cb(struct device *dev, + void *data, + hdmi_codec_plugged_cb fn, + struct device *codec_dev) +{ + struct drm_connector *connector = data; + 
mutex_lock(&connector->hdmi_audio.lock); + + connector->hdmi_audio.plugged_cb = fn; + connector->hdmi_audio.plugged_cb_dev = codec_dev; + + fn(codec_dev, connector->hdmi_audio.last_state); + + mutex_unlock(&connector->hdmi_audio.lock); + + return 0; +} + +void drm_connector_hdmi_audio_plugged_notify(struct drm_connector *connector, + bool plugged) +{ + mutex_lock(&connector->hdmi_audio.lock); + + connector->hdmi_audio.last_state = plugged; + + if (connector->hdmi_audio.plugged_cb && + connector->hdmi_audio.plugged_cb_dev) + connector->hdmi_audio.plugged_cb(connector->hdmi_audio.plugged_cb_dev, + connector->hdmi_audio.last_state); + + mutex_unlock(&connector->hdmi_audio.lock); +} +EXPORT_SYMBOL(drm_connector_hdmi_audio_plugged_notify); + +static const struct hdmi_codec_ops drm_connector_hdmi_audio_ops = { + .audio_startup = drm_connector_hdmi_audio_startup, + .prepare = drm_connector_hdmi_audio_prepare, + .audio_shutdown = drm_connector_hdmi_audio_shutdown, + .mute_stream = drm_connector_hdmi_audio_mute_stream, + .get_eld = drm_connector_hdmi_audio_get_eld, + .get_dai_id = drm_connector_hdmi_audio_get_dai_id, + .hook_plugged_cb = drm_connector_hdmi_audio_hook_plugged_cb, +}; + +/** + * drm_connector_hdmi_audio_init - Initialize HDMI Codec device for the DRM connector + * @connector: A pointer to the connector to allocate codec for + * @hdmi_codec_dev: device to be used as a parent for the HDMI Codec + * @funcs: callbacks for this HDMI Codec + * @max_i2s_playback_channels: maximum number of playback I2S channels + * @spdif_playback: set if HDMI codec has S/PDIF playback port + * @dai_port: sound DAI port, -1 if it is not enabled + * + * Create a HDMI codec device to be used with the specified connector. + * + * Returns: + * Zero on success, error code on failure. 
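A minimal driver-side sketch of registering a codec through this helper; the funcs table and the numeric arguments are illustrative, and only .prepare and .shutdown are mandatory:

	static const struct drm_connector_hdmi_audio_funcs my_audio_funcs = {
		.prepare = my_audio_prepare,	/* required */
		.shutdown = my_audio_shutdown,	/* required */
	};

	ret = drm_connector_hdmi_audio_init(connector, dev,
					    &my_audio_funcs,
					    8,		/* max I2S playback channels */
					    false,	/* no S/PDIF playback */
					    1);		/* sound DAI port */
	if (ret)
		return ret;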
+ */ +int drm_connector_hdmi_audio_init(struct drm_connector *connector, + struct device *hdmi_codec_dev, + const struct drm_connector_hdmi_audio_funcs *funcs, + unsigned int max_i2s_playback_channels, + bool spdif_playback, + int dai_port) +{ + struct hdmi_codec_pdata codec_pdata = { + .ops = &drm_connector_hdmi_audio_ops, + .max_i2s_channels = max_i2s_playback_channels, + .i2s = !!max_i2s_playback_channels, + .spdif = spdif_playback, + .no_i2s_capture = true, + .no_spdif_capture = true, + .data = connector, + }; + struct platform_device *pdev; + + if (!funcs || + !funcs->prepare || + !funcs->shutdown) + return -EINVAL; + + connector->hdmi_audio.funcs = funcs; + connector->hdmi_audio.dai_port = dai_port; + + pdev = platform_device_register_data(hdmi_codec_dev, + HDMI_CODEC_DRV_NAME, + PLATFORM_DEVID_AUTO, + &codec_pdata, sizeof(codec_pdata)); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + connector->hdmi_audio.codec_pdev = pdev; + + return 0; +} +EXPORT_SYMBOL(drm_connector_hdmi_audio_init); diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index d678c635a693..9b2ee2385634 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -5,6 +5,7 @@ #include <drm/drm_edid.h> #include <drm/drm_print.h> +#include <drm/display/drm_hdmi_audio_helper.h> #include <drm/display/drm_hdmi_helper.h> #include <drm/display/drm_hdmi_state_helper.h> @@ -502,6 +503,9 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, connector_state_get_mode(new_conn_state); int ret; + if (!new_conn_state->crtc || !new_conn_state->best_encoder) + return 0; + new_conn_state->hdmi.is_limited_range = hdmi_is_limited_range(connector, new_conn_state); ret = hdmi_compute_config(connector, new_conn_state, mode); @@ -777,3 +781,61 @@ drm_atomic_helper_connector_hdmi_clear_audio_infoframe(struct drm_connector *con return ret; } EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_clear_audio_infoframe); + +static void +drm_atomic_helper_connector_hdmi_update(struct drm_connector *connector, + enum drm_connector_status status) +{ + const struct drm_edid *drm_edid; + + if (status == connector_status_disconnected) { + // TODO: also handle CEC and scrambler, HDMI sink disconnected. + drm_connector_hdmi_audio_plugged_notify(connector, false); + drm_edid_connector_update(connector, NULL); + return; + } + + if (connector->hdmi.funcs->read_edid) + drm_edid = connector->hdmi.funcs->read_edid(connector); + else + drm_edid = drm_edid_read(connector); + + drm_edid_connector_update(connector, drm_edid); + + drm_edid_free(drm_edid); + + if (status == connector_status_connected) { + // TODO: also handle CEC and scrambler, HDMI sink is now connected. + drm_connector_hdmi_audio_plugged_notify(connector, true); + } +} + +/** + * drm_atomic_helper_connector_hdmi_hotplug - Handle the hotplug event for the HDMI connector + * @connector: A pointer to the HDMI connector + * @status: Connection status + * + * This function should be called as part of the .detect() / .detect_ctx() + * callbacks, updating the HDMI-specific connector's data. 
+ */ +void drm_atomic_helper_connector_hdmi_hotplug(struct drm_connector *connector, + enum drm_connector_status status) +{ + drm_atomic_helper_connector_hdmi_update(connector, status); +} +EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_hotplug); + +/** + * drm_atomic_helper_connector_hdmi_force - HDMI Connector implementation of the force callback + * @connector: A pointer to the HDMI connector + * + * This function implements the .force() callback for the HDMI connectors. It + * can either be used directly as the callback or should be called from within + * the .force() callback implementation to maintain the HDMI-specific + * connector's data. + */ +void drm_atomic_helper_connector_hdmi_force(struct drm_connector *connector) +{ + drm_atomic_helper_connector_hdmi_update(connector, connector->status); +} +EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_force); diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index c6af46dd02bf..241a384ebce3 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -207,6 +207,10 @@ void drm_bridge_add(struct drm_bridge *bridge) { mutex_init(&bridge->hpd_mutex); + if (bridge->ops & DRM_BRIDGE_OP_HDMI) + bridge->ycbcr_420_allowed = !!(bridge->supported_formats & + BIT(HDMI_COLORSPACE_YUV420)); + mutex_lock(&bridge_lock); list_add_tail(&bridge->list, &bridge_list); mutex_unlock(&bridge_lock); diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 251f94313717..aca442c25209 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -743,6 +743,15 @@ retry: if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; + for (i = 0; i < count; i++) { + struct drm_connector *connector = connectors[i]; + + if (connector->has_tile) + drm_client_get_tile_offsets(dev, connectors, connector_count, + modes, offsets, i, + connector->tile_h_loc, connector->tile_v_loc); + } + /* * If the BIOS didn't enable everything it could, fall back to have the * same user experiencing of lighting up as much as possible like the diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index ae6e71305f30..5f24d6b41cc6 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -33,6 +33,7 @@ #include <drm/drm_sysfs.h> #include <drm/drm_utils.h> +#include <linux/platform_device.h> #include <linux/property.h> #include <linux/uaccess.h> @@ -281,6 +282,7 @@ static int drm_connector_init_only(struct drm_device *dev, mutex_init(&connector->eld_mutex); mutex_init(&connector->edid_override_mutex); mutex_init(&connector->hdmi.infoframes.lock); + mutex_init(&connector->hdmi_audio.lock); connector->edid_blob_ptr = NULL; connector->epoch_counter = 0; connector->tile_blob_ptr = NULL; @@ -590,6 +592,9 @@ int drmm_connector_hdmi_init(struct drm_device *dev, if (!supported_formats || !(supported_formats & BIT(HDMI_COLORSPACE_RGB))) return -EINVAL; + if (connector->ycbcr_420_allowed != !!(supported_formats & BIT(HDMI_COLORSPACE_YUV420))) + return -EINVAL; + if (!(max_bpc == 8 || max_bpc == 10 || max_bpc == 12)) return -EINVAL; @@ -714,6 +719,8 @@ void drm_connector_cleanup(struct drm_connector *connector) DRM_CONNECTOR_REGISTERED)) drm_connector_unregister(connector); + platform_device_unregister(connector->hdmi_audio.codec_pdev); + if (connector->privacy_screen) { drm_privacy_screen_put(connector->privacy_screen); connector->privacy_screen = NULL; @@ -750,6 +757,7 @@ void drm_connector_cleanup(struct drm_connector 
*connector) connector->funcs->atomic_destroy_state(connector, connector->state); + mutex_destroy(&connector->hdmi_audio.lock); mutex_destroy(&connector->hdmi.infoframes.lock); mutex_destroy(&connector->mutex); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index c2c172eb25df..3cf440eee8a2 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -26,6 +26,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include <linux/cgroup_dmem.h> #include <linux/debugfs.h> #include <linux/fs.h> #include <linux/module.h> @@ -820,6 +821,37 @@ void drm_dev_put(struct drm_device *dev) } EXPORT_SYMBOL(drm_dev_put); +static void drmm_cg_unregister_region(struct drm_device *dev, void *arg) +{ + dmem_cgroup_unregister_region(arg); +} + +/** + * drmm_cgroup_register_region - Register a region of a DRM device to cgroups + * @dev: device for region + * @region_name: Region name for registering + * @size: Size of region in bytes + * + * Creates a dmem cgroup region for @dev and registers a managed cleanup action + * so the region is unregistered automatically when @dev is released. + */ +struct dmem_cgroup_region *drmm_cgroup_register_region(struct drm_device *dev, const char *region_name, u64 size) +{ + struct dmem_cgroup_region *region; + int ret; + + region = dmem_cgroup_register_region(size, "drm/%s/%s", dev->unique, region_name); + if (IS_ERR_OR_NULL(region)) + return region; + + ret = drmm_add_action_or_reset(dev, drmm_cg_unregister_region, region); + if (ret) + return ERR_PTR(ret); + + return region; +} +EXPORT_SYMBOL_GPL(drmm_cgroup_register_region); + static int create_compat_control_link(struct drm_device *dev) { struct drm_minor *minor; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index c9008113111b..fb3614a7ba44 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1354,14 +1354,14 @@ int drm_fb_helper_set_par(struct fb_info *info) } EXPORT_SYMBOL(drm_fb_helper_set_par); -static void pan_set(struct drm_fb_helper *fb_helper, int x, int y) +static void pan_set(struct drm_fb_helper *fb_helper, int dx, int dy) { struct drm_mode_set *mode_set; mutex_lock(&fb_helper->client.modeset_mutex); drm_client_for_each_modeset(mode_set, &fb_helper->client) { - mode_set->x = x; - mode_set->y = y; + mode_set->x += dx; + mode_set->y += dy; } mutex_unlock(&fb_helper->client.modeset_mutex); } @@ -1370,16 +1370,18 @@ static int pan_display_atomic(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; - int ret; + int ret, dx, dy; - pan_set(fb_helper, var->xoffset, var->yoffset); + dx = var->xoffset - info->var.xoffset; + dy = var->yoffset - info->var.yoffset; + pan_set(fb_helper, dx, dy); ret = drm_client_modeset_commit_locked(&fb_helper->client); if (!ret) { info->var.xoffset = var->xoffset; info->var.yoffset = var->yoffset; } else - pan_set(fb_helper, info->var.xoffset, info->var.yoffset); + pan_set(fb_helper, -dx, -dy); return ret; } diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index cb5f22f5bbb6..2289e71e2fa2 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -845,6 +845,16 @@ static void print_size(struct drm_printer *p, const char *stat, drm_printf(p, "drm-%s-%s:\t%llu%s\n", stat, region, sz, units[u]); } +int drm_memory_stats_is_zero(const struct drm_memory_stats *stats) +{ + return (stats->shared == 0 && + stats->private == 0 && + stats->resident == 0 && + stats->purgeable == 0 && + stats->active == 0); +} +EXPORT_SYMBOL(drm_memory_stats_is_zero); + /** * 
drm_print_memory_stats - A helper to print memory stats * @p: The printer to print output to @@ -860,7 +870,9 @@ void drm_print_memory_stats(struct drm_printer *p, { print_size(p, "total", region, stats->private + stats->shared); print_size(p, "shared", region, stats->shared); - print_size(p, "active", region, stats->active); + + if (supported_status & DRM_GEM_OBJECT_ACTIVE) + print_size(p, "active", region, stats->active); if (supported_status & DRM_GEM_OBJECT_RESIDENT) print_size(p, "resident", region, stats->resident); @@ -893,15 +905,13 @@ void drm_show_memory_stats(struct drm_printer *p, struct drm_file *file) if (obj->funcs && obj->funcs->status) { s = obj->funcs->status(obj); - supported_status = DRM_GEM_OBJECT_RESIDENT | - DRM_GEM_OBJECT_PURGEABLE; + supported_status |= s; } - if (drm_gem_object_is_shared_for_memory_stats(obj)) { + if (drm_gem_object_is_shared_for_memory_stats(obj)) status.shared += obj->size; - } else { + else status.private += obj->size; - } if (s & DRM_GEM_OBJECT_RESIDENT) { status.resident += add_size; @@ -914,6 +924,7 @@ void drm_show_memory_stats(struct drm_printer *p, struct drm_file *file) if (!dma_resv_test_signaled(obj->resv, dma_resv_usage_rw(true))) { status.active += add_size; + supported_status |= DRM_GEM_OBJECT_ACTIVE; /* If still active, don't count as purgeable: */ s &= ~DRM_GEM_OBJECT_PURGEABLE; diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 513c04f74420..e72f855fc495 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1286,14 +1286,11 @@ EXPORT_SYMBOL(drm_mode_set_name); */ int drm_mode_vrefresh(const struct drm_display_mode *mode) { - unsigned int num, den; + unsigned int num = 1, den = 1; if (mode->htotal == 0 || mode->vtotal == 0) return 0; - num = mode->clock; - den = mode->htotal * mode->vtotal; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) num *= 2; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) @@ -1301,6 +1298,12 @@ int drm_mode_vrefresh(const struct drm_display_mode *mode) if (mode->vscan > 1) den *= mode->vscan; + if (check_mul_overflow(mode->clock, num, &num)) + return 0; + + if (check_mul_overflow(mode->htotal * mode->vtotal, den, &den)) + return 0; + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(num, 1000), den); } EXPORT_SYMBOL(drm_mode_vrefresh); diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index 2379e501c08b..9940e96d35e3 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -414,7 +414,7 @@ bool drm_is_panel_follower(struct device *dev) * don't bother trying to parse it here. We just need to know if the * property is there. */ - return of_property_read_bool(dev->of_node, "panel"); + return of_property_present(dev->of_node, "panel"); } EXPORT_SYMBOL(drm_is_panel_follower); diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index ef2d490965ba..bcf248f69252 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -931,7 +931,7 @@ impl QrImage<'_> { /// They must remain valid for the duration of the function call. 
#[no_mangle] pub unsafe extern "C" fn drm_panic_qr_generate( - url: *const i8, + url: *const kernel::ffi::c_char, data: *mut u8, data_len: usize, data_size: usize, diff --git a/drivers/gpu/drm/drm_vblank_work.c b/drivers/gpu/drm/drm_vblank_work.c index 1752ffb44e1d..9cc71120246f 100644 --- a/drivers/gpu/drm/drm_vblank_work.c +++ b/drivers/gpu/drm/drm_vblank_work.c @@ -277,7 +277,7 @@ int drm_vblank_worker_init(struct drm_vblank_crtc *vblank) INIT_LIST_HEAD(&vblank->pending_work); init_waitqueue_head(&vblank->work_wait_queue); - worker = kthread_create_worker(0, "card%d-crtc%d", + worker = kthread_run_worker(0, "card%d-crtc%d", vblank->dev->primary->index, vblank->pipe); if (IS_ERR(worker)) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c index 7aa5f14d0c87..3a221923f15d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c @@ -9,7 +9,6 @@ #include "etnaviv_gem.h" #include "etnaviv_gpu.h" #include "etnaviv_mmu.h" -#include "etnaviv_perfmon.h" #define SUBALLOC_SIZE SZ_512K #define SUBALLOC_GRANULE SZ_4K @@ -100,7 +99,7 @@ retry: mutex_unlock(&suballoc->lock); ret = wait_event_interruptible_timeout(suballoc->free_event, suballoc->free_space, - msecs_to_jiffies(10 * 1000)); + secs_to_jiffies(10)); if (!ret) { dev_err(suballoc->dev, "Timeout waiting for cmdbuf space\n"); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 00707001d112..3e91747ed339 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -488,7 +488,16 @@ static const struct drm_ioctl_desc etnaviv_ioctls[] = { ETNA_IOCTL(PM_QUERY_SIG, pm_query_sig, DRM_RENDER_ALLOW), }; -DEFINE_DRM_GEM_FOPS(fops); +static void etnaviv_show_fdinfo(struct drm_printer *p, struct drm_file *file) +{ + drm_show_memory_stats(p, file); +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + DRM_GEM_FOPS, + .show_fdinfo = drm_show_fdinfo, +}; static const struct drm_driver etnaviv_drm_driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER, @@ -498,6 +507,7 @@ static const struct drm_driver etnaviv_drm_driver = { #ifdef CONFIG_DEBUG_FS .debugfs_init = etnaviv_debugfs_init, #endif + .show_fdinfo = etnaviv_show_fdinfo, .ioctls = etnaviv_ioctls, .num_ioctls = DRM_ETNAVIV_NUM_IOCTLS, .fops = &fops, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 16473c371444..2f844e82bc46 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -342,6 +342,7 @@ void *etnaviv_gem_vmap(struct drm_gem_object *obj) static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj) { struct page **pages; + pgprot_t prot; lockdep_assert_held(&obj->lock); @@ -349,8 +350,19 @@ static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj) if (IS_ERR(pages)) return NULL; - return vmap(pages, obj->base.size >> PAGE_SHIFT, - VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + switch (obj->flags & ETNA_BO_CACHE_MASK) { + case ETNA_BO_CACHED: + prot = PAGE_KERNEL; + break; + case ETNA_BO_UNCACHED: + prot = pgprot_noncached(PAGE_KERNEL); + break; + case ETNA_BO_WC: + default: + prot = pgprot_writecombine(PAGE_KERNEL); + } + + return vmap(pages, obj->base.size >> PAGE_SHIFT, VM_MAP, prot); } static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op) @@ -528,6 +540,17 @@ void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj) mutex_unlock(&priv->gem_lock); } +static enum 
drm_gem_object_status etnaviv_gem_status(struct drm_gem_object *obj) +{ + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + enum drm_gem_object_status status = 0; + + if (etnaviv_obj->pages) + status |= DRM_GEM_OBJECT_RESIDENT; + + return status; +} + static const struct vm_operations_struct vm_ops = { .fault = etnaviv_gem_fault, .open = drm_gem_vm_open, @@ -541,6 +564,7 @@ static const struct drm_gem_object_funcs etnaviv_gem_object_funcs = { .get_sg_table = etnaviv_gem_prime_get_sg_table, .vmap = etnaviv_gem_prime_vmap, .mmap = etnaviv_gem_mmap, + .status = etnaviv_gem_status, .vm_ops = &vm_ops, }; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h index 687555aae807..e5ee82a0674c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h @@ -44,9 +44,7 @@ struct etnaviv_gem_object { u32 flags; struct list_head gem_node; - struct etnaviv_gpu *gpu; /* non-null if active */ atomic_t gpu_active; - u32 access; struct page **pages; struct sg_table *sgt; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 2d4c112ce033..cf0d9049bcf1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -13,6 +13,7 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/regulator/consumer.h> +#include <linux/reset.h> #include <linux/thermal.h> #include "etnaviv_cmdbuf.h" @@ -172,6 +173,29 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value) return 0; } +static int etnaviv_gpu_reset_deassert(struct etnaviv_gpu *gpu) +{ + int ret; + + /* + * 32 core clock cycles (slowest clock) required before deassertion + * 1 microsecond might match all implementations without computation + */ + usleep_range(1, 2); + + ret = reset_control_deassert(gpu->rst); + if (ret) + return ret; + + /* + * 128 core clock cycles (slowest clock) required before any activity on AHB + * 1 microsecond might match all implementations without computation + */ + usleep_range(1, 2); + + return 0; +} + static inline bool etnaviv_is_model_rev(struct etnaviv_gpu *gpu, u32 model, u32 revision) { return gpu->identity.model == model && @@ -799,6 +823,12 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) goto pm_put; } + ret = etnaviv_gpu_reset_deassert(gpu); + if (ret) { + dev_err(gpu->dev, "GPU reset deassert failed\n"); + goto fail; + } + etnaviv_hw_identify(gpu); if (gpu->identity.model == 0) { @@ -1860,6 +1890,17 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev) if (IS_ERR(gpu->mmio)) return PTR_ERR(gpu->mmio); + + /* Get Reset: */ + gpu->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); + if (IS_ERR(gpu->rst)) + return dev_err_probe(dev, PTR_ERR(gpu->rst), + "failed to get reset\n"); + + err = reset_control_assert(gpu->rst); + if (err) + return dev_err_probe(dev, err, "failed to assert reset\n"); + /* Get Interrupt: */ gpu->irq = platform_get_irq(pdev, 0); if (gpu->irq < 0) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 4d8a7d48ade3..5cb46c84e03a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -93,6 +93,7 @@ struct etnaviv_event { struct etnaviv_cmdbuf_suballoc; struct regulator; struct clk; +struct reset_control; #define ETNA_NR_EVENTS 30 @@ -158,6 +159,7 @@ struct etnaviv_gpu { struct clk *clk_reg; struct clk *clk_core; struct clk *clk_shader; + struct reset_control *rst; unsigned int 
freq_scale; unsigned int fe_waitcycles; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 7e065b3723cf..df5192083b20 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -19,12 +19,6 @@ static void etnaviv_context_unmap(struct etnaviv_iommu_context *context, size_t unmapped_page, unmapped = 0; size_t pgsize = SZ_4K; - if (!IS_ALIGNED(iova | size, pgsize)) { - pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%zx\n", - iova, size, pgsize); - return; - } - while (unmapped < size) { unmapped_page = context->global->ops->unmap(context, iova, pgsize); @@ -45,12 +39,6 @@ static int etnaviv_context_map(struct etnaviv_iommu_context *context, size_t orig_size = size; int ret = 0; - if (!IS_ALIGNED(iova | paddr | size, pgsize)) { - pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%zx\n", - iova, &paddr, size, pgsize); - return -EINVAL; - } - while (size) { ret = context->global->ops->map(context, iova, paddr, pgsize, prot); @@ -82,11 +70,19 @@ static int etnaviv_iommu_map(struct etnaviv_iommu_context *context, return -EINVAL; for_each_sgtable_dma_sg(sgt, sg, i) { - phys_addr_t pa = sg_dma_address(sg) - sg->offset; - unsigned int da_len = sg_dma_len(sg) + sg->offset; + phys_addr_t pa = sg_dma_address(sg); + unsigned int da_len = sg_dma_len(sg); unsigned int bytes = min_t(unsigned int, da_len, va_len); - VERB("map[%d]: %08x %pap(%x)", i, iova, &pa, bytes); + VERB("map[%d]: %08x %pap(%x)", i, da, &pa, bytes); + + if (!IS_ALIGNED(iova | pa | bytes, SZ_4K)) { + dev_err(context->global->dev, + "unaligned: iova 0x%x pa %pa size 0x%x\n", + iova, &pa, bytes); + ret = -EINVAL; + goto fail; + } ret = etnaviv_context_map(context, da, pa, bytes, prot); if (ret) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 7a57d4a23e41..176fd8871759 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1660,7 +1660,6 @@ static const struct hdmi_codec_ops audio_codec_ops = { .audio_shutdown = hdmi_audio_shutdown, .mute_stream = hdmi_audio_mute, .get_eld = hdmi_audio_get_eld, - .no_capture_mute = 1, }; static int hdmi_register_audio_device(struct hdmi_context *hdata) @@ -1669,6 +1668,7 @@ static int hdmi_register_audio_device(struct hdmi_context *hdata) .ops = &audio_codec_ops, .max_i2s_channels = 6, .i2s = 1, + .no_capture_mute = 1, }; hdata->audio.pdev = platform_device_register_data( diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile index d25c75e60d3d..95a4ed599d98 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Makefile +++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_i2c.o +hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_i2c.o \ + dp/dp_aux.o dp/dp_link.o dp/dp_hw.o hibmc_drm_dp.o obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c new file mode 100644 index 000000000000..0a903cce1fa9 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Hisilicon Limited. 
+ +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/minmax.h> +#include <drm/drm_device.h> +#include <drm/drm_print.h> +#include "dp_comm.h" +#include "dp_reg.h" + +#define HIBMC_AUX_CMD_REQ_LEN GENMASK(7, 4) +#define HIBMC_AUX_CMD_ADDR GENMASK(27, 8) +#define HIBMC_AUX_CMD_I2C_ADDR_ONLY BIT(28) +#define HIBMC_BYTES_IN_U32 4 +#define HIBMC_AUX_I2C_WRITE_SUCCESS 0x1 +#define HIBMC_DP_MIN_PULSE_NUM 0x9 +#define BITS_IN_U8 8 + +static inline void hibmc_dp_aux_reset(struct hibmc_dp_dev *dp) +{ + hibmc_dp_reg_write_field(dp, HIBMC_DP_DPTX_RST_CTRL, HIBMC_DP_CFG_AUX_RST_N, 0x0); + usleep_range(10, 15); + hibmc_dp_reg_write_field(dp, HIBMC_DP_DPTX_RST_CTRL, HIBMC_DP_CFG_AUX_RST_N, 0x1); +} + +static void hibmc_dp_aux_read_data(struct hibmc_dp_dev *dp, u8 *buf, u8 size) +{ + u32 reg_num; + u32 value; + u32 num; + u8 i, j; + + reg_num = DIV_ROUND_UP(size, HIBMC_BYTES_IN_U32); + for (i = 0; i < reg_num; i++) { + /* number of bytes read from a single register */ + num = min(size - i * HIBMC_BYTES_IN_U32, HIBMC_BYTES_IN_U32); + value = readl(dp->base + HIBMC_DP_AUX_RD_DATA0 + i * HIBMC_BYTES_IN_U32); + /* convert the 32-bit value of the register to the buffer. */ + for (j = 0; j < num; j++) + buf[i * HIBMC_BYTES_IN_U32 + j] = value >> (j * BITS_IN_U8); + } +} + +static void hibmc_dp_aux_write_data(struct hibmc_dp_dev *dp, u8 *buf, u8 size) +{ + u32 reg_num; + u32 value; + u32 num; + u8 i, j; + + reg_num = DIV_ROUND_UP(size, HIBMC_BYTES_IN_U32); + for (i = 0; i < reg_num; i++) { + /* number of bytes written to a single register */ + num = min_t(u8, size - i * HIBMC_BYTES_IN_U32, HIBMC_BYTES_IN_U32); + value = 0; + /* obtain the 32-bit value written to a single register. */ + for (j = 0; j < num; j++) + value |= buf[i * HIBMC_BYTES_IN_U32 + j] << (j * BITS_IN_U8); + /* writing data to a single register */ + writel(value, dp->base + HIBMC_DP_AUX_WR_DATA0 + i * HIBMC_BYTES_IN_U32); + } +} + +static u32 hibmc_dp_aux_build_cmd(const struct drm_dp_aux_msg *msg) +{ + u32 aux_cmd = msg->request; + + if (msg->size) + aux_cmd |= FIELD_PREP(HIBMC_AUX_CMD_REQ_LEN, (msg->size - 1)); + else + aux_cmd |= FIELD_PREP(HIBMC_AUX_CMD_I2C_ADDR_ONLY, 1); + + aux_cmd |= FIELD_PREP(HIBMC_AUX_CMD_ADDR, msg->address); + + return aux_cmd; +} + +/* ret >= 0, ret is size; ret < 0, ret is err code */ +static int hibmc_dp_aux_parse_xfer(struct hibmc_dp_dev *dp, struct drm_dp_aux_msg *msg) +{ + u32 buf_data_cnt; + u32 aux_status; + + aux_status = readl(dp->base + HIBMC_DP_AUX_STATUS); + msg->reply = FIELD_GET(HIBMC_DP_CFG_AUX_STATUS, aux_status); + + if (aux_status & HIBMC_DP_CFG_AUX_TIMEOUT) + return -ETIMEDOUT; + + /* only address */ + if (!msg->size) + return 0; + + if (msg->reply != DP_AUX_NATIVE_REPLY_ACK) + return -EIO; + + buf_data_cnt = FIELD_GET(HIBMC_DP_CFG_AUX_READY_DATA_BYTE, aux_status); + + switch (msg->request) { + case DP_AUX_NATIVE_WRITE: + return msg->size; + case DP_AUX_I2C_WRITE | DP_AUX_I2C_MOT: + if (buf_data_cnt == HIBMC_AUX_I2C_WRITE_SUCCESS) + return msg->size; + else + return FIELD_GET(HIBMC_DP_CFG_AUX, aux_status); + case DP_AUX_NATIVE_READ: + case DP_AUX_I2C_READ | DP_AUX_I2C_MOT: + buf_data_cnt--; + if (buf_data_cnt != msg->size) { + /* only the successful part of data is read */ + return -EBUSY; + } + + /* all data is successfully read */ + hibmc_dp_aux_read_data(dp, msg->buffer, msg->size); + return msg->size; + default: + return -EINVAL; + } +} + +/* ret >= 0 ,ret is size; ret < 0, ret is err code */ +static ssize_t hibmc_dp_aux_xfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) 
+{ + struct hibmc_dp_dev *dp = container_of(aux, struct hibmc_dp_dev, aux); + u32 aux_cmd; + int ret; + u32 val; /* val will be assigned at the beginning of readl_poll_timeout function */ + + writel(0, dp->base + HIBMC_DP_AUX_WR_DATA0); + writel(0, dp->base + HIBMC_DP_AUX_WR_DATA1); + writel(0, dp->base + HIBMC_DP_AUX_WR_DATA2); + writel(0, dp->base + HIBMC_DP_AUX_WR_DATA3); + + hibmc_dp_aux_write_data(dp, msg->buffer, msg->size); + + aux_cmd = hibmc_dp_aux_build_cmd(msg); + writel(aux_cmd, dp->base + HIBMC_DP_AUX_CMD_ADDR); + + /* enable aux transfer */ + hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_REQ, 0x1); + ret = readl_poll_timeout(dp->base + HIBMC_DP_AUX_REQ, val, + !(val & HIBMC_DP_CFG_AUX_REQ), 50, 5000); + if (ret) { + hibmc_dp_aux_reset(dp); + return ret; + } + + return hibmc_dp_aux_parse_xfer(dp, msg); +} + +void hibmc_dp_aux_init(struct hibmc_dp_dev *dp) +{ + hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_SYNC_LEN_SEL, 0x0); + hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_TIMER_TIMEOUT, 0x1); + hibmc_dp_reg_write_field(dp, HIBMC_DP_AUX_REQ, HIBMC_DP_CFG_AUX_MIN_PULSE_NUM, + HIBMC_DP_MIN_PULSE_NUM); + + dp->aux.transfer = hibmc_dp_aux_xfer; + dp->aux.is_remote = 0; + drm_dp_aux_init(&dp->aux); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h new file mode 100644 index 000000000000..2c52a4476c4d --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 Hisilicon Limited. */ + +#ifndef DP_COMM_H +#define DP_COMM_H + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/mutex.h> +#include <linux/kernel.h> +#include <linux/bitfield.h> +#include <linux/io.h> +#include <drm/display/drm_dp_helper.h> + +#define HIBMC_DP_LANE_NUM_MAX 2 + +struct hibmc_link_status { + bool clock_recovered; + bool channel_equalized; +}; + +struct hibmc_link_cap { + u8 link_rate; + u8 lanes; +}; + +struct hibmc_dp_link { + struct hibmc_link_status status; + u8 train_set[HIBMC_DP_LANE_NUM_MAX]; + struct hibmc_link_cap cap; +}; + +struct hibmc_dp_dev { + struct drm_dp_aux aux; + struct drm_device *dev; + void __iomem *base; + struct mutex lock; /* protects concurrent RW in hibmc_dp_reg_write_field() */ + struct hibmc_dp_link link; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; +}; + +#define dp_field_modify(reg_value, mask, val) \ + do { \ + (reg_value) &= ~(mask); \ + (reg_value) |= FIELD_PREP(mask, val); \ + } while (0) \ + +#define hibmc_dp_reg_write_field(dp, offset, mask, val) \ + do { \ + typeof(dp) _dp = dp; \ + typeof(_dp->base) addr = (_dp->base + (offset)); \ + mutex_lock(&_dp->lock); \ + u32 reg_value = readl(addr); \ + dp_field_modify(reg_value, mask, val); \ + writel(reg_value, addr); \ + mutex_unlock(&_dp->lock); \ + } while (0) + +void hibmc_dp_aux_init(struct hibmc_dp_dev *dp); +int hibmc_dp_link_training(struct hibmc_dp_dev *dp); + +#endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h new file mode 100644 index 000000000000..74dd9956144e --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 Hisilicon Limited. 
*/ + +#ifndef DP_CONFIG_H +#define DP_CONFIG_H + +#define HIBMC_DP_BPP 24 +#define HIBMC_DP_SYMBOL_PER_FCLK 4 +#define HIBMC_DP_MSA1 0x20 +#define HIBMC_DP_MSA2 0x845c00 +#define HIBMC_DP_OFFSET 0x1e0000 +#define HIBMC_DP_HDCP 0x2 +#define HIBMC_DP_INT_RST 0xffff +#define HIBMC_DP_DPTX_RST 0x3ff +#define HIBMC_DP_CLK_EN 0x7 +#define HIBMC_DP_SYNC_EN_MASK 0x3 +#define HIBMC_DP_LINK_RATE_CAL 27 + +#endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c new file mode 100644 index 000000000000..a8d543881c09 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Hisilicon Limited. + +#include <linux/io.h> +#include <linux/delay.h> +#include "dp_config.h" +#include "dp_comm.h" +#include "dp_reg.h" +#include "dp_hw.h" + +static void hibmc_dp_set_tu(struct hibmc_dp_dev *dp, struct drm_display_mode *mode) +{ + u32 tu_symbol_frac_size; + u32 tu_symbol_size; + u32 rate_ks; + u8 lane_num; + u32 value; + u32 bpp; + + lane_num = dp->link.cap.lanes; + if (lane_num == 0) { + drm_err(dp->dev, "set tu failed, lane num cannot be 0!\n"); + return; + } + + bpp = HIBMC_DP_BPP; + rate_ks = dp->link.cap.link_rate * HIBMC_DP_LINK_RATE_CAL; + value = (mode->clock * bpp * 5) / (61 * lane_num * rate_ks); + + if (value % 10 == 9) { /* 9 carry */ + tu_symbol_size = value / 10 + 1; + tu_symbol_frac_size = 0; + } else { + tu_symbol_size = value / 10; + tu_symbol_frac_size = value % 10 + 1; + } + + drm_dbg_dp(dp->dev, "tu value: %u.%u value: %u\n", + tu_symbol_size, tu_symbol_frac_size, value); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_PACKET, + HIBMC_DP_CFG_STREAM_TU_SYMBOL_SIZE, tu_symbol_size); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_PACKET, + HIBMC_DP_CFG_STREAM_TU_SYMBOL_FRAC_SIZE, tu_symbol_frac_size); +} + +static void hibmc_dp_set_sst(struct hibmc_dp_dev *dp, struct drm_display_mode *mode) +{ + u32 hblank_size; + u32 htotal_size; + u32 htotal_int; + u32 hblank_int; + u32 fclk; /* flink_clock */ + + fclk = dp->link.cap.link_rate * HIBMC_DP_LINK_RATE_CAL; + + /* Considering the effect of spread spectrum, the value may deviate. + * The coefficient (0.9947) is used to offset the deviation. 
+ */ + htotal_int = mode->htotal * 9947 / 10000; + htotal_size = htotal_int * fclk / (HIBMC_DP_SYMBOL_PER_FCLK * (mode->clock / 1000)); + + hblank_int = mode->htotal - mode->hdisplay - mode->hdisplay * 53 / 10000; + hblank_size = hblank_int * fclk * 9947 / + (mode->clock * 10 * HIBMC_DP_SYMBOL_PER_FCLK); + + drm_dbg_dp(dp->dev, "h_active %u v_active %u htotal_size %u hblank_size %u", + mode->hdisplay, mode->vdisplay, htotal_size, hblank_size); + drm_dbg_dp(dp->dev, "flink_clock %u pixel_clock %d", fclk, mode->clock / 1000); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_HORIZONTAL_SIZE, + HIBMC_DP_CFG_STREAM_HTOTAL_SIZE, htotal_size); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_HORIZONTAL_SIZE, + HIBMC_DP_CFG_STREAM_HBLANK_SIZE, hblank_size); +} + +static void hibmc_dp_link_cfg(struct hibmc_dp_dev *dp, struct drm_display_mode *mode) +{ + u32 timing_delay; + u32 vblank; + u32 hstart; + u32 vstart; + + vblank = mode->vtotal - mode->vdisplay; + timing_delay = mode->htotal - mode->hsync_start; + hstart = mode->htotal - mode->hsync_start; + vstart = mode->vtotal - mode->vsync_start; + + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG0, + HIBMC_DP_CFG_TIMING_GEN0_HBLANK, mode->htotal - mode->hdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG0, + HIBMC_DP_CFG_TIMING_GEN0_HACTIVE, mode->hdisplay); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG2, + HIBMC_DP_CFG_TIMING_GEN0_VBLANK, vblank); + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG2, + HIBMC_DP_CFG_TIMING_GEN0_VACTIVE, mode->vdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_GEN_CONFIG3, + HIBMC_DP_CFG_TIMING_GEN0_VFRONT_PORCH, + mode->vsync_start - mode->vdisplay); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG0, + HIBMC_DP_CFG_STREAM_HACTIVE, mode->hdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG0, + HIBMC_DP_CFG_STREAM_HBLANK, mode->htotal - mode->hdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG2, + HIBMC_DP_CFG_STREAM_HSYNC_WIDTH, + mode->hsync_end - mode->hsync_start); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG1, + HIBMC_DP_CFG_STREAM_VACTIVE, mode->vdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG1, + HIBMC_DP_CFG_STREAM_VBLANK, vblank); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG3, + HIBMC_DP_CFG_STREAM_VFRONT_PORCH, + mode->vsync_start - mode->vdisplay); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CONFIG3, + HIBMC_DP_CFG_STREAM_VSYNC_WIDTH, + mode->vsync_end - mode->vsync_start); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_MSA0, + HIBMC_DP_CFG_STREAM_VSTART, vstart); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_MSA0, + HIBMC_DP_CFG_STREAM_HSTART, hstart); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_VSYNC_POLARITY, + mode->flags & DRM_MODE_FLAG_PVSYNC ? 1 : 0); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_HSYNC_POLARITY, + mode->flags & DRM_MODE_FLAG_PHSYNC ? 
1 : 0); + + /* MSA mic 0 and 1 */ + writel(HIBMC_DP_MSA1, dp->base + HIBMC_DP_VIDEO_MSA1); + writel(HIBMC_DP_MSA2, dp->base + HIBMC_DP_VIDEO_MSA2); + + hibmc_dp_set_tu(dp, mode); + + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_RGB_ENABLE, 0x1); + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_VIDEO_MAPPING, 0); + + /* divide 2: up even */ + if (timing_delay % 2) + timing_delay++; + + hibmc_dp_reg_write_field(dp, HIBMC_DP_TIMING_MODEL_CTRL, + HIBMC_DP_CFG_PIXEL_NUM_TIMING_MODE_SEL1, timing_delay); + + hibmc_dp_set_sst(dp, mode); +} + +int hibmc_dp_hw_init(struct hibmc_dp *dp) +{ + struct drm_device *drm_dev = dp->drm_dev; + struct hibmc_dp_dev *dp_dev; + + dp_dev = devm_kzalloc(drm_dev->dev, sizeof(struct hibmc_dp_dev), GFP_KERNEL); + if (!dp_dev) + return -ENOMEM; + + mutex_init(&dp_dev->lock); + + dp->dp_dev = dp_dev; + + dp_dev->dev = drm_dev; + dp_dev->base = dp->mmio + HIBMC_DP_OFFSET; + + hibmc_dp_aux_init(dp_dev); + + dp_dev->link.cap.lanes = 0x2; + dp_dev->link.cap.link_rate = DP_LINK_BW_2_7; + + /* hdcp data */ + writel(HIBMC_DP_HDCP, dp_dev->base + HIBMC_DP_HDCP_CFG); + /* int init */ + writel(0, dp_dev->base + HIBMC_DP_INTR_ENABLE); + writel(HIBMC_DP_INT_RST, dp_dev->base + HIBMC_DP_INTR_ORIGINAL_STATUS); + /* rst */ + writel(HIBMC_DP_DPTX_RST, dp_dev->base + HIBMC_DP_DPTX_RST_CTRL); + /* clock enable */ + writel(HIBMC_DP_CLK_EN, dp_dev->base + HIBMC_DP_DPTX_CLK_CTRL); + + return 0; +} + +void hibmc_dp_display_en(struct hibmc_dp *dp, bool enable) +{ + struct hibmc_dp_dev *dp_dev = dp->dp_dev; + + if (enable) { + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_VIDEO_CTRL, BIT(0), 0x1); + writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_DPTX_GCTL0, BIT(10), 0x1); + writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); + } else { + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_DPTX_GCTL0, BIT(10), 0); + writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); + hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_VIDEO_CTRL, BIT(0), 0); + writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL); + } + + msleep(50); +} + +int hibmc_dp_mode_set(struct hibmc_dp *dp, struct drm_display_mode *mode) +{ + struct hibmc_dp_dev *dp_dev = dp->dp_dev; + int ret; + + if (!dp_dev->link.status.channel_equalized) { + ret = hibmc_dp_link_training(dp_dev); + if (ret) { + drm_err(dp->drm_dev, "dp link training failed, ret: %d\n", ret); + return ret; + } + } + + hibmc_dp_display_en(dp, false); + hibmc_dp_link_cfg(dp_dev, mode); + + return 0; +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h new file mode 100644 index 000000000000..4dc13b3d9875 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 Hisilicon Limited. 
*/ + +#ifndef DP_KAPI_H +#define DP_KAPI_H + +#include <linux/types.h> +#include <linux/delay.h> +#include <drm/drm_device.h> +#include <drm/drm_encoder.h> +#include <drm/drm_connector.h> +#include <drm/drm_print.h> + +struct hibmc_dp_dev; + +struct hibmc_dp { + struct hibmc_dp_dev *dp_dev; + struct drm_device *drm_dev; + struct drm_encoder encoder; + struct drm_connector connector; + void __iomem *mmio; +}; + +int hibmc_dp_hw_init(struct hibmc_dp *dp); +int hibmc_dp_mode_set(struct hibmc_dp *dp, struct drm_display_mode *mode); +void hibmc_dp_display_en(struct hibmc_dp *dp, bool enable); + +#endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c new file mode 100644 index 000000000000..f6355c16cc0a --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Hisilicon Limited. + +#include <linux/delay.h> +#include <drm/drm_device.h> +#include <drm/drm_print.h> +#include "dp_comm.h" +#include "dp_reg.h" + +#define HIBMC_EQ_MAX_RETRY 5 + +static int hibmc_dp_link_training_configure(struct hibmc_dp_dev *dp) +{ + u8 buf[2]; + int ret; + + /* DP 2 lane */ + hibmc_dp_reg_write_field(dp, HIBMC_DP_PHYIF_CTRL0, HIBMC_DP_CFG_LANE_DATA_EN, + dp->link.cap.lanes == 0x2 ? 0x3 : 0x1); + hibmc_dp_reg_write_field(dp, HIBMC_DP_DPTX_GCTL0, HIBMC_DP_CFG_PHY_LANE_NUM, + dp->link.cap.lanes == 0x2 ? 0x1 : 0); + + /* enhanced frame */ + hibmc_dp_reg_write_field(dp, HIBMC_DP_VIDEO_CTRL, HIBMC_DP_CFG_STREAM_FRAME_MODE, 0x1); + + /* set rate and lane count */ + buf[0] = dp->link.cap.link_rate; + buf[1] = DP_LANE_COUNT_ENHANCED_FRAME_EN | dp->link.cap.lanes; + ret = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, sizeof(buf)); + if (ret != sizeof(buf)) { + drm_dbg_dp(dp->dev, "dp aux write link rate and lanes failed, ret: %d\n", ret); + return ret >= 0 ? -EIO : ret; + } + + /* set 8b/10b and downspread */ + buf[0] = DP_SPREAD_AMP_0_5; + buf[1] = DP_SET_ANSI_8B10B; + ret = drm_dp_dpcd_write(&dp->aux, DP_DOWNSPREAD_CTRL, buf, sizeof(buf)); + if (ret != sizeof(buf)) { + drm_dbg_dp(dp->dev, "dp aux write 8b/10b and downspread failed, ret: %d\n", ret); + return ret >= 0 ? -EIO : ret; + } + + ret = drm_dp_read_dpcd_caps(&dp->aux, dp->dpcd); + if (ret) + drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); + + return ret; +} + +static int hibmc_dp_link_set_pattern(struct hibmc_dp_dev *dp, int pattern) +{ + int ret; + u8 val; + u8 buf; + + buf = (u8)pattern; + if (pattern != DP_TRAINING_PATTERN_DISABLE && pattern != DP_TRAINING_PATTERN_4) { + buf |= DP_LINK_SCRAMBLING_DISABLE; + hibmc_dp_reg_write_field(dp, HIBMC_DP_PHYIF_CTRL0, HIBMC_DP_CFG_SCRAMBLE_EN, 0x1); + } else { + hibmc_dp_reg_write_field(dp, HIBMC_DP_PHYIF_CTRL0, HIBMC_DP_CFG_SCRAMBLE_EN, 0); + } + + switch (pattern) { + case DP_TRAINING_PATTERN_DISABLE: + val = 0; + break; + case DP_TRAINING_PATTERN_1: + val = 1; + break; + case DP_TRAINING_PATTERN_2: + val = 2; + break; + case DP_TRAINING_PATTERN_3: + val = 3; + break; + case DP_TRAINING_PATTERN_4: + val = 4; + break; + default: + return -EINVAL; + } + + hibmc_dp_reg_write_field(dp, HIBMC_DP_PHYIF_CTRL0, HIBMC_DP_CFG_PAT_SEL, val); + + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_PATTERN_SET, &buf, sizeof(buf)); + if (ret != sizeof(buf)) { + drm_dbg_dp(dp->dev, "dp aux write training pattern set failed\n"); + return ret >= 0 ? 
-EIO : ret; + } + + return 0; +} + +static int hibmc_dp_link_training_cr_pre(struct hibmc_dp_dev *dp) +{ + u8 *train_set = dp->link.train_set; + int ret; + u8 i; + + ret = hibmc_dp_link_training_configure(dp); + if (ret) + return ret; + + ret = hibmc_dp_link_set_pattern(dp, DP_TRAINING_PATTERN_1); + if (ret) + return ret; + + for (i = 0; i < dp->link.cap.lanes; i++) + train_set[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, train_set, dp->link.cap.lanes); + if (ret != dp->link.cap.lanes) { + drm_dbg_dp(dp->dev, "dp aux write training lane set failed\n"); + return ret >= 0 ? -EIO : ret; + } + + return 0; +} + +static bool hibmc_dp_link_get_adjust_train(struct hibmc_dp_dev *dp, + u8 lane_status[DP_LINK_STATUS_SIZE]) +{ + u8 train_set[HIBMC_DP_LANE_NUM_MAX] = {0}; + u8 lane; + + for (lane = 0; lane < dp->link.cap.lanes; lane++) + train_set[lane] = drm_dp_get_adjust_request_voltage(lane_status, lane) | + drm_dp_get_adjust_request_pre_emphasis(lane_status, lane); + + if (memcmp(dp->link.train_set, train_set, HIBMC_DP_LANE_NUM_MAX)) { + memcpy(dp->link.train_set, train_set, HIBMC_DP_LANE_NUM_MAX); + return true; + } + + return false; +} + +static inline int hibmc_dp_link_reduce_rate(struct hibmc_dp_dev *dp) +{ + switch (dp->link.cap.link_rate) { + case DP_LINK_BW_2_7: + dp->link.cap.link_rate = DP_LINK_BW_1_62; + return 0; + case DP_LINK_BW_5_4: + dp->link.cap.link_rate = DP_LINK_BW_2_7; + return 0; + case DP_LINK_BW_8_1: + dp->link.cap.link_rate = DP_LINK_BW_5_4; + return 0; + default: + return -EINVAL; + } +} + +static inline int hibmc_dp_link_reduce_lane(struct hibmc_dp_dev *dp) +{ + switch (dp->link.cap.lanes) { + case 0x2: + dp->link.cap.lanes--; + break; + case 0x1: + drm_err(dp->dev, "dp link training reduce lane failed, already reach minimum\n"); + return -EIO; + default: + return -EINVAL; + } + + return 0; +} + +static int hibmc_dp_link_training_cr(struct hibmc_dp_dev *dp) +{ + u8 lane_status[DP_LINK_STATUS_SIZE] = {0}; + bool level_changed; + u32 voltage_tries; + u32 cr_tries; + int ret; + + /* + * The DP 1.4 spec defines 10 as the max tries value; for pre-DP 1.4 versions set a limit of 80 + * (4 voltage levels x 4 preemphasis levels x 5 identical voltage retries) + */ + + voltage_tries = 1; + for (cr_tries = 0; cr_tries < 80; cr_tries++) { + drm_dp_link_train_clock_recovery_delay(&dp->aux, dp->dpcd); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, lane_status); + if (ret != DP_LINK_STATUS_SIZE) { + drm_err(dp->dev, "Get lane status failed\n"); + return ret; + } + + if (drm_dp_clock_recovery_ok(lane_status, dp->link.cap.lanes)) { + drm_dbg_dp(dp->dev, "dp link training cr done\n"); + dp->link.status.clock_recovered = true; + return 0; + } + + if (voltage_tries == 5) { + drm_dbg_dp(dp->dev, "same voltage tries 5 times\n"); + dp->link.status.clock_recovered = false; + return 0; + } + + level_changed = hibmc_dp_link_get_adjust_train(dp, lane_status); + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, dp->link.train_set, + dp->link.cap.lanes); + if (ret != dp->link.cap.lanes) { + drm_dbg_dp(dp->dev, "Update link training failed\n"); + return ret >= 0 ? -EIO : ret; + } + + voltage_tries = level_changed ? 
1 : voltage_tries + 1; + } + + drm_err(dp->dev, "dp link training clock recovery 80 times failed\n"); + dp->link.status.clock_recovered = false; + + return 0; +} + +static int hibmc_dp_link_training_channel_eq(struct hibmc_dp_dev *dp) +{ + u8 lane_status[DP_LINK_STATUS_SIZE] = {0}; + u8 eq_tries; + int ret; + + ret = hibmc_dp_link_set_pattern(dp, DP_TRAINING_PATTERN_2); + if (ret) + return ret; + + for (eq_tries = 0; eq_tries < HIBMC_EQ_MAX_RETRY; eq_tries++) { + drm_dp_link_train_channel_eq_delay(&dp->aux, dp->dpcd); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, lane_status); + if (ret != DP_LINK_STATUS_SIZE) { + drm_err(dp->dev, "get lane status failed\n"); + break; + } + + if (!drm_dp_clock_recovery_ok(lane_status, dp->link.cap.lanes)) { + drm_dbg_dp(dp->dev, "clock recovery check failed\n"); + drm_dbg_dp(dp->dev, "cannot continue channel equalization\n"); + dp->link.status.clock_recovered = false; + break; + } + + if (drm_dp_channel_eq_ok(lane_status, dp->link.cap.lanes)) { + dp->link.status.channel_equalized = true; + drm_dbg_dp(dp->dev, "dp link training eq done\n"); + break; + } + + hibmc_dp_link_get_adjust_train(dp, lane_status); + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, + dp->link.train_set, dp->link.cap.lanes); + if (ret != dp->link.cap.lanes) { + drm_dbg_dp(dp->dev, "Update link training failed\n"); + ret = (ret >= 0) ? -EIO : ret; + break; + } + } + + if (eq_tries == HIBMC_EQ_MAX_RETRY) + drm_err(dp->dev, "channel equalization failed %u times\n", eq_tries); + + hibmc_dp_link_set_pattern(dp, DP_TRAINING_PATTERN_DISABLE); + + return ret < 0 ? ret : 0; +} + +static int hibmc_dp_link_downgrade_training_cr(struct hibmc_dp_dev *dp) +{ + if (hibmc_dp_link_reduce_rate(dp)) + return hibmc_dp_link_reduce_lane(dp); + + return 0; +} + +static int hibmc_dp_link_downgrade_training_eq(struct hibmc_dp_dev *dp) +{ + if ((dp->link.status.clock_recovered && !dp->link.status.channel_equalized)) { + if (!hibmc_dp_link_reduce_lane(dp)) + return 0; + } + + return hibmc_dp_link_reduce_rate(dp); +} + +int hibmc_dp_link_training(struct hibmc_dp_dev *dp) +{ + struct hibmc_dp_link *link = &dp->link; + int ret; + + while (true) { + ret = hibmc_dp_link_training_cr_pre(dp); + if (ret) + goto err; + + ret = hibmc_dp_link_training_cr(dp); + if (ret) + goto err; + + if (!link->status.clock_recovered) { + ret = hibmc_dp_link_downgrade_training_cr(dp); + if (ret) + goto err; + continue; + } + + ret = hibmc_dp_link_training_channel_eq(dp); + if (ret) + goto err; + + if (!link->status.channel_equalized) { + ret = hibmc_dp_link_downgrade_training_eq(dp); + if (ret) + goto err; + continue; + } + + return 0; + } + +err: + hibmc_dp_link_set_pattern(dp, DP_TRAINING_PATTERN_DISABLE); + + return ret; +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h new file mode 100644 index 000000000000..4a515c726d52 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 Hisilicon Limited. 
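The training loop above degrades the link instead of failing outright: after a clock-recovery failure it first steps the rate down (8.1 -> 5.4 -> 2.7 -> 1.62 Gb/s via hibmc_dp_link_reduce_rate()) and drops a lane only once the minimum rate is reached, while after an equalization failure it drops a lane first and reduces the rate only when already at one lane. A condensed sketch of that policy (not literal driver code, mirroring the two downgrade helpers):

	if (!link->status.clock_recovered)
		ret = hibmc_dp_link_reduce_rate(dp) ?
		      hibmc_dp_link_reduce_lane(dp) : 0;
	else if (!link->status.channel_equalized)
		ret = hibmc_dp_link_reduce_lane(dp) ?
		      hibmc_dp_link_reduce_rate(dp) : 0;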
*/ + +#ifndef DP_REG_H +#define DP_REG_H + +#define HIBMC_DP_AUX_CMD_ADDR 0x50 +#define HIBMC_DP_AUX_WR_DATA0 0x54 +#define HIBMC_DP_AUX_WR_DATA1 0x58 +#define HIBMC_DP_AUX_WR_DATA2 0x5c +#define HIBMC_DP_AUX_WR_DATA3 0x60 +#define HIBMC_DP_AUX_RD_DATA0 0x64 +#define HIBMC_DP_AUX_REQ 0x74 +#define HIBMC_DP_AUX_STATUS 0x78 +#define HIBMC_DP_PHYIF_CTRL0 0xa0 +#define HIBMC_DP_VIDEO_CTRL 0x100 +#define HIBMC_DP_VIDEO_CONFIG0 0x104 +#define HIBMC_DP_VIDEO_CONFIG1 0x108 +#define HIBMC_DP_VIDEO_CONFIG2 0x10c +#define HIBMC_DP_VIDEO_CONFIG3 0x110 +#define HIBMC_DP_VIDEO_PACKET 0x114 +#define HIBMC_DP_VIDEO_MSA0 0x118 +#define HIBMC_DP_VIDEO_MSA1 0x11c +#define HIBMC_DP_VIDEO_MSA2 0x120 +#define HIBMC_DP_VIDEO_HORIZONTAL_SIZE 0X124 +#define HIBMC_DP_TIMING_GEN_CONFIG0 0x26c +#define HIBMC_DP_TIMING_GEN_CONFIG2 0x274 +#define HIBMC_DP_TIMING_GEN_CONFIG3 0x278 +#define HIBMC_DP_HDCP_CFG 0x600 +#define HIBMC_DP_DPTX_RST_CTRL 0x700 +#define HIBMC_DP_DPTX_CLK_CTRL 0x704 +#define HIBMC_DP_DPTX_GCTL0 0x708 +#define HIBMC_DP_INTR_ENABLE 0x720 +#define HIBMC_DP_INTR_ORIGINAL_STATUS 0x728 +#define HIBMC_DP_TIMING_MODEL_CTRL 0x884 +#define HIBMC_DP_TIMING_SYNC_CTRL 0xFF0 + +#define HIBMC_DP_CFG_AUX_SYNC_LEN_SEL BIT(1) +#define HIBMC_DP_CFG_AUX_TIMER_TIMEOUT BIT(2) +#define HIBMC_DP_CFG_STREAM_FRAME_MODE BIT(6) +#define HIBMC_DP_CFG_AUX_MIN_PULSE_NUM GENMASK(13, 9) +#define HIBMC_DP_CFG_LANE_DATA_EN GENMASK(11, 8) +#define HIBMC_DP_CFG_PHY_LANE_NUM GENMASK(2, 1) +#define HIBMC_DP_CFG_AUX_REQ BIT(0) +#define HIBMC_DP_CFG_AUX_RST_N BIT(4) +#define HIBMC_DP_CFG_AUX_TIMEOUT BIT(0) +#define HIBMC_DP_CFG_AUX_READY_DATA_BYTE GENMASK(16, 12) +#define HIBMC_DP_CFG_AUX GENMASK(24, 17) +#define HIBMC_DP_CFG_AUX_STATUS GENMASK(11, 4) +#define HIBMC_DP_CFG_SCRAMBLE_EN BIT(0) +#define HIBMC_DP_CFG_PAT_SEL GENMASK(7, 4) +#define HIBMC_DP_CFG_TIMING_GEN0_HACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_TIMING_GEN0_HBLANK GENMASK(15, 0) +#define HIBMC_DP_CFG_TIMING_GEN0_VACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_TIMING_GEN0_VBLANK GENMASK(15, 0) +#define HIBMC_DP_CFG_TIMING_GEN0_VFRONT_PORCH GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HBLANK GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_HSYNC_WIDTH GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_VACTIVE GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_VBLANK GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_VFRONT_PORCH GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_VSYNC_WIDTH GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_VSTART GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HSTART GENMASK(15, 0) +#define HIBMC_DP_CFG_STREAM_VSYNC_POLARITY BIT(8) +#define HIBMC_DP_CFG_STREAM_HSYNC_POLARITY BIT(7) +#define HIBMC_DP_CFG_STREAM_RGB_ENABLE BIT(1) +#define HIBMC_DP_CFG_STREAM_VIDEO_MAPPING GENMASK(5, 2) +#define HIBMC_DP_CFG_PIXEL_NUM_TIMING_MODE_SEL1 GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_TU_SYMBOL_SIZE GENMASK(5, 0) +#define HIBMC_DP_CFG_STREAM_TU_SYMBOL_FRAC_SIZE GENMASK(9, 6) +#define HIBMC_DP_CFG_STREAM_HTOTAL_SIZE GENMASK(31, 16) +#define HIBMC_DP_CFG_STREAM_HBLANK_SIZE GENMASK(15, 0) + +#endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c new file mode 100644 index 000000000000..603d6b198a54 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Hisilicon Limited. 
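Stepping back to the link-training code in dp_link.c above: hibmc_dp_link_training() retries in a fixed fallback order. On a clock-recovery failure the link rate is reduced first, and the lane count is halved only once the rate bottoms out; a channel-equalization failure with CR already locked drops lanes first and the rate second. A sketch of the clock-recovery ladder for a 2-lane, HBR3-capable sink (the DP_LINK_BW_* codes are the ones consumed by hibmc_dp_link_reduce_rate(); the array itself is illustrative, not part of the driver):

	static const struct {
		u8 lanes;
		u8 bw_code;	/* DP_LINK_BW_* from <drm/display/drm_dp.h> */
	} hibmc_cr_fallback_ladder[] = {
		{ 2, DP_LINK_BW_8_1 },	/* starting capability */
		{ 2, DP_LINK_BW_5_4 },
		{ 2, DP_LINK_BW_2_7 },
		{ 2, DP_LINK_BW_1_62 },
		{ 1, DP_LINK_BW_1_62 },	/* lanes drop only after the rate bottoms out */
		/* a further failure makes hibmc_dp_link_reduce_lane() return -EIO */
	};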
+ +#include <linux/io.h> + +#include <drm/drm_probe_helper.h> +#include <drm/drm_simple_kms_helper.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_modes.h> +#include <drm/drm_drv.h> +#include <drm/drm_edid.h> + +#include "hibmc_drm_drv.h" +#include "dp/dp_hw.h" + +static int hibmc_dp_connector_get_modes(struct drm_connector *connector) +{ + int count; + + count = drm_add_modes_noedid(connector, connector->dev->mode_config.max_width, + connector->dev->mode_config.max_height); + drm_set_preferred_mode(connector, 1024, 768); // temporary implementation + + return count; +} + +static const struct drm_connector_helper_funcs hibmc_dp_conn_helper_funcs = { + .get_modes = hibmc_dp_connector_get_modes, +}; + +static const struct drm_connector_funcs hibmc_dp_conn_funcs = { + .reset = drm_atomic_helper_connector_reset, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static inline int hibmc_dp_prepare(struct hibmc_dp *dp, struct drm_display_mode *mode) +{ + int ret; + + hibmc_dp_display_en(dp, false); + + ret = hibmc_dp_mode_set(dp, mode); + if (ret) + drm_err(dp->drm_dev, "hibmc dp mode set failed: %d\n", ret); + + return ret; +} + +static void hibmc_dp_encoder_enable(struct drm_encoder *drm_encoder, + struct drm_atomic_state *state) +{ + struct hibmc_dp *dp = container_of(drm_encoder, struct hibmc_dp, encoder); + struct drm_display_mode *mode = &drm_encoder->crtc->state->mode; + + if (hibmc_dp_prepare(dp, mode)) + return; + + hibmc_dp_display_en(dp, true); +} + +static void hibmc_dp_encoder_disable(struct drm_encoder *drm_encoder, + struct drm_atomic_state *state) +{ + struct hibmc_dp *dp = container_of(drm_encoder, struct hibmc_dp, encoder); + + hibmc_dp_display_en(dp, false); +} + +static const struct drm_encoder_helper_funcs hibmc_dp_encoder_helper_funcs = { + .atomic_enable = hibmc_dp_encoder_enable, + .atomic_disable = hibmc_dp_encoder_disable, +}; + +int hibmc_dp_init(struct hibmc_drm_private *priv) +{ + struct drm_device *dev = &priv->dev; + struct drm_crtc *crtc = &priv->crtc; + struct hibmc_dp *dp = &priv->dp; + struct drm_connector *connector = &dp->connector; + struct drm_encoder *encoder = &dp->encoder; + int ret; + + dp->mmio = priv->mmio; + dp->drm_dev = dev; + + ret = hibmc_dp_hw_init(&priv->dp); + if (ret) { + drm_err(dev, "hibmc dp hw init failed: %d\n", ret); + return ret; + } + + hibmc_dp_display_en(&priv->dp, false); + + encoder->possible_crtcs = drm_crtc_mask(crtc); + ret = drmm_encoder_init(dev, encoder, NULL, DRM_MODE_ENCODER_TMDS, NULL); + if (ret) { + drm_err(dev, "init dp encoder failed: %d\n", ret); + return ret; + } + + drm_encoder_helper_add(encoder, &hibmc_dp_encoder_helper_funcs); + + ret = drm_connector_init(dev, connector, &hibmc_dp_conn_funcs, + DRM_MODE_CONNECTOR_DisplayPort); + if (ret) { + drm_err(dev, "init dp connector failed: %d\n", ret); + return ret; + } + + drm_connector_helper_add(connector, &hibmc_dp_conn_helper_funcs); + + drm_connector_attach_encoder(connector, encoder); + + return 0; +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 7f814c32ed34..e6de6d5edf6b 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -28,6 +28,10 @@ #include "hibmc_drm_drv.h" #include "hibmc_drm_regs.h" +#define HIBMC_DP_HOST_SERDES_CTRL 
0x1f001c +#define HIBMC_DP_HOST_SERDES_CTRL_VAL 0x8a00 +#define HIBMC_DP_HOST_SERDES_CTRL_MASK 0x7ffff + DEFINE_DRM_GEM_FOPS(hibmc_fops); static irqreturn_t hibmc_interrupt(int irq, void *arg) @@ -117,6 +121,14 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv) return ret; } + /* if DP existed, init DP */ + if ((readl(priv->mmio + HIBMC_DP_HOST_SERDES_CTRL) & + HIBMC_DP_HOST_SERDES_CTRL_MASK) == HIBMC_DP_HOST_SERDES_CTRL_VAL) { + ret = hibmc_dp_init(priv); + if (ret) + drm_err(dev, "failed to init dp: %d\n", ret); + } + ret = hibmc_vdac_init(priv); if (ret) { drm_err(dev, "failed to init vdac: %d\n", ret); @@ -327,6 +339,8 @@ static int hibmc_pci_probe(struct pci_dev *pdev, goto err_return; } + pci_set_master(pdev); + ret = hibmc_load(dev); if (ret) { drm_err(dev, "failed to load hibmc: %d\n", ret); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 6b566f3aeecb..d982f1e4b958 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -20,9 +20,12 @@ #include <drm/drm_framebuffer.h> -struct hibmc_connector { - struct drm_connector base; +#include "dp/dp_hw.h" +struct hibmc_vdac { + struct drm_device *dev; + struct drm_encoder encoder; + struct drm_connector connector; struct i2c_adapter adapter; struct i2c_algo_bit_data bit_data; }; @@ -35,13 +38,13 @@ struct hibmc_drm_private { struct drm_device dev; struct drm_plane primary_plane; struct drm_crtc crtc; - struct drm_encoder encoder; - struct hibmc_connector connector; + struct hibmc_vdac vdac; + struct hibmc_dp dp; }; -static inline struct hibmc_connector *to_hibmc_connector(struct drm_connector *connector) +static inline struct hibmc_vdac *to_hibmc_vdac(struct drm_connector *connector) { - return container_of(connector, struct hibmc_connector, base); + return container_of(connector, struct hibmc_vdac, connector); } static inline struct hibmc_drm_private *to_hibmc_drm_private(struct drm_device *dev) @@ -57,6 +60,8 @@ void hibmc_set_current_gate(struct hibmc_drm_private *priv, int hibmc_de_init(struct hibmc_drm_private *priv); int hibmc_vdac_init(struct hibmc_drm_private *priv); -int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_connector *connector); +int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *connector); + +int hibmc_dp_init(struct hibmc_drm_private *priv); #endif diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c index e6e48651c15c..99b3b77b5445 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -25,8 +25,8 @@ static void hibmc_set_i2c_signal(void *data, u32 mask, int value) { - struct hibmc_connector *hibmc_connector = data; - struct hibmc_drm_private *priv = to_hibmc_drm_private(hibmc_connector->base.dev); + struct hibmc_vdac *vdac = data; + struct hibmc_drm_private *priv = to_hibmc_drm_private(vdac->connector.dev); u32 tmp_dir = readl(priv->mmio + GPIO_DATA_DIRECTION); if (value) { @@ -45,8 +45,8 @@ static void hibmc_set_i2c_signal(void *data, u32 mask, int value) static int hibmc_get_i2c_signal(void *data, u32 mask) { - struct hibmc_connector *hibmc_connector = data; - struct hibmc_drm_private *priv = to_hibmc_drm_private(hibmc_connector->base.dev); + struct hibmc_vdac *vdac = data; + struct hibmc_drm_private *priv = to_hibmc_drm_private(vdac->connector.dev); u32 tmp_dir = readl(priv->mmio + GPIO_DATA_DIRECTION); if ((tmp_dir & mask) != 
mask) { @@ -77,22 +77,21 @@ static int hibmc_ddc_getscl(void *data) return hibmc_get_i2c_signal(data, I2C_SCL_MASK); } -int hibmc_ddc_create(struct drm_device *drm_dev, - struct hibmc_connector *connector) +int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *vdac) { - connector->adapter.owner = THIS_MODULE; - snprintf(connector->adapter.name, I2C_NAME_SIZE, "HIS i2c bit bus"); - connector->adapter.dev.parent = drm_dev->dev; - i2c_set_adapdata(&connector->adapter, connector); - connector->adapter.algo_data = &connector->bit_data; - - connector->bit_data.udelay = 20; - connector->bit_data.timeout = usecs_to_jiffies(2000); - connector->bit_data.data = connector; - connector->bit_data.setsda = hibmc_ddc_setsda; - connector->bit_data.setscl = hibmc_ddc_setscl; - connector->bit_data.getsda = hibmc_ddc_getsda; - connector->bit_data.getscl = hibmc_ddc_getscl; - - return i2c_bit_add_bus(&connector->adapter); + vdac->adapter.owner = THIS_MODULE; + snprintf(vdac->adapter.name, I2C_NAME_SIZE, "HIS i2c bit bus"); + vdac->adapter.dev.parent = drm_dev->dev; + i2c_set_adapdata(&vdac->adapter, vdac); + vdac->adapter.algo_data = &vdac->bit_data; + + vdac->bit_data.udelay = 20; + vdac->bit_data.timeout = usecs_to_jiffies(2000); + vdac->bit_data.data = vdac; + vdac->bit_data.setsda = hibmc_ddc_setsda; + vdac->bit_data.setscl = hibmc_ddc_setscl; + vdac->bit_data.getsda = hibmc_ddc_getsda; + vdac->bit_data.getscl = hibmc_ddc_getscl; + + return i2c_bit_add_bus(&vdac->adapter); } diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index 409c551c92af..05e19ea4c9f9 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -24,11 +24,11 @@ static int hibmc_connector_get_modes(struct drm_connector *connector) { - struct hibmc_connector *hibmc_connector = to_hibmc_connector(connector); + struct hibmc_vdac *vdac = to_hibmc_vdac(connector); const struct drm_edid *drm_edid; int count; - drm_edid = drm_edid_read_ddc(connector, &hibmc_connector->adapter); + drm_edid = drm_edid_read_ddc(connector, &vdac->adapter); drm_edid_connector_update(connector, drm_edid); @@ -51,9 +51,9 @@ out: static void hibmc_connector_destroy(struct drm_connector *connector) { - struct hibmc_connector *hibmc_connector = to_hibmc_connector(connector); + struct hibmc_vdac *vdac = to_hibmc_vdac(connector); - i2c_del_adapter(&hibmc_connector->adapter); + i2c_del_adapter(&vdac->adapter); drm_connector_cleanup(connector); } @@ -93,20 +93,20 @@ static const struct drm_encoder_helper_funcs hibmc_encoder_helper_funcs = { int hibmc_vdac_init(struct hibmc_drm_private *priv) { struct drm_device *dev = &priv->dev; - struct hibmc_connector *hibmc_connector = &priv->connector; - struct drm_encoder *encoder = &priv->encoder; + struct hibmc_vdac *vdac = &priv->vdac; + struct drm_encoder *encoder = &vdac->encoder; struct drm_crtc *crtc = &priv->crtc; - struct drm_connector *connector = &hibmc_connector->base; + struct drm_connector *connector = &vdac->connector; int ret; - ret = hibmc_ddc_create(dev, hibmc_connector); + ret = hibmc_ddc_create(dev, vdac); if (ret) { drm_err(dev, "failed to create ddc: %d\n", ret); return ret; } encoder->possible_crtcs = drm_crtc_mask(crtc); - ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_DAC); + ret = drmm_encoder_init(dev, encoder, NULL, DRM_MODE_ENCODER_DAC, NULL); if (ret) { drm_err(dev, "failed to init encoder: %d\n", ret); return ret; @@ -117,7 +117,7 @@ int 
hibmc_vdac_init(struct hibmc_drm_private *priv) ret = drm_connector_init_with_ddc(dev, connector, &hibmc_connector_funcs, DRM_MODE_CONNECTOR_VGA, - &hibmc_connector->adapter); + &vdac->adapter); if (ret) { drm_err(dev, "failed to init connector: %d\n", ret); return ret; diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index 57ce77c2be24..82d4a4e206a5 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -1165,7 +1165,6 @@ static const struct hdmi_codec_ops audio_codec_ops = { .audio_shutdown = tda998x_audio_shutdown, .mute_stream = tda998x_audio_mute_stream, .get_eld = tda998x_audio_get_eld, - .no_capture_mute = 1, }; static int tda998x_audio_codec_init(struct tda998x_priv *priv, @@ -1176,6 +1175,7 @@ static int tda998x_audio_codec_init(struct tda998x_priv *priv, .max_i2s_channels = 2, .no_i2s_capture = 1, .no_spdif_capture = 1, + .no_capture_mute = 1, }; if (priv->audio_port_enable[AUDIO_ROUTE_I2S]) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 144d4311c0e8..48b0b9755b2b 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -3081,7 +3081,10 @@ int intel_mtl_tbt_calc_port_clock(struct intel_encoder *encoder) val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port)); - clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val); + if (DISPLAY_VER(display) >= 30) + clock = REG_FIELD_GET(XE3_DDI_CLOCK_SELECT_MASK, val); + else + clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val); drm_WARN_ON(display->drm, !(val & XELPDP_FORWARD_CLOCK_UNGATE)); drm_WARN_ON(display->drm, !(val & XELPDP_TBT_CLOCK_REQUEST)); @@ -3096,13 +3099,18 @@ int intel_mtl_tbt_calc_port_clock(struct intel_encoder *encoder) return 540000; case XELPDP_DDI_CLOCK_SELECT_TBT_810: return 810000; + case XELPDP_DDI_CLOCK_SELECT_TBT_312_5: + return 1000000; + case XELPDP_DDI_CLOCK_SELECT_TBT_625: + return 2000000; default: MISSING_CASE(clock); return 162000; } } -static int intel_mtl_tbt_clock_select(int clock) +static int intel_mtl_tbt_clock_select(struct intel_display *display, + int clock) { switch (clock) { case 162000: @@ -3113,6 +3121,18 @@ static int intel_mtl_tbt_clock_select(int clock) return XELPDP_DDI_CLOCK_SELECT_TBT_540; case 810000: return XELPDP_DDI_CLOCK_SELECT_TBT_810; + case 1000000: + if (DISPLAY_VER(display) < 30) { + drm_WARN_ON(display->drm, "UHBR10 not supported for the platform\n"); + return XELPDP_DDI_CLOCK_SELECT_TBT_162; + } + return XELPDP_DDI_CLOCK_SELECT_TBT_312_5; + case 2000000: + if (DISPLAY_VER(display) < 30) { + drm_WARN_ON(display->drm, "UHBR20 not supported for the platform\n"); + return XELPDP_DDI_CLOCK_SELECT_TBT_162; + } + return XELPDP_DDI_CLOCK_SELECT_TBT_625; default: MISSING_CASE(clock); return XELPDP_DDI_CLOCK_SELECT_TBT_162; @@ -3125,15 +3145,26 @@ static void intel_mtl_tbt_pll_enable(struct intel_encoder *encoder, struct intel_display *display = to_intel_display(encoder); enum phy phy = intel_encoder_to_phy(encoder); u32 val = 0; + u32 mask; /* * 1. 
Program PORT_CLOCK_CTL REGISTER to configure * clock muxes, gating and SSC */ - val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(crtc_state->port_clock)); + + if (DISPLAY_VER(display) >= 30) { + mask = XE3_DDI_CLOCK_SELECT_MASK; + val |= XE3_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock)); + } else { + mask = XELPDP_DDI_CLOCK_SELECT_MASK; + val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock)); + } + + mask |= XELPDP_FORWARD_CLOCK_UNGATE; val |= XELPDP_FORWARD_CLOCK_UNGATE; + intel_de_rmw(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), - XELPDP_DDI_CLOCK_SELECT_MASK | XELPDP_FORWARD_CLOCK_UNGATE, val); + mask, val); /* 2. Read back PORT_CLOCK_CTL REGISTER */ val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port)); diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h index 5794ece42e92..4a3cf08007e3 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h @@ -192,7 +192,9 @@ #define XELPDP_TBT_CLOCK_REQUEST REG_BIT(19) #define XELPDP_TBT_CLOCK_ACK REG_BIT(18) #define XELPDP_DDI_CLOCK_SELECT_MASK REG_GENMASK(15, 12) +#define XE3_DDI_CLOCK_SELECT_MASK REG_GENMASK(16, 12) #define XELPDP_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XELPDP_DDI_CLOCK_SELECT_MASK, val) +#define XE3_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XE3_DDI_CLOCK_SELECT_MASK, val) #define XELPDP_DDI_CLOCK_SELECT_NONE 0x0 #define XELPDP_DDI_CLOCK_SELECT_MAXPCLK 0x8 #define XELPDP_DDI_CLOCK_SELECT_DIV18CLK 0x9 @@ -200,6 +202,8 @@ #define XELPDP_DDI_CLOCK_SELECT_TBT_270 0xd #define XELPDP_DDI_CLOCK_SELECT_TBT_540 0xe #define XELPDP_DDI_CLOCK_SELECT_TBT_810 0xf +#define XELPDP_DDI_CLOCK_SELECT_TBT_312_5 0x18 +#define XELPDP_DDI_CLOCK_SELECT_TBT_625 0x19 #define XELPDP_FORWARD_CLOCK_UNGATE REG_BIT(10) #define XELPDP_LANE1_PHY_CLOCK_SELECT REG_BIT(8) #define XELPDP_SSC_ENABLE_PLLA REG_BIT(1) diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 73ea0e906014..c4120a834698 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -397,7 +397,6 @@ void intel_display_driver_resume_access(struct intel_display *display) */ bool intel_display_driver_check_access(struct intel_display *display) { - char comm[TASK_COMM_LEN]; char current_task[TASK_COMM_LEN + 16]; char allowed_task[TASK_COMM_LEN + 16] = "none"; @@ -406,12 +405,11 @@ bool intel_display_driver_check_access(struct intel_display *display) return true; snprintf(current_task, sizeof(current_task), "%s[%d]", - get_task_comm(comm, current), - task_pid_vnr(current)); + current->comm, task_pid_vnr(current)); if (display->access.allowed_task) snprintf(allowed_task, sizeof(allowed_task), "%s[%d]", - get_task_comm(comm, display->access.allowed_task), + display->access.allowed_task->comm, task_pid_vnr(display->access.allowed_task)); drm_dbg_kms(display->drm, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 3198b64ad7db..388f90784d8a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -53,29 +53,6 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) } /** - * __i915_gem_object_is_lmem - Whether the object is resident in - * lmem while in the fence signaling critical path. - * @obj: The object to check. 
- * - * This function is intended to be called from within the fence signaling - * path where the fence, or a pin, keeps the object from being migrated. For - * example during gpu reset or similar. - * - * Return: Whether the object is resident in lmem. - */ -bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) -{ - struct intel_memory_region *mr = READ_ONCE(obj->mm.region); - -#ifdef CONFIG_LOCKDEP - GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP) && - i915_gem_object_evictable(obj)); -#endif - return mr && (mr->type == INTEL_MEMORY_LOCAL || - mr->type == INTEL_MEMORY_STOLEN_LOCAL); -} - -/** * __i915_gem_object_create_lmem_with_ps - Create lmem object and force the * minimum page size for the backing pages. * @i915: The i915 instance. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 5a7a14e85c3f..ecd8f1a633a1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -19,8 +19,6 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); -bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); - struct drm_i915_gem_object * i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915, const void *data, size_t size); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 89d4dc8b60c6..eb0158e43417 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -369,7 +369,7 @@ static int live_parallel_switch(void *arg) if (!data[n].ce[0]) continue; - worker = kthread_create_worker(0, "igt/parallel:%s", + worker = kthread_run_worker(0, "igt/parallel:%s", data[n].ce[0]->engine->name); if (IS_ERR(worker)) { err = PTR_ERR(worker); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 33b55c581552..5a625518d1a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1113,6 +1113,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * Warn CI about the unrecoverable wedged condition. * Time for a reboot. 
*/ + gt_err(gt, "Unrecoverable wedged condition\n"); add_taint_for_CI(gt->i915, TAINT_WARN); return false; } @@ -1264,8 +1265,10 @@ void intel_gt_reset(struct intel_gt *gt, } ret = resume(gt); - if (ret) + if (ret) { + gt_err(gt, "Failed to resume (%d)\n", ret); goto taint; + } finish: reset_finish(gt, awake); @@ -1608,6 +1611,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt) set_bit(I915_WEDGED_ON_INIT, >->reset.flags); /* Wedged on init is non-recoverable */ + gt_err(gt, "Non-recoverable wedged on init\n"); add_taint_for_CI(gt->i915, TAINT_WARN); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 59da4b7bd262..b74d9205c0f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -308,30 +308,6 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) return cs; } -/* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct i915_request *rq) -{ - int num_dwords; - void *cs; - - num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - if (num_dwords == 0) - return 0; - - num_dwords = CACHELINE_DWORDS - num_dwords; - GEM_BUG_ON(num_dwords & 1); - - cs = intel_ring_begin(rq, num_dwords); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); - intel_ring_advance(rq, cs + num_dwords); - - GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); - return 0; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_ring.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index 1b32dadfb8c3..64b322e25f36 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -16,7 +16,6 @@ struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords); -int intel_ring_cacheline_align(struct i915_request *rq); unsigned int intel_ring_update_space(struct intel_ring *ring); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index f0b75986afb9..6e9977b2d180 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -26,6 +26,7 @@ #include "shmem_utils.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" +#include "intel_gt_print.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -230,8 +231,13 @@ static int xcs_resume(struct intel_engine_cs *engine) set_pp_dir(engine); - /* First wake the ring up to an empty/idle ring */ - for ((kt) = ktime_get() + (2 * NSEC_PER_MSEC); + /* + * First wake the ring up to an empty/idle ring. + * Use 50ms of delay to let the engine write successfully + * for all platforms. Experimented with different values and + * determined that 50ms works best based on testing. + */ + for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC); ktime_before(ktime_get(), (kt)); cpu_relax()) { /* * In case of resets fails because engine resumes from @@ -282,16 +288,16 @@ static int xcs_resume(struct intel_engine_cs *engine) return 0; err: - drm_err(&engine->i915->drm, - "%s initialization failed; " - "ctl %08x (valid? 
%d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_CTL) & RING_VALID, - ENGINE_READ(engine, RING_HEAD), ring->head, - ENGINE_READ(engine, RING_TAIL), ring->tail, - ENGINE_READ(engine, RING_START), - i915_ggtt_offset(ring->vma)); + gt_err(engine->gt, "%s initialization failed\n", engine->name); + ENGINE_TRACE(engine, + "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_CTL) & RING_VALID, + ENGINE_READ(engine, RING_HEAD), ring->head, + ENGINE_READ(engine, RING_TAIL), ring->tail, + ENGINE_READ(engine, RING_START), + i915_ggtt_offset(ring->vma)); + GEM_TRACE_DUMP(); return -EIO; } diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 07bc0bbee20f..d7717de17ecc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3574,7 +3574,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) arg[id].batch = NULL; arg[id].count = 0; - worker[id] = kthread_create_worker(0, "igt/smoke:%d", id); + worker[id] = kthread_run_worker(0, "igt/smoke:%d", id); if (IS_ERR(worker[id])) { err = PTR_ERR(worker[id]); break; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index d99061735c58..f057c16410e7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1025,7 +1025,7 @@ static int __igt_reset_engines(struct intel_gt *gt, threads[tmp].engine = other; threads[tmp].flags = flags; - worker = kthread_create_worker(0, "igt/%s", + worker = kthread_run_worker(0, "igt/%s", other->name); if (IS_ERR(worker)) { err = PTR_ERR(worker); diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index ca460cee4f8b..1bf7b88d9a9d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -262,7 +262,7 @@ static int clear(struct intel_migrate *migrate, { struct drm_i915_private *i915 = migrate->context->engine->i915; struct drm_i915_gem_object *obj; - struct i915_request *rq; + struct i915_request *rq = NULL; struct i915_gem_ww_ctx ww; u32 *vaddr, val = 0; bool ccs_cap = false; diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index b272a7d44599..908483ab0bc8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -8,6 +8,7 @@ #include "intel_gpu_commands.h" #include "intel_gt_requests.h" #include "intel_ring.h" +#include "intel_rps.h" #include "selftest_rc6.h" #include "selftests/i915_random.h" @@ -38,6 +39,9 @@ int live_rc6_manual(void *arg) ktime_t dt; u64 res[2]; int err = 0; + u32 rc0_freq = 0; + u32 rc6_freq = 0; + struct intel_rps *rps = >->rps; /* * Our claim is that we can "encourage" the GPU to enter rc6 at will. 
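The reworked selftest pairs each RAPL energy delta with an actual-frequency sample, so a zero power reading in RC0 can be attributed to the GPU clock rather than a broken energy counter. The power arithmetic itself is unchanged; a minimal distillation of the measurement pattern follows (measure_power_uW is a hypothetical helper, and the energy_uJ callback stands in for librapl_energy_uJ()):

	#include <linux/delay.h>
	#include <linux/ktime.h>
	#include <linux/math64.h>

	/*
	 * Sample an energy counter (in uJ) across a sleep and scale the
	 * delta to microwatts, the same way the hunks below do it.
	 */
	static u64 measure_power_uW(u64 (*energy_uJ)(void), unsigned int ms)
	{
		u64 energy = energy_uJ();
		ktime_t dt = ktime_get();

		msleep(ms);

		energy = energy_uJ() - energy;	/* uJ consumed over the window */
		dt = ktime_sub(ktime_get(), dt);

		/* uJ / ns, scaled by NSEC_PER_SEC: uJ/s == uW */
		return div64_u64(NSEC_PER_SEC * energy, ktime_to_ns(dt));
	}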
@@ -66,6 +70,7 @@ int live_rc6_manual(void *arg) rc0_power = librapl_energy_uJ() - rc0_power; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); + rc0_freq = intel_rps_read_actual_frequency_fw(rps); if ((res[1] - res[0]) >> 10) { pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n", (res[1] - res[0]) >> 10); @@ -77,7 +82,11 @@ int live_rc6_manual(void *arg) rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, ktime_to_ns(dt)); if (!rc0_power) { - pr_err("No power measured while in RC0\n"); + if (rc0_freq) + pr_debug("No power measured while in RC0! GPU Freq: %u in RC0\n", + rc0_freq); + else + pr_err("No power and freq measured while in RC0\n"); err = -EINVAL; goto out_unlock; } @@ -90,7 +99,8 @@ int live_rc6_manual(void *arg) intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL); dt = ktime_get(); rc6_power = librapl_energy_uJ(); - msleep(100); + msleep(1000); + rc6_freq = intel_rps_read_actual_frequency_fw(rps); rc6_power = librapl_energy_uJ() - rc6_power; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); @@ -108,7 +118,8 @@ int live_rc6_manual(void *arg) pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n", rc0_power, rc6_power); if (2 * rc6_power > rc0_power) { - pr_err("GPU leaked energy while in RC6!\n"); + pr_err("GPU leaked energy while in RC6! GPU Freq: %u in RC6 and %u in RC0\n", + rc6_freq, rc0_freq); err = -EINVAL; goto out_unlock; } diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 4ecc4ae74a54..e218b229681f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -489,7 +489,7 @@ static int live_slpc_tile_interaction(void *arg) return -ENOMEM; for_each_gt(gt, i915, i) { - threads[i].worker = kthread_create_worker(0, "igt/slpc_parallel:%d", gt->info.id); + threads[i].worker = kthread_run_worker(0, "igt/slpc_parallel:%d", gt->info.id); if (IS_ERR(threads[i].worker)) { ret = PTR_ERR(threads[i].worker); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 222c95f62156..e8a04e476c57 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -18,7 +18,7 @@ #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M #define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M -#elif defined(CONFIG_DRM_I915_DEBUG_GEM) +#elif IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M #define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d7ac31c3254c..b3cbf85c00cb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -427,19 +427,6 @@ void intel_huc_fini(struct intel_huc *huc) intel_uc_fw_fini(&huc->fw); } -void intel_huc_suspend(struct intel_huc *huc) -{ - if (!intel_uc_fw_is_loadable(&huc->fw)) - return; - - /* - * in the unlikely case that we're suspending before the GSC has - * completed its loading sequence, just stop waiting. We'll restart - * on resume. 
- */ - delayed_huc_load_complete(huc); -} - static const char *auth_mode_string(struct intel_huc *huc, enum intel_huc_authentication_type type) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index ba5cb08e9e7b..d5e441b9e08d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -57,7 +57,6 @@ int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); -void intel_huc_suspend(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type); int intel_huc_wait_for_auth_complete(struct intel_huc *huc, enum intel_huc_authentication_type type); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 35319228bc51..0dbc4e289300 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -527,24 +527,6 @@ int i915_active_acquire(struct i915_active *ref) return err; } -int i915_active_acquire_for_context(struct i915_active *ref, u64 idx) -{ - struct i915_active_fence *active; - int err; - - err = i915_active_acquire(ref); - if (err) - return err; - - active = active_instance(ref, idx); - if (!active) { - i915_active_release(ref); - return -ENOMEM; - } - - return 0; /* return with active ref */ -} - void i915_active_release(struct i915_active *ref) { debug_active_assert(ref); diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 77c676ecc263..821f7c21ea9b 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -186,7 +186,6 @@ int i915_request_await_active(struct i915_request *rq, #define I915_ACTIVE_AWAIT_BARRIER BIT(2) int i915_active_acquire(struct i915_active *ref); -int i915_active_acquire_for_context(struct i915_active *ref, u64 idx); bool i915_active_acquire_if_busy(struct i915_active *ref); void i915_active_release(struct i915_active *ref); diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index f58682505491..168d7375304b 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -102,6 +102,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) for_each_memory_region(mr, i915, id) drm_print_memory_stats(p, &stats[id], + DRM_GEM_OBJECT_ACTIVE | DRM_GEM_OBJECT_RESIDENT | DRM_GEM_OBJECT_PURGEABLE, mr->uabi_name); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index eb7b4e21b124..bec164e884ae 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4807,7 +4807,7 @@ err_unlock: return ret; } -static struct ctl_table oa_table[] = { +static const struct ctl_table oa_table[] = { { .procname = "perf_stream_paranoid", .data = &i915_perf_stream_paranoid, diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index acae30a04a94..88870844b5bd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -492,7 +492,7 @@ static int mock_breadcrumbs_smoketest(void *arg) for (n = 0; n < ncpus; n++) { struct kthread_worker *worker; - worker = kthread_create_worker(0, "igt/%d", n); + worker = kthread_run_worker(0, "igt/%d", n); if (IS_ERR(worker)) { ret = PTR_ERR(worker); ncpus = n; @@ -1645,7 +1645,7 @@ static int live_parallel_engines(void 
*arg) for_each_uabi_engine(engine, i915) { struct kthread_worker *worker; - worker = kthread_create_worker(0, "igt/parallel:%s", + worker = kthread_run_worker(0, "igt/parallel:%s", engine->name); if (IS_ERR(worker)) { err = PTR_ERR(worker); @@ -1806,7 +1806,7 @@ static int live_breadcrumbs_smoketest(void *arg) unsigned int i = idx * ncpus + n; struct kthread_worker *worker; - worker = kthread_create_worker(0, "igt/%d.%d", idx, n); + worker = kthread_run_worker(0, "igt/%d.%d", idx, n); if (IS_ERR(worker)) { ret = PTR_ERR(worker); goto out_flush; @@ -3219,7 +3219,7 @@ static int perf_parallel_engines(void *arg) memset(&engines[idx].p, 0, sizeof(engines[idx].p)); - worker = kthread_create_worker(0, "igt:%s", + worker = kthread_run_worker(0, "igt:%s", engine->name); if (IS_ERR(worker)) { err = PTR_ERR(worker); diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index f496e6cfdfe0..e47debd60619 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -14,9 +14,6 @@ config DRM_MEDIATEK select DRM_BRIDGE_CONNECTOR select DRM_MIPI_DSI select DRM_PANEL - select MEMORY - select MTK_SMI - select PHY_MTK_MIPI_DSI select VIDEOMODE_HELPERS help Choose this option if you have a Mediatek SoCs. @@ -27,7 +24,6 @@ config DRM_MEDIATEK config DRM_MEDIATEK_DP tristate "DRM DPTX Support for MediaTek SoCs" depends on DRM_MEDIATEK - select PHY_MTK_DP select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_DP_AUX_BUS @@ -38,6 +34,5 @@ config DRM_MEDIATEK_HDMI tristate "DRM HDMI Support for Mediatek SoCs" depends on DRM_MEDIATEK select SND_SOC_HDMI_CODEC if SND_SOC - select PHY_MTK_HDMI help DRM/KMS HDMI driver for Mediatek SoCs diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c index eb0e1233ad04..5674f5707cca 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_crtc.c @@ -112,6 +112,11 @@ static void mtk_drm_finish_page_flip(struct mtk_crtc *mtk_crtc) drm_crtc_handle_vblank(&mtk_crtc->base); +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + if (mtk_crtc->cmdq_client.chan) + return; +#endif + spin_lock_irqsave(&mtk_crtc->config_lock, flags); if (!mtk_crtc->config_updating && mtk_crtc->pending_needs_vblank) { mtk_crtc_finish_page_flip(mtk_crtc); @@ -284,10 +289,8 @@ static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) state = to_mtk_crtc_state(mtk_crtc->base.state); spin_lock_irqsave(&mtk_crtc->config_lock, flags); - if (mtk_crtc->config_updating) { - spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + if (mtk_crtc->config_updating) goto ddp_cmdq_cb_out; - } state->pending_config = false; @@ -315,10 +318,15 @@ static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) mtk_crtc->pending_async_planes = false; } - spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); - ddp_cmdq_cb_out: + if (mtk_crtc->pending_needs_vblank) { + mtk_crtc_finish_page_flip(mtk_crtc); + mtk_crtc->pending_needs_vblank = false; + } + + spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + mtk_crtc->cmdq_vblank_cnt = 0; wake_up(&mtk_crtc->cb_blocking_queue); } @@ -606,13 +614,18 @@ static void mtk_crtc_update_config(struct mtk_crtc *mtk_crtc, bool needs_vblank) */ mtk_crtc->cmdq_vblank_cnt = 3; + spin_lock_irqsave(&mtk_crtc->config_lock, flags); + mtk_crtc->config_updating = false; + spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); } -#endif +#else 
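+	/*
+	 * Without CMDQ the register writes above have already completed
+	 * synchronously, so config_updating is cleared here, under
+	 * config_lock, before hw_lock is released.
+	 */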
spin_lock_irqsave(&mtk_crtc->config_lock, flags); mtk_crtc->config_updating = false; spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); +#endif mutex_unlock(&mtk_crtc->hw_lock); } diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index f731d4fbe8b6..df82cea4bb79 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -460,6 +460,29 @@ static unsigned int mtk_ovl_fmt_convert(struct mtk_disp_ovl *ovl, } } +static void mtk_ovl_afbc_layer_config(struct mtk_disp_ovl *ovl, + unsigned int idx, + struct mtk_plane_pending_state *pending, + struct cmdq_pkt *cmdq_pkt) +{ + unsigned int pitch_msb = pending->pitch >> 16; + unsigned int hdr_pitch = pending->hdr_pitch; + unsigned int hdr_addr = pending->hdr_addr; + + if (pending->modifier != DRM_FORMAT_MOD_LINEAR) { + mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs, + DISP_REG_OVL_HDR_ADDR(ovl, idx)); + mtk_ddp_write_relaxed(cmdq_pkt, + OVL_PITCH_MSB_2ND_SUBBUF | pitch_msb, + &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); + mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs, + DISP_REG_OVL_HDR_PITCH(ovl, idx)); + } else { + mtk_ddp_write_relaxed(cmdq_pkt, pitch_msb, + &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); + } +} + void mtk_ovl_layer_config(struct device *dev, unsigned int idx, struct mtk_plane_state *state, struct cmdq_pkt *cmdq_pkt) @@ -467,25 +490,14 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx, struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); struct mtk_plane_pending_state *pending = &state->pending; unsigned int addr = pending->addr; - unsigned int hdr_addr = pending->hdr_addr; - unsigned int pitch = pending->pitch; - unsigned int hdr_pitch = pending->hdr_pitch; + unsigned int pitch_lsb = pending->pitch & GENMASK(15, 0); unsigned int fmt = pending->format; + unsigned int rotation = pending->rotation; unsigned int offset = (pending->y << 16) | pending->x; unsigned int src_size = (pending->height << 16) | pending->width; unsigned int blend_mode = state->base.pixel_blend_mode; unsigned int ignore_pixel_alpha = 0; unsigned int con; - bool is_afbc = pending->modifier != DRM_FORMAT_MOD_LINEAR; - union overlay_pitch { - struct split_pitch { - u16 lsb; - u16 msb; - } split_pitch; - u32 pitch; - } overlay_pitch; - - overlay_pitch.pitch = pitch; if (!pending->enable) { mtk_ovl_layer_off(dev, idx, cmdq_pkt); @@ -513,22 +525,30 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx, ignore_pixel_alpha = OVL_CONST_BLEND; } - if (pending->rotation & DRM_MODE_REFLECT_Y) { + /* + * Treat rotate 180 as flip x + flip y, and XOR the original rotation value + * to flip x + flip y to support both in the same time. 
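+	 * For example, DRM_MODE_ROTATE_180 | DRM_MODE_REFLECT_X ends up
+	 * acting as DRM_MODE_REFLECT_Y alone: the XOR clears the X flip and
+	 * sets the Y flip, and rotating 180 degrees then flipping X is
+	 * geometrically the same as flipping Y.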
+ */ + if (rotation & DRM_MODE_ROTATE_180) + rotation ^= DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y; + + if (rotation & DRM_MODE_REFLECT_Y) { con |= OVL_CON_VIRT_FLIP; addr += (pending->height - 1) * pending->pitch; } - if (pending->rotation & DRM_MODE_REFLECT_X) { + if (rotation & DRM_MODE_REFLECT_X) { con |= OVL_CON_HORZ_FLIP; addr += pending->pitch - 1; } if (ovl->data->supports_afbc) - mtk_ovl_set_afbc(ovl, cmdq_pkt, idx, is_afbc); + mtk_ovl_set_afbc(ovl, cmdq_pkt, idx, + pending->modifier != DRM_FORMAT_MOD_LINEAR); mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_CON(idx)); - mtk_ddp_write_relaxed(cmdq_pkt, overlay_pitch.split_pitch.lsb | ignore_pixel_alpha, + mtk_ddp_write_relaxed(cmdq_pkt, pitch_lsb | ignore_pixel_alpha, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH(idx)); mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_SRC_SIZE(idx)); @@ -537,19 +557,8 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx, mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_ADDR(ovl, idx)); - if (is_afbc) { - mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs, - DISP_REG_OVL_HDR_ADDR(ovl, idx)); - mtk_ddp_write_relaxed(cmdq_pkt, - OVL_PITCH_MSB_2ND_SUBBUF | overlay_pitch.split_pitch.msb, - &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); - mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs, - DISP_REG_OVL_HDR_PITCH(ovl, idx)); - } else { - mtk_ddp_write_relaxed(cmdq_pkt, - overlay_pitch.split_pitch.msb, - &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); - } + if (ovl->data->supports_afbc) + mtk_ovl_afbc_layer_config(ovl, idx, pending, cmdq_pkt); mtk_ovl_set_bit_depth(dev, idx, fmt, cmdq_pkt); mtk_ovl_layer_on(dev, idx, cmdq_pkt); diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index 36713c176cfc..cd385ba4c66a 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -543,18 +543,16 @@ static int mtk_dp_set_color_format(struct mtk_dp *mtk_dp, enum dp_pixelformat color_format) { u32 val; - - /* update MISC0 */ - mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_3034, - color_format << DP_TEST_COLOR_FORMAT_SHIFT, - DP_TEST_COLOR_FORMAT_MASK); + u32 misc0_color; switch (color_format) { case DP_PIXELFORMAT_YUV422: val = PIXEL_ENCODE_FORMAT_DP_ENC0_P0_YCBCR422; + misc0_color = DP_COLOR_FORMAT_YCbCr422; break; case DP_PIXELFORMAT_RGB: val = PIXEL_ENCODE_FORMAT_DP_ENC0_P0_RGB; + misc0_color = DP_COLOR_FORMAT_RGB; break; default: drm_warn(mtk_dp->drm_dev, "Unsupported color format: %d\n", @@ -562,6 +560,11 @@ static int mtk_dp_set_color_format(struct mtk_dp *mtk_dp, return -EINVAL; } + /* update MISC0 */ + mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_3034, + misc0_color, + DP_TEST_COLOR_FORMAT_MASK); + mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_303C, val, PIXEL_ENCODE_FORMAT_DP_ENC0_P0_MASK); return 0; @@ -1135,6 +1138,18 @@ static void mtk_dp_digital_sw_reset(struct mtk_dp *mtk_dp) 0, DP_TX_TRANSMITTER_4P_RESET_SW_DP_TRANS_P0); } +static void mtk_dp_sdp_path_reset(struct mtk_dp *mtk_dp) +{ + mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_3004, + SDP_RESET_SW_DP_ENC0_P0, + SDP_RESET_SW_DP_ENC0_P0); + + /* Wait for sdp path reset to complete */ + usleep_range(1000, 5000); + mtk_dp_update_bits(mtk_dp, MTK_DP_ENC0_P0_3004, + 0, SDP_RESET_SW_DP_ENC0_P0); +} + static void mtk_dp_set_lanes(struct mtk_dp *mtk_dp, int lanes) { mtk_dp_update_bits(mtk_dp, MTK_DP_TRANS_P0_35F0, @@ -1165,17 +1180,25 @@ static void 
mtk_dp_get_calibration_data(struct mtk_dp *mtk_dp) buf = (u32 *)nvmem_cell_read(cell, &len); nvmem_cell_put(cell); - if (IS_ERR(buf) || ((len / sizeof(u32)) != 4)) { + if (IS_ERR(buf)) { dev_warn(dev, "Failed to read nvmem_cell_read\n"); - - if (!IS_ERR(buf)) - kfree(buf); - goto use_default_val; } + /* The cell length is in bytes. Convert it to be compatible with u32 buffer. */ + len /= sizeof(u32); + for (i = 0; i < MTK_DP_CAL_MAX; i++) { fmt = &mtk_dp->data->efuse_fmt[i]; + + if (fmt->idx >= len) { + dev_warn(mtk_dp->dev, + "Out-of-bound efuse data access, fmt idx = %d, buf len = %zu\n", + fmt->idx, len); + kfree(buf); + goto use_default_val; + } + cal_data[i] = (buf[fmt->idx] >> fmt->shift) & fmt->mask; if (cal_data[i] < fmt->min_val || cal_data[i] > fmt->max_val) { @@ -2100,7 +2123,6 @@ static enum drm_connector_status mtk_dp_bdg_detect(struct drm_bridge *bridge) struct mtk_dp *mtk_dp = mtk_dp_from_bridge(bridge); enum drm_connector_status ret = connector_status_disconnected; bool enabled = mtk_dp->enabled; - u8 sink_count = 0; if (!mtk_dp->train_info.cable_plugged_in) return ret; @@ -2115,8 +2137,8 @@ static enum drm_connector_status mtk_dp_bdg_detect(struct drm_bridge *bridge) * function, we just need to check the HPD connection to check * whether we connect to a sink device. */ - drm_dp_dpcd_readb(&mtk_dp->aux, DP_SINK_COUNT, &sink_count); - if (DP_GET_SINK_COUNT(sink_count)) + + if (drm_dp_read_sink_count(&mtk_dp->aux) > 0) ret = connector_status_connected; if (!enabled) @@ -2397,6 +2419,9 @@ static void mtk_dp_bridge_atomic_disable(struct drm_bridge *bridge, DP_PWR_STATE_BANDGAP_TPLL, DP_PWR_STATE_MASK); + /* SDP path reset sw*/ + mtk_dp_sdp_path_reset(mtk_dp); + /* Ensure the sink is muted */ msleep(20); } @@ -2408,12 +2433,19 @@ mtk_dp_bridge_mode_valid(struct drm_bridge *bridge, { struct mtk_dp *mtk_dp = mtk_dp_from_bridge(bridge); u32 bpp = info->color_formats & DRM_COLOR_FORMAT_YCBCR422 ? 16 : 24; - u32 rate = min_t(u32, drm_dp_max_link_rate(mtk_dp->rx_cap) * - drm_dp_max_lane_count(mtk_dp->rx_cap), - drm_dp_bw_code_to_link_rate(mtk_dp->max_linkrate) * - mtk_dp->max_lanes); + u32 lane_count_min = mtk_dp->train_info.lane_count; + u32 rate = drm_dp_bw_code_to_link_rate(mtk_dp->train_info.link_rate) * + lane_count_min; - if (rate < mode->clock * bpp / 8) + /* + *FEC overhead is approximately 2.4% from DP 1.4a spec 2.2.1.4.2. + *The down-spread amplitude shall either be disabled (0.0%) or up + *to 0.5% from 1.4a 3.5.2.6. Add up to approximately 3% total overhead. + * + *Because rate is already divided by 10, + *mode->clock does not need to be multiplied by 10 + */ + if ((rate * 97 / 100) < (mode->clock * bpp / 8)) return MODE_CLOCK_HIGH; return MODE_OK; @@ -2454,10 +2486,9 @@ static u32 *mtk_dp_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge, struct drm_display_mode *mode = &crtc_state->adjusted_mode; struct drm_display_info *display_info = &conn_state->connector->display_info; - u32 rate = min_t(u32, drm_dp_max_link_rate(mtk_dp->rx_cap) * - drm_dp_max_lane_count(mtk_dp->rx_cap), - drm_dp_bw_code_to_link_rate(mtk_dp->max_linkrate) * - mtk_dp->max_lanes); + u32 lane_count_min = mtk_dp->train_info.lane_count; + u32 rate = drm_dp_bw_code_to_link_rate(mtk_dp->train_info.link_rate) * + lane_count_min; *num_input_fmts = 0; @@ -2466,8 +2497,8 @@ static u32 *mtk_dp_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge, * datarate of YUV422 and sink device supports YUV422, we output YUV422 * format. Use this condition, we can support more resolution. 
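	 * (The "rate * 97 / 100" in the check below applies the same margin
	 * as mode_valid above: roughly 2.4% FEC overhead plus up to 0.5%
	 * down-spread, about 3% in total.)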
*/ - if ((rate < (mode->clock * 24 / 8)) && - (rate > (mode->clock * 16 / 8)) && + if (((rate * 97 / 100) < (mode->clock * 24 / 8)) && + ((rate * 97 / 100) > (mode->clock * 16 / 8)) && (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR422)) { input_fmts = kcalloc(1, sizeof(*input_fmts), GFP_KERNEL); if (!input_fmts) @@ -2615,7 +2646,6 @@ static const struct hdmi_codec_ops mtk_dp_audio_codec_ops = { .audio_shutdown = mtk_dp_audio_shutdown, .get_eld = mtk_dp_audio_get_eld, .hook_plugged_cb = mtk_dp_audio_hook_plugged_cb, - .no_capture_mute = 1, }; static int mtk_dp_register_audio_driver(struct device *dev) @@ -2626,6 +2656,7 @@ static int mtk_dp_register_audio_driver(struct device *dev) .max_i2s_channels = 8, .i2s = 1, .data = mtk_dp, + .no_capture_mute = 1, }; mtk_dp->audio_pdev = platform_device_register_data(dev, diff --git a/drivers/gpu/drm/mediatek/mtk_dp_reg.h b/drivers/gpu/drm/mediatek/mtk_dp_reg.h index 709b79480693..8ad7a9cc259e 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp_reg.h +++ b/drivers/gpu/drm/mediatek/mtk_dp_reg.h @@ -86,6 +86,7 @@ #define MTK_DP_ENC0_P0_3004 0x3004 #define VIDEO_M_CODE_SEL_DP_ENC0_P0_MASK BIT(8) #define DP_TX_ENCODER_4P_RESET_SW_DP_ENC0_P0 BIT(9) +#define SDP_RESET_SW_DP_ENC0_P0 BIT(13) #define MTK_DP_ENC0_P0_3010 0x3010 #define HTOTAL_SW_DP_ENC0_P0_MASK GENMASK(15, 0) #define MTK_DP_ENC0_P0_3014 0x3014 diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 49af2b19a85a..f22ad2882697 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -358,7 +358,7 @@ static const struct of_device_id mtk_drm_of_ids[] = { }; MODULE_DEVICE_TABLE(of, mtk_drm_of_ids); -static int mtk_drm_match(struct device *dev, void *data) +static int mtk_drm_match(struct device *dev, const void *data) { if (!strncmp(dev_name(dev), "mediatek-drm", sizeof("mediatek-drm") - 1)) return true; @@ -372,11 +372,12 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) struct mtk_drm_private *temp_drm_priv; struct device_node *phandle = dev->parent->of_node; const struct of_device_id *of_id; + struct device_node *node; struct device *drm_dev; unsigned int cnt = 0; int i, j; - for_each_child_of_node_scoped(phandle->parent, node) { + for_each_child_of_node(phandle->parent, node) { struct platform_device *pdev; of_id = of_match_node(mtk_drm_of_ids, node); @@ -405,8 +406,10 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) if (temp_drm_priv->mtk_drm_bound) cnt++; - if (cnt == MAX_CRTC) + if (cnt == MAX_CRTC) { + of_node_put(node); break; + } } if (drm_priv->data->mmsys_dev_num == cnt) { @@ -671,6 +674,8 @@ err_deinit: err_free: private->drm = NULL; drm_dev_put(drm); + for (i = 0; i < private->data->mmsys_dev_num; i++) + private->all_drm_private[i]->drm = NULL; return ret; } @@ -898,7 +903,7 @@ static int mtk_drm_of_ddp_path_build_one(struct device *dev, enum mtk_crtc_path const unsigned int **out_path, unsigned int *out_path_len) { - struct device_node *next, *prev, *vdo = dev->parent->of_node; + struct device_node *next = NULL, *prev, *vdo = dev->parent->of_node; unsigned int temp_path[DDP_COMPONENT_DRM_ID_MAX] = { 0 }; unsigned int *final_ddp_path; unsigned short int idx = 0; @@ -1087,7 +1092,7 @@ static int mtk_drm_probe(struct platform_device *pdev) /* No devicetree graphs support: go with hardcoded paths if present */ dev_dbg(dev, "Using hardcoded paths for MMSYS %u\n", mtk_drm_data->mmsys_id); private->data = mtk_drm_data; - }; + } private->all_drm_private = devm_kmalloc_array(dev, 
private->data->mmsys_dev_num, sizeof(*private->all_drm_private), diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index e61b9bc68e9a..40752f232054 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -139,11 +139,11 @@ #define CLK_HS_POST GENMASK(15, 8) #define CLK_HS_EXIT GENMASK(23, 16) -#define DSI_VM_CMD_CON 0x130 +/* DSI_VM_CMD_CON */ #define VM_CMD_EN BIT(0) #define TS_VFP_EN BIT(5) -#define DSI_SHADOW_DEBUG 0x190U +/* DSI_SHADOW_DEBUG */ #define FORCE_COMMIT BIT(0) #define BYPASS_SHADOW BIT(1) @@ -187,6 +187,8 @@ struct phy; struct mtk_dsi_driver_data { const u32 reg_cmdq_off; + const u32 reg_vm_cmd_off; + const u32 reg_shadow_dbg_off; bool has_shadow_ctl; bool has_size_ctl; bool cmdq_long_packet_ctl; @@ -246,23 +248,22 @@ static void mtk_dsi_phy_timconfig(struct mtk_dsi *dsi) u32 data_rate_mhz = DIV_ROUND_UP(dsi->data_rate, HZ_PER_MHZ); struct mtk_phy_timing *timing = &dsi->phy_timing; - timing->lpx = (80 * data_rate_mhz / (8 * 1000)) + 1; - timing->da_hs_prepare = (59 * data_rate_mhz + 4 * 1000) / 8000 + 1; - timing->da_hs_zero = (163 * data_rate_mhz + 11 * 1000) / 8000 + 1 - + timing->lpx = (60 * data_rate_mhz / (8 * 1000)) + 1; + timing->da_hs_prepare = (80 * data_rate_mhz + 4 * 1000) / 8000; + timing->da_hs_zero = (170 * data_rate_mhz + 10 * 1000) / 8000 + 1 - timing->da_hs_prepare; - timing->da_hs_trail = (78 * data_rate_mhz + 7 * 1000) / 8000 + 1; + timing->da_hs_trail = timing->da_hs_prepare + 1; - timing->ta_go = 4 * timing->lpx; - timing->ta_sure = 3 * timing->lpx / 2; - timing->ta_get = 5 * timing->lpx; - timing->da_hs_exit = (118 * data_rate_mhz / (8 * 1000)) + 1; + timing->ta_go = 4 * timing->lpx - 2; + timing->ta_sure = timing->lpx + 2; + timing->ta_get = 4 * timing->lpx; + timing->da_hs_exit = 2 * timing->lpx + 1; - timing->clk_hs_prepare = (57 * data_rate_mhz / (8 * 1000)) + 1; - timing->clk_hs_post = (65 * data_rate_mhz + 53 * 1000) / 8000 + 1; - timing->clk_hs_trail = (78 * data_rate_mhz + 7 * 1000) / 8000 + 1; - timing->clk_hs_zero = (330 * data_rate_mhz / (8 * 1000)) + 1 - - timing->clk_hs_prepare; - timing->clk_hs_exit = (118 * data_rate_mhz / (8 * 1000)) + 1; + timing->clk_hs_prepare = 70 * data_rate_mhz / (8 * 1000); + timing->clk_hs_post = timing->clk_hs_prepare + 8; + timing->clk_hs_trail = timing->clk_hs_prepare; + timing->clk_hs_zero = timing->clk_hs_trail * 4; + timing->clk_hs_exit = 2 * timing->clk_hs_trail; timcon0 = FIELD_PREP(LPX, timing->lpx) | FIELD_PREP(HS_PREP, timing->da_hs_prepare) | @@ -367,8 +368,8 @@ static void mtk_dsi_set_mode(struct mtk_dsi *dsi) static void mtk_dsi_set_vm_cmd(struct mtk_dsi *dsi) { - mtk_dsi_mask(dsi, DSI_VM_CMD_CON, VM_CMD_EN, VM_CMD_EN); - mtk_dsi_mask(dsi, DSI_VM_CMD_CON, TS_VFP_EN, TS_VFP_EN); + mtk_dsi_mask(dsi, dsi->driver_data->reg_vm_cmd_off, VM_CMD_EN, VM_CMD_EN); + mtk_dsi_mask(dsi, dsi->driver_data->reg_vm_cmd_off, TS_VFP_EN, TS_VFP_EN); } static void mtk_dsi_rxtx_control(struct mtk_dsi *dsi) @@ -714,7 +715,7 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) if (dsi->driver_data->has_shadow_ctl) writel(FORCE_COMMIT | BYPASS_SHADOW, - dsi->regs + DSI_SHADOW_DEBUG); + dsi->regs + dsi->driver_data->reg_shadow_dbg_off); mtk_dsi_reset_engine(dsi); mtk_dsi_phy_timconfig(dsi); @@ -1263,26 +1264,36 @@ static void mtk_dsi_remove(struct platform_device *pdev) static const struct mtk_dsi_driver_data mt8173_dsi_driver_data = { .reg_cmdq_off = 0x200, + .reg_vm_cmd_off = 0x130, + .reg_shadow_dbg_off = 0x190 }; static const struct mtk_dsi_driver_data 
mt2701_dsi_driver_data = { .reg_cmdq_off = 0x180, + .reg_vm_cmd_off = 0x130, + .reg_shadow_dbg_off = 0x190 }; static const struct mtk_dsi_driver_data mt8183_dsi_driver_data = { .reg_cmdq_off = 0x200, + .reg_vm_cmd_off = 0x130, + .reg_shadow_dbg_off = 0x190, .has_shadow_ctl = true, .has_size_ctl = true, }; static const struct mtk_dsi_driver_data mt8186_dsi_driver_data = { .reg_cmdq_off = 0xd00, + .reg_vm_cmd_off = 0x200, + .reg_shadow_dbg_off = 0xc00, .has_shadow_ctl = true, .has_size_ctl = true, }; static const struct mtk_dsi_driver_data mt8188_dsi_driver_data = { .reg_cmdq_off = 0xd00, + .reg_vm_cmd_off = 0x200, + .reg_shadow_dbg_off = 0xc00, .has_shadow_ctl = true, .has_size_ctl = true, .cmdq_long_packet_ctl = true, diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 70dc1d4460ad..ca82bc829cb9 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1660,7 +1660,6 @@ static const struct hdmi_codec_ops mtk_hdmi_audio_codec_ops = { .mute_stream = mtk_hdmi_audio_mute, .get_eld = mtk_hdmi_audio_get_eld, .hook_plugged_cb = mtk_hdmi_audio_hook_plugged_cb, - .no_capture_mute = 1, }; static int mtk_hdmi_register_audio_driver(struct device *dev) @@ -1671,6 +1670,7 @@ static int mtk_hdmi_register_audio_driver(struct device *dev) .max_i2s_channels = 2, .i2s = 1, .data = hdmi, + .no_capture_mute = 1, }; struct platform_device *pdev; diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index f274d9430cc3..5df20cbeafb8 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -78,6 +78,7 @@ msm-display-$(CONFIG_DRM_MSM_DPU) += \ disp/dpu1/dpu_hw_catalog.o \ disp/dpu1/dpu_hw_cdm.o \ disp/dpu1/dpu_hw_ctl.o \ + disp/dpu1/dpu_hw_cwb.o \ disp/dpu1/dpu_hw_dsc.o \ disp/dpu1/dpu_hw_dsc_1_2.o \ disp/dpu1/dpu_hw_interrupts.o \ diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 50c490b492f0..f1b18a6663f7 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -251,8 +251,8 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07); /* Disable L2 bypass to avoid UCHE out of bounds errors */ - gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); - gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000); + gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0)); @@ -693,6 +693,8 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) if (ret) goto fail; + adreno_gpu->uche_trap_base = 0xffff0000ffff0000ull; + if (!gpu->aspace) { /* TODO we think it is possible to configure the GPU to * restrict access to VRAM carveout. 
But the required diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index ee89db72e36e..71dca78cd7a5 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -750,10 +750,10 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02); /* Disable L2 bypass in the UCHE */ - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000); - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF); + gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); /* Set the GMEM VA range (0 to gpu->gmem) */ gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); @@ -1760,11 +1760,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) unsigned int nr_rings; int ret; - if (!pdev) { - DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n"); - return ERR_PTR(-ENXIO); - } - a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL); if (!a5xx_gpu) return ERR_PTR(-ENOMEM); @@ -1805,5 +1800,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) adreno_gpu->ubwc_config.macrotile_mode = 0; adreno_gpu->ubwc_config.ubwc_swizzle = 0x7; + adreno_gpu->uche_trap_base = 0x0001ffffffff0000ull; + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 0c560e84ad5a..edffb7737a97 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -1388,6 +1388,17 @@ static const struct adreno_info a7xx_gpus[] = { .pwrup_reglist = &a7xx_pwrup_reglist, .gmu_chipid = 0x7020100, .gmu_cgc_mode = 0x00020202, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(3), + .perfmode_bw = 16500000, + }, + { /* sentinel */ }, + }, }, .address_space_size = SZ_16G, .preempt_record_size = 4192 * SZ_1K, @@ -1432,6 +1443,17 @@ static const struct adreno_info a7xx_gpus[] = { .pwrup_reglist = &a7xx_pwrup_reglist, .gmu_chipid = 0x7090100, .gmu_cgc_mode = 0x00020202, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(2), + .perfmode_bw = 10687500, + }, + { /* sentinel */ }, + }, }, .address_space_size = SZ_16G, .preempt_record_size = 3572 * SZ_1K, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 14db7376c712..65d38b25c070 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -9,6 +9,7 @@ #include <linux/pm_domain.h> #include <linux/pm_opp.h> #include <soc/qcom/cmd-db.h> +#include <soc/qcom/tcs.h> #include <drm/drm_gem.h> #include "a6xx_gpu.h" @@ -109,9 +110,11 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, bool suspended) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + const struct a6xx_info *info = adreno_gpu->info->a6xx; struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gmu *gmu = &a6xx_gpu->gmu; u32 perf_index; + u32 bw_index = 0; unsigned long 
gpu_freq; int ret = 0; @@ -124,6 +127,37 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, if (gpu_freq == gmu->gpu_freqs[perf_index]) break; + /* If enabled, find the corresponding DDR bandwidth index */ + if (info->bcms && gmu->nr_gpu_bws > 1) { + unsigned int bw = dev_pm_opp_get_bw(opp, true, 0); + + for (bw_index = 0; bw_index < gmu->nr_gpu_bws - 1; bw_index++) { + if (bw == gmu->gpu_bw_table[bw_index]) + break; + } + + /* Vote AB as a fraction of the max bandwidth, starting from A750 */ + if (bw && adreno_is_a750_family(adreno_gpu)) { + u64 tmp; + + /* For now, vote for 25% of the bandwidth */ + tmp = bw * 25; + do_div(tmp, 100); + + /* + * The AB vote consists of a 16 bit wide quantized level + * against the maximum supported bandwidth. + * Quantization can be calculated as below: + * vote = (bandwidth * 2^16) / max bandwidth + */ + tmp *= MAX_AB_VOTE; + do_div(tmp, gmu->gpu_bw_table[gmu->nr_gpu_bws - 1]); + + bw_index |= AB_VOTE(clamp(tmp, 1, MAX_AB_VOTE)); + bw_index |= AB_VOTE_ENABLE; + } + } + gmu->current_perf_index = perf_index; gmu->freq = gmu->gpu_freqs[perf_index]; @@ -139,8 +173,10 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, return; if (!gmu->legacy) { - a6xx_hfi_set_freq(gmu, perf_index); - dev_pm_opp_set_opp(&gpu->pdev->dev, opp); + a6xx_hfi_set_freq(gmu, perf_index, bw_index); + /* With Bandwidth voting, we now vote for all resources, so skip OPP set */ + if (!bw_index) + dev_pm_opp_set_opp(&gpu->pdev->dev, opp); return; } @@ -729,6 +765,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) const struct firmware *fw_image = adreno_gpu->fw[ADRENO_FW_GMU]; const struct block_header *blk; u32 reg_offset; + u32 ver; u32 itcm_base = 0x00000000; u32 dtcm_base = 0x00040000; @@ -775,6 +812,12 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) } } + ver = gmu_read(gmu, REG_A6XX_GMU_CORE_FW_VERSION); + DRM_INFO("Loaded GMU firmware v%u.%u.%u\n", + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver)); + return 0; } @@ -1265,7 +1308,7 @@ static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo, bo->virt = msm_gem_get_vaddr(bo->obj); bo->size = size; - msm_gem_object_set_name(bo->obj, name); + msm_gem_object_set_name(bo->obj, "%s", name); return 0; } @@ -1287,6 +1330,104 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu) return 0; } +/** + * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM) + * @unit: divisor used to convert bytes/sec bw value to an RPMh msg + * @width: multiplier used to convert bytes/sec bw value to an RPMh msg + * @vcd: virtual clock domain that this bcm belongs to + * @reserved: reserved field + */ +struct bcm_db { + __le32 unit; + __le16 width; + u8 vcd; + u8 reserved; +}; + +static int a6xx_gmu_rpmh_bw_votes_init(struct adreno_gpu *adreno_gpu, + const struct a6xx_info *info, + struct a6xx_gmu *gmu) +{ + const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 }; + unsigned int bcm_index, bw_index, bcm_count = 0; + + /* Retrieve BCM data from cmd-db */ + for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) { + const struct a6xx_bcm *bcm = &info->bcms[bcm_index]; + size_t count; + + /* Stop at NULL terminated bcm entry */ + if (!bcm->name) + break; + + bcm_data[bcm_index] = cmd_db_read_aux_data(bcm->name, &count); + if (IS_ERR(bcm_data[bcm_index])) + return PTR_ERR(bcm_data[bcm_index]); + + if (!count) { + dev_err(gmu->dev, "invalid BCM '%s' aux data 
size\n", + bcm->name); + return -EINVAL; + } + + bcm_count++; + } + + /* Generate BCM votes values for each bandwidth & BCM */ + for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) { + u32 *data = gmu->gpu_ib_votes[bw_index]; + u32 bw = gmu->gpu_bw_table[bw_index]; + + /* Calculations loosely copied from bcm_aggregate() & tcs_cmd_gen() */ + for (bcm_index = 0; bcm_index < bcm_count; bcm_index++) { + const struct a6xx_bcm *bcm = &info->bcms[bcm_index]; + bool commit = false; + u64 peak; + u32 vote; + + if (bcm_index == bcm_count - 1 || + (bcm_data[bcm_index + 1] && + bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 1]->vcd)) + commit = true; + + if (!bw) { + data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 0); + continue; + } + + if (bcm->fixed) { + u32 perfmode = 0; + + /* GMU on A6xx votes perfmode on all valid bandwidth */ + if (!adreno_is_a7xx(adreno_gpu) || + (bcm->perfmode_bw && bw >= bcm->perfmode_bw)) + perfmode = bcm->perfmode; + + data[bcm_index] = BCM_TCS_CMD(commit, true, 0, perfmode); + continue; + } + + /* Multiply the bandwidth by the width of the connection */ + peak = (u64)bw * le16_to_cpu(bcm_data[bcm_index]->width); + do_div(peak, bcm->buswidth); + + /* Input bandwidth value is in KBps, scale the value to BCM unit */ + peak *= 1000; + do_div(peak, le32_to_cpu(bcm_data[bcm_index]->unit)); + + vote = clamp(peak, 1, BCM_TCS_CMD_VOTE_MASK); + + /* GMUs on A7xx votes on both x & y */ + if (adreno_is_a7xx(adreno_gpu)) + data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote); + else + data[bcm_index] = BCM_TCS_CMD(commit, true, 0, vote); + } + } + + return 0; +} + /* Return the 'arc-level' for the given frequency */ static unsigned int a6xx_gmu_get_arc_level(struct device *dev, unsigned long freq) @@ -1390,12 +1531,15 @@ static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes, * The GMU votes with the RPMh for itself and on behalf of the GPU but we need * to construct the list of votes on the CPU and send it over. Query the RPMh * voltage levels and build the votes + * The GMU can also vote for DDR interconnects, use the OPP bandwidth entries + * and BCM parameters to build the votes. 
*/ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + const struct a6xx_info *info = adreno_gpu->info->a6xx; struct msm_gpu *gpu = &adreno_gpu->base; int ret; @@ -1407,6 +1551,10 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes, gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl"); + /* Build the interconnect votes */ + if (info->bcms && gmu->nr_gpu_bws > 1) + ret |= a6xx_gmu_rpmh_bw_votes_init(adreno_gpu, info, gmu); + return ret; } @@ -1442,10 +1590,43 @@ static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs, return index; } +static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long *bandwidths, + u32 size) +{ + int count = dev_pm_opp_get_opp_count(dev); + struct dev_pm_opp *opp; + int i, index = 0; + unsigned int bandwidth = 1; + + /* + * The OPP table doesn't contain the "off" bandwidth level so we need to + * add 1 to the table size to account for it + */ + + if (WARN(count + 1 > size, + "The GMU bandwidth table is being truncated\n")) + count = size - 1; + + /* Set the "off" bandwidth */ + bandwidths[index++] = 0; + + for (i = 0; i < count; i++) { + opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0); + if (IS_ERR(opp)) + break; + + dev_pm_opp_put(opp); + bandwidths[index++] = bandwidth++; + } + + return index; +} + static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + const struct a6xx_info *info = adreno_gpu->info->a6xx; struct msm_gpu *gpu = &adreno_gpu->base; int ret = 0; @@ -1472,6 +1653,14 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) gmu->current_perf_index = gmu->nr_gpu_freqs - 1; + /* + * The GMU also handles GPU Interconnect Votes so build a list + * of DDR bandwidths from the GPU OPP table + */ + if (info->bcms) + gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev, + gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table)); + /* Build the list of RPMh votes that we'll send to the GMU */ return a6xx_gmu_rpmh_votes_init(gmu); } @@ -1603,7 +1792,9 @@ int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) gmu->dev = &pdev->dev; - of_dma_configure(gmu->dev, node, true); + ret = of_dma_configure(gmu->dev, node, true); + if (ret) + return ret; pm_runtime_enable(gmu->dev); @@ -1668,7 +1859,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) gmu->dev = &pdev->dev; - of_dma_configure(gmu->dev, node, true); + ret = of_dma_configure(gmu->dev, node, true); + if (ret) + return ret; /* Fow now, don't do anything fancy until we get our feet under us */ gmu->idle_level = GMU_IDLE_STATE_ACTIVE; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index b4a79f88ccf4..0c888b326cfb 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -19,6 +19,18 @@ struct a6xx_gmu_bo { u64 iova; }; +#define GMU_MAX_GX_FREQS 16 +#define GMU_MAX_CX_FREQS 4 +#define GMU_MAX_BCMS 3 + +struct a6xx_bcm { + char *name; + unsigned int buswidth; + bool fixed; + unsigned int perfmode; + unsigned int perfmode_bw; +}; + /* * These define the different GMU wake up options - these define how both the * CPU and the GMU bring up the hardware @@ -79,12 +91,16 @@ struct a6xx_gmu { int current_perf_index; int nr_gpu_freqs; - unsigned 
long gpu_freqs[16]; - u32 gx_arc_votes[16]; + unsigned long gpu_freqs[GMU_MAX_GX_FREQS]; + u32 gx_arc_votes[GMU_MAX_GX_FREQS]; + + int nr_gpu_bws; + unsigned long gpu_bw_table[GMU_MAX_GX_FREQS]; + u32 gpu_ib_votes[GMU_MAX_GX_FREQS][GMU_MAX_BCMS]; int nr_gmu_freqs; - unsigned long gmu_freqs[4]; - u32 cx_arc_votes[4]; + unsigned long gmu_freqs[GMU_MAX_CX_FREQS]; + u32 cx_arc_votes[GMU_MAX_CX_FREQS]; unsigned long freq; @@ -193,7 +209,7 @@ void a6xx_hfi_init(struct a6xx_gmu *gmu); int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state); void a6xx_hfi_stop(struct a6xx_gmu *gmu); int a6xx_hfi_send_prep_slumber(struct a6xx_gmu *gmu); -int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, int index); +int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, u32 perf_index, u32 bw_index); bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu); bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 019610341df1..0ae29a7c8a4d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1123,12 +1123,12 @@ static int hw_init(struct msm_gpu *gpu) /* Disable L2 bypass in the UCHE */ if (adreno_is_a7xx(adreno_gpu)) { - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } else { - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu); - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } if (!(adreno_is_a650_family(adreno_gpu) || @@ -2533,6 +2533,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) } } + adreno_gpu->uche_trap_base = 0x1fffffffff000ull; + if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 4aceffb6aae8..9201a53dd341 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -44,6 +44,7 @@ struct a6xx_info { u32 gmu_chipid; u32 gmu_cgc_mode; u32 prim_fifo_threshold; + const struct a6xx_bcm *bcms; }; struct a6xx_gpu { diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index cb8844ed46b2..0989aee3dd2c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -6,6 +6,7 @@ #include <linux/list.h> #include <soc/qcom/cmd-db.h> +#include <soc/qcom/tcs.h> #include "a6xx_gmu.h" #include "a6xx_gmu.xml.h" @@ -259,6 +260,48 @@ static int a6xx_hfi_send_perf_table(struct a6xx_gmu *gmu) NULL, 0); } +static void a6xx_generate_bw_table(const struct a6xx_info *info, struct a6xx_gmu *gmu, + struct a6xx_hfi_msg_bw_table *msg) +{ + unsigned int i, j; + + for (i = 0; i < GMU_MAX_BCMS; i++) { + if (!info->bcms[i].name) + break; + msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcms[i].name); + } + msg->ddr_cmds_num = i; + + for (i = 0; i < gmu->nr_gpu_bws; ++i) + for (j = 0; j < msg->ddr_cmds_num; j++) + msg->ddr_cmds_data[i][j] = gmu->gpu_ib_votes[i][j]; + msg->bw_level_num = 
gmu->nr_gpu_bws; + + /* Compute the wait bitmask with each BCM having the commit bit */ + msg->ddr_wait_bitmask = 0; + for (j = 0; j < msg->ddr_cmds_num; j++) + if (msg->ddr_cmds_data[0][j] & BCM_TCS_CMD_COMMIT_MASK) + msg->ddr_wait_bitmask |= BIT(j); + + /* + * These are the CX (CNOC) votes - these are used by the GMU + * The 'CN0' BCM is used on all targets, and votes are basically + * 'off' and 'on' states with first bit to enable the path. + */ + + msg->cnoc_cmds_addrs[0] = cmd_db_read_addr("CN0"); + msg->cnoc_cmds_num = 1; + + msg->cnoc_cmds_data[0][0] = BCM_TCS_CMD(true, false, 0, 0); + msg->cnoc_cmds_data[1][0] = BCM_TCS_CMD(true, true, 0, BIT(0)); + + /* Compute the wait bitmask with each BCM having the commit bit */ + msg->cnoc_wait_bitmask = 0; + for (j = 0; j < msg->cnoc_cmds_num; j++) + if (msg->cnoc_cmds_data[0][j] & BCM_TCS_CMD_COMMIT_MASK) + msg->cnoc_wait_bitmask |= BIT(j); +} + static void a618_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) { /* Send a single "off" entry since the 618 GMU doesn't do bus scaling */ @@ -664,6 +707,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) struct a6xx_hfi_msg_bw_table *msg; struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + const struct a6xx_info *info = adreno_gpu->info->a6xx; if (gmu->bw_table) goto send; @@ -672,7 +716,9 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) if (!msg) return -ENOMEM; - if (adreno_is_a618(adreno_gpu)) + if (info->bcms && gmu->nr_gpu_bws > 1) + a6xx_generate_bw_table(info, gmu, msg); + else if (adreno_is_a618(adreno_gpu)) a618_build_bw_table(msg); else if (adreno_is_a619(adreno_gpu)) a619_build_bw_table(msg); @@ -726,13 +772,13 @@ static int a6xx_hfi_send_core_fw_start(struct a6xx_gmu *gmu) sizeof(msg), NULL, 0); } -int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, int index) +int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, u32 freq_index, u32 bw_index) { struct a6xx_hfi_gx_bw_perf_vote_cmd msg = { 0 }; msg.ack_type = 1; /* blocking */ - msg.freq = index; - msg.bw = 0; /* TODO: bus scaling */ + msg.freq = freq_index; + msg.bw = bw_index; return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_GX_BW_PERF_VOTE, &msg, sizeof(msg), NULL, 0); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h index 528110169398..52ba4a07d7b9 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h @@ -173,6 +173,11 @@ struct a6xx_hfi_gx_bw_perf_vote_cmd { u32 bw; }; +#define AB_VOTE_MASK GENMASK(31, 16) +#define MAX_AB_VOTE (FIELD_MAX(AB_VOTE_MASK) - 1) +#define AB_VOTE(vote) FIELD_PREP(AB_VOTE_MASK, (vote)) +#define AB_VOTE_ENABLE BIT(8) + #define HFI_H2F_MSG_PREPARE_SLUMBER 33 struct a6xx_hfi_prep_slumber_cmd { diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 75f5367e73ca..1238f3265978 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -310,10 +310,11 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, uint32_t param, uint64_t *value, uint32_t *len) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct drm_device *drm = gpu->dev; /* No pointer params yet */ if (*len != 0) - return -EINVAL; + return UERR(EINVAL, drm, "invalid len"); switch (param) { case MSM_PARAM_GPU_ID: @@ -365,12 +366,12 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, return 0; case MSM_PARAM_VA_START: if (ctx->aspace == gpu->aspace) - return -EINVAL; + 
return UERR(EINVAL, drm, "requires per-process pgtables"); *value = ctx->aspace->va_start; return 0; case MSM_PARAM_VA_SIZE: if (ctx->aspace == gpu->aspace) - return -EINVAL; + return UERR(EINVAL, drm, "requires per-process pgtables"); *value = ctx->aspace->va_size; return 0; case MSM_PARAM_HIGHEST_BANK_BIT: @@ -385,15 +386,19 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, case MSM_PARAM_MACROTILE_MODE: *value = adreno_gpu->ubwc_config.macrotile_mode; return 0; + case MSM_PARAM_UCHE_TRAP_BASE: + *value = adreno_gpu->uche_trap_base; + return 0; default: - DBG("%s: invalid param: %u", gpu->name, param); - return -EINVAL; + return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param); } } int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, uint32_t param, uint64_t value, uint32_t len) { + struct drm_device *drm = gpu->dev; + switch (param) { case MSM_PARAM_COMM: case MSM_PARAM_CMDLINE: @@ -401,11 +406,11 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, * that should be a reasonable upper bound */ if (len > PAGE_SIZE) - return -EINVAL; + return UERR(EINVAL, drm, "invalid len"); break; default: if (len != 0) - return -EINVAL; + return UERR(EINVAL, drm, "invalid len"); } switch (param) { @@ -434,11 +439,10 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, } case MSM_PARAM_SYSPROF: if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + return UERR(EPERM, drm, "invalid permissions"); return msm_file_private_set_sysprof(ctx, gpu, value); default: - DBG("%s: invalid param: %u", gpu->name, param); - return -EINVAL; + return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param); } } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index e71f420f8b3a..dcf454629ce0 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -253,6 +253,8 @@ struct adreno_gpu { bool gmu_is_wrapper; bool has_ray_tracing; + + u64 uche_trap_base; }; #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) @@ -559,6 +561,11 @@ static inline int adreno_is_a740_family(struct adreno_gpu *gpu) gpu->info->family == ADRENO_7XX_GEN3; } +static inline int adreno_is_a750_family(struct adreno_gpu *gpu) +{ + return gpu->info->family == ADRENO_7XX_GEN3; +} + static inline int adreno_is_a7xx(struct adreno_gpu *gpu) { /* Update with non-fake (i.e. 
non-A702) Gen 7 GPUs */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h index eb5dfff2ec4f..bcb39807fe61 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h @@ -160,6 +160,7 @@ static const struct dpu_lm_cfg sm8650_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x400, @@ -167,6 +168,7 @@ static const struct dpu_lm_cfg sm8650_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x400, @@ -252,25 +254,25 @@ static const struct dpu_pingpong_cfg sm8650_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_cwb_0", .id = PINGPONG_CWB_0, .base = 0x66000, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_cwb_1", .id = PINGPONG_CWB_1, .base = 0x66400, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_8", .id = PINGPONG_8, + .name = "pingpong_cwb_2", .id = PINGPONG_CWB_2, .base = 0x7e000, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_4, }, { - .name = "pingpong_9", .id = PINGPONG_9, + .name = "pingpong_cwb_3", .id = PINGPONG_CWB_3, .base = 0x7e400, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, @@ -350,6 +352,25 @@ static const struct dpu_wb_cfg sm8650_wb[] = { }, }; +static const struct dpu_cwb_cfg sm8650_cwb[] = { + { + .name = "cwb_0", .id = CWB_0, + .base = 0x66200, .len = 0x8, + }, + { + .name = "cwb_1", .id = CWB_1, + .base = 0x66600, .len = 0x8, + }, + { + .name = "cwb_2", .id = CWB_2, + .base = 0x7E200, .len = 0x8, + }, + { + .name = "cwb_3", .id = CWB_3, + .base = 0x7E600, .len = 0x8, + }, +}; + static const struct dpu_intf_cfg sm8650_intf[] = { { .name = "intf_0", .id = INTF_0, @@ -447,6 +468,8 @@ const struct dpu_mdss_cfg dpu_sm8650_cfg = { .merge_3d = sm8650_merge_3d, .wb_count = ARRAY_SIZE(sm8650_wb), .wb = sm8650_wb, + .cwb_count = ARRAY_SIZE(sm8650_cwb), + .cwb = sm8650_cwb, .intf_count = ARRAY_SIZE(sm8650_intf), .intf = sm8650_intf, .vbif_count = ARRAY_SIZE(sm8650_vbif), diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h index cbbdaebe357e..daef07924886 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h @@ -65,6 +65,54 @@ static const struct dpu_sspp_cfg sdm670_sspp[] = { }, }; +static const struct dpu_lm_cfg sdm670_lm[] = { + { + .name = "lm_0", .id = LM_0, + .base = 0x44000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_1, + .pingpong = PINGPONG_0, + .dspp = DSPP_0, + }, { + .name = "lm_1", .id = LM_1, + .base = 0x45000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_0, + .pingpong = PINGPONG_1, + .dspp = DSPP_1, + }, { + .name = "lm_2", .id = LM_2, + .base = 0x46000, .len = 0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_5, + .pingpong = PINGPONG_2, + }, { + .name = "lm_5", .id = LM_5, + .base = 0x49000, .len = 
0x320, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_2, + .pingpong = PINGPONG_3, + }, +}; + +static const struct dpu_dspp_cfg sdm670_dspp[] = { + { + .name = "dspp_0", .id = DSPP_0, + .base = 0x54000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, { + .name = "dspp_1", .id = DSPP_1, + .base = 0x56000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, +}; + static const struct dpu_dsc_cfg sdm670_dsc[] = { { .name = "dsc_0", .id = DSC_0, @@ -88,8 +136,10 @@ const struct dpu_mdss_cfg dpu_sdm670_cfg = { .ctl = sdm845_ctl, .sspp_count = ARRAY_SIZE(sdm670_sspp), .sspp = sdm670_sspp, - .mixer_count = ARRAY_SIZE(sdm845_lm), - .mixer = sdm845_lm, + .mixer_count = ARRAY_SIZE(sdm670_lm), + .mixer = sdm670_lm, + .dspp_count = ARRAY_SIZE(sdm670_dspp), + .dspp = sdm670_dspp, .pingpong_count = ARRAY_SIZE(sdm845_pp), .pingpong = sdm845_pp, .dsc_count = ARRAY_SIZE(sdm670_dsc), diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 6ccfde82fecd..421afacb7248 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -164,6 +164,7 @@ static const struct dpu_lm_cfg sm8150_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -171,6 +172,7 @@ static const struct dpu_lm_cfg sm8150_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index bab19ddd1d4f..641023b102bf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -163,6 +163,7 @@ static const struct dpu_lm_cfg sc8180x_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -170,6 +171,7 @@ static const struct dpu_lm_cfg sc8180x_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h new file mode 100644 index 000000000000..621a2140f675 --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef _DPU_5_3_SM6150_H +#define _DPU_5_3_SM6150_H + +static const struct dpu_caps sm6150_dpu_caps = { + .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, + .max_mixer_blendstages = 0x9, + .has_dim_layer = true, + .has_idle_pc = true, + .max_linewidth = 2160, + .pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE, + .max_hdeci_exp = MAX_HORZ_DECIMATION, + .max_vdeci_exp = MAX_VERT_DECIMATION, +}; + +static const struct dpu_mdp_cfg sm6150_mdp = { + .name = "top_0", + .base = 0x0, .len = 0x45c, + .features = 0, + .clk_ctrls = { + [DPU_CLK_CTRL_VIG0] = { .reg_off = 0x2ac, .bit_off = 0 }, + [DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 }, + }, +}; + +static const struct dpu_ctl_cfg sm6150_ctl[] = { + { + .name = "ctl_0", .id = CTL_0, + .base = 0x1000, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 9), + }, { + .name = "ctl_1", .id = CTL_1, + .base = 0x1200, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 10), + }, { + .name = "ctl_2", .id = CTL_2, + .base = 0x1400, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 11), + }, { + .name = "ctl_3", .id = CTL_3, + .base = 0x1600, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 12), + }, { + .name = "ctl_4", .id = CTL_4, + .base = 0x1800, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 13), + }, { + .name = "ctl_5", .id = CTL_5, + .base = 0x1a00, .len = 0x1e0, + .features = BIT(DPU_CTL_ACTIVE_CFG), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 23), + }, +}; + +static const struct dpu_sspp_cfg sm6150_sspp[] = { + { + .name = "sspp_0", .id = SSPP_VIG0, + .base = 0x4000, .len = 0x1f0, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_2_4, + .xin_id = 0, + .type = SSPP_TYPE_VIG, + .clk_ctrl = DPU_CLK_CTRL_VIG0, + }, { + .name = "sspp_8", .id = SSPP_DMA0, + .base = 0x24000, .len = 0x1f0, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 1, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA0, + }, { + .name = "sspp_9", .id = SSPP_DMA1, + .base = 0x26000, .len = 0x1f0, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 5, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA1, + }, { + .name = "sspp_10", .id = SSPP_DMA2, + .base = 0x28000, .len = 0x1f0, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 9, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA2, + }, { + .name = "sspp_11", .id = SSPP_DMA3, + .base = 0x2a000, .len = 0x1f0, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 13, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA3, + }, +}; + +static const struct dpu_lm_cfg sm6150_lm[] = { + { + .name = "lm_0", .id = LM_0, + .base = 0x44000, .len = 0x320, + .features = MIXER_QCM2290_MASK, + .sblk = &sdm845_lm_sblk, + .pingpong = PINGPONG_0, + .dspp = DSPP_0, + .lm_pair = LM_1, + }, { + .name = "lm_1", .id = LM_1, + .base = 0x45000, .len = 0x320, + .features = MIXER_QCM2290_MASK, + .sblk = &sdm845_lm_sblk, + .pingpong = PINGPONG_1, + .lm_pair = LM_0, + }, { + .name = "lm_2", .id = LM_2, + .base = 0x46000, .len = 0x320, + .features = MIXER_QCM2290_MASK, + .sblk = 
&sdm845_lm_sblk, + .pingpong = PINGPONG_2, + }, +}; + +static const struct dpu_dspp_cfg sm6150_dspp[] = { + { + .name = "dspp_0", .id = DSPP_0, + .base = 0x54000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, +}; + +static const struct dpu_pingpong_cfg sm6150_pp[] = { + { + .name = "pingpong_0", .id = PINGPONG_0, + .base = 0x70000, .len = 0xd4, + .features = PINGPONG_SM8150_MASK, + .sblk = &sdm845_pp_sblk, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), + }, { + .name = "pingpong_1", .id = PINGPONG_1, + .base = 0x70800, .len = 0xd4, + .features = PINGPONG_SM8150_MASK, + .sblk = &sdm845_pp_sblk, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), + }, { + .name = "pingpong_2", .id = PINGPONG_2, + .base = 0x71000, .len = 0xd4, + .features = PINGPONG_SM8150_MASK, + .sblk = &sdm845_pp_sblk, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10), + }, +}; + +static const struct dpu_intf_cfg sm6150_intf[] = { + { + .name = "intf_0", .id = INTF_0, + .base = 0x6a000, .len = 0x280, + .features = INTF_SC7180_MASK, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), + }, { + .name = "intf_1", .id = INTF_1, + .base = 0x6a800, .len = 0x2c0, + .features = INTF_SC7180_MASK, + .type = INTF_DSI, + .controller_id = MSM_DSI_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), + .intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF1_TEAR_INTR, 2), + }, { + .name = "intf_3", .id = INTF_3, + .base = 0x6b800, .len = 0x280, + .features = INTF_SC7180_MASK, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_1, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), + }, +}; + +static const struct dpu_perf_cfg sm6150_perf_data = { + .max_bw_low = 4800000, + .max_bw_high = 4800000, + .min_core_ib = 2400000, + .min_llcc_ib = 0, + .min_dram_ib = 800000, + .min_prefill_lines = 24, + .danger_lut_tbl = {0xf, 0xffff, 0x0}, + .safe_lut_tbl = {0xfff8, 0xf000, 0xffff}, + .qos_lut_tbl = { + {.nentry = ARRAY_SIZE(sm8150_qos_linear), + .entries = sm8150_qos_linear + }, + {.nentry = ARRAY_SIZE(sc7180_qos_macrotile), + .entries = sc7180_qos_macrotile + }, + {.nentry = ARRAY_SIZE(sc7180_qos_nrt), + .entries = sc7180_qos_nrt + }, + /* TODO: macrotile-qseed is different from macrotile */ + }, + .cdp_cfg = { + {.rd_enable = 1, .wr_enable = 1}, + {.rd_enable = 1, .wr_enable = 0} + }, + .clk_inefficiency_factor = 105, + .bw_inefficiency_factor = 120, +}; + +static const struct dpu_mdss_version sm6150_mdss_ver = { + .core_major_ver = 5, + .core_minor_ver = 3, +}; + +const struct dpu_mdss_cfg dpu_sm6150_cfg = { + .mdss_ver = &sm6150_mdss_ver, + .caps = &sm6150_dpu_caps, + .mdp = &sm6150_mdp, + .ctl_count = ARRAY_SIZE(sm6150_ctl), + .ctl = sm6150_ctl, + .sspp_count = ARRAY_SIZE(sm6150_sspp), + .sspp = sm6150_sspp, + .mixer_count = ARRAY_SIZE(sm6150_lm), + .mixer = sm6150_lm, + .dspp_count = ARRAY_SIZE(sm6150_dspp), + .dspp = sm6150_dspp, + .pingpong_count = ARRAY_SIZE(sm6150_pp), + .pingpong = sm6150_pp, + .intf_count = ARRAY_SIZE(sm6150_intf), + .intf = sm6150_intf, + .vbif_count = ARRAY_SIZE(sdm845_vbif), + .vbif = sdm845_vbif, + .perf = &sm6150_perf_data, +}; + +#endif diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h 
b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index a57d50b1f028..e8916ae826a6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -162,6 +162,7 @@ static const struct dpu_lm_cfg sm8250_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -169,6 +170,7 @@ static const struct dpu_lm_cfg sm8250_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index aced16e350da..f7c08e89c882 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -162,6 +162,7 @@ static const struct dpu_lm_cfg sm8350_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -169,6 +170,7 @@ static const struct dpu_lm_cfg sm8350_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index a1779c5597ae..08742472f9cc 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -257,13 +257,13 @@ static const struct dpu_pingpong_cfg sm8450_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_cwb_0", .id = PINGPONG_CWB_0, .base = 0x65800, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_cwb_1", .id = PINGPONG_CWB_1, .base = 0x65c00, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h index 907b4d7ceb47..76ec72a32378 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h @@ -256,13 +256,13 @@ static const struct dpu_pingpong_cfg sa8775p_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_6", .id = PINGPONG_CWB_0, .base = 0x65800, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_7", .id = PINGPONG_CWB_1, .base = 0x65c00, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index ad48defa154f..4d3787fceb72 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h @@ -160,6 +160,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -167,6 +168,7 @@ 
static const struct dpu_lm_cfg sm8550_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, @@ -251,13 +253,13 @@ static const struct dpu_pingpong_cfg sm8550_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_cwb_0", .id = PINGPONG_CWB_0, .base = 0x66000, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_cwb_1", .id = PINGPONG_CWB_1, .base = 0x66400, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h index a3e60ac70689..6b112e3d17da 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h @@ -159,6 +159,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, .pingpong = PINGPONG_2, + .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, .base = 0x47000, .len = 0x320, @@ -166,6 +167,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, .pingpong = PINGPONG_3, + .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, .base = 0x48000, .len = 0x320, @@ -251,13 +253,13 @@ static const struct dpu_pingpong_cfg x1e80100_pp[] = { .merge_3d = MERGE_3D_2, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), }, { - .name = "pingpong_6", .id = PINGPONG_6, + .name = "pingpong_cwb_0", .id = PINGPONG_CWB_0, .base = 0x66000, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, .merge_3d = MERGE_3D_3, }, { - .name = "pingpong_7", .id = PINGPONG_7, + .name = "pingpong_cwb_1", .id = PINGPONG_CWB_1, .base = 0x66400, .len = 0, .features = BIT(DPU_PINGPONG_DITHER), .sblk = &sc7280_pp_sblk, @@ -389,8 +391,8 @@ static const struct dpu_intf_cfg x1e80100_intf[] = { .type = INTF_DP, .controller_id = MSM_DP_CONTROLLER_2, .prog_fetch_lines_worst_case = 24, - .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), - .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), }, { .name = "intf_7", .id = INTF_7, .base = 0x3b000, .len = 0x280, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 9f6ffd344693..7191b1a6d41b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -732,6 +732,13 @@ static int _dpu_crtc_check_and_setup_lm_bounds(struct drm_crtc *crtc, struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc); int i; + /* if we cannot merge 2 LMs (no 3d mux) better to fail earlier + * before even checking the width after the split + */ + if (!dpu_kms->catalog->caps->has_3d_merge && + adj_mode->hdisplay > dpu_kms->catalog->caps->max_mixer_width) + return -E2BIG; + for (i = 0; i < cstate->num_mixers; i++) { struct drm_rect *r = &cstate->lm_bounds[i]; r->x1 = crtc_split_width * i; @@ -1182,6 +1189,49 @@ static bool dpu_crtc_needs_dirtyfb(struct drm_crtc_state *cstate) return false; } +static int dpu_crtc_reassign_planes(struct drm_crtc *crtc, struct drm_crtc_state *crtc_state) +{ + int total_planes = crtc->dev->mode_config.num_total_plane; + struct drm_atomic_state *state = crtc_state->state; + struct 
dpu_global_state *global_state; + struct drm_plane_state **states; + struct drm_plane *plane; + int ret; + + global_state = dpu_kms_get_global_state(crtc_state->state); + if (IS_ERR(global_state)) + return PTR_ERR(global_state); + + dpu_rm_release_all_sspp(global_state, crtc); + + if (!crtc_state->enable) + return 0; + + states = kcalloc(total_planes, sizeof(*states), GFP_KERNEL); + if (!states) + return -ENOMEM; + + drm_atomic_crtc_state_for_each_plane(plane, crtc_state) { + struct drm_plane_state *plane_state = + drm_atomic_get_plane_state(state, plane); + + if (IS_ERR(plane_state)) { + ret = PTR_ERR(plane_state); + goto done; + } + + states[plane_state->normalized_zpos] = plane_state; + } + + ret = dpu_assign_plane_resources(global_state, state, crtc, states, total_planes); + +done: + kfree(states); + return ret; +} + static int dpu_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { @@ -1197,6 +1247,13 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc, bool needs_dirtyfb = dpu_crtc_needs_dirtyfb(crtc_state); + if (dpu_use_virtual_planes && + (crtc_state->planes_changed || crtc_state->zpos_changed)) { + rc = dpu_crtc_reassign_planes(crtc, crtc_state); + if (rc < 0) + return rc; + } + if (!crtc_state->enable || !drm_atomic_crtc_effectively_active(crtc_state)) { DRM_DEBUG_ATOMIC("crtc%d -> enable %d, active %d, skip atomic_check\n", crtc->base.id, crtc_state->enable, @@ -1251,6 +1308,12 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, { struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc); + /* if there is no 3d_mux block we cannot merge LMs so we cannot + * split the large layer into 2 LMs, filter out such modes + */ + if (!dpu_kms->catalog->caps->has_3d_merge && + mode->hdisplay > dpu_kms->catalog->caps->max_mixer_width) + return MODE_BAD_HVALUE; /* * max crtc width is equal to the max mixer width * 2 and max height is 4K */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 83de7564e2c1..5172ab4dea99 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -800,7 +800,7 @@ static int dpu_encoder_virt_atomic_check( if (!crtc_state->active_changed || crtc_state->enable) ret = dpu_rm_reserve(&dpu_kms->rm, global_state, - drm_enc, crtc_state, topology); + drm_enc, crtc_state, &topology); if (!ret) dpu_encoder_assign_crtc_resources(dpu_kms, drm_enc, global_state, crtc_state); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 2cbf41f33cc0..0b342c043875 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -765,6 +765,7 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = { #include "catalog/dpu_5_0_sm8150.h" #include "catalog/dpu_5_1_sc8180x.h" #include "catalog/dpu_5_2_sm7150.h" +#include "catalog/dpu_5_3_sm6150.h" #include "catalog/dpu_5_4_sm6125.h" #include "catalog/dpu_6_0_sm8250.h" diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index c701d18c3522..4cea19e1a203 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -613,6 +613,16 @@ struct dpu_wb_cfg { enum dpu_clk_ctrl_type clk_ctrl; }; +/* + * struct dpu_cwb_cfg : MDP CWB mux instance info + * @id: enum identifying this block + * @base: register base offset to mdss + * @features bit mask identifying
sub-blocks/features + */ +struct dpu_cwb_cfg { + DPU_HW_BLK_INFO; +}; + /** * struct dpu_vbif_dynamic_ot_cfg - dynamic OT setting * @pps pixel per seconds @@ -815,6 +825,9 @@ struct dpu_mdss_cfg { u32 dspp_count; const struct dpu_dspp_cfg *dspp; + u32 cwb_count; + const struct dpu_cwb_cfg *cwb; + /* Add additional block data structures here */ const struct dpu_perf_cfg *perf; @@ -839,6 +852,7 @@ extern const struct dpu_mdss_cfg dpu_sm8250_cfg; extern const struct dpu_mdss_cfg dpu_sc7180_cfg; extern const struct dpu_mdss_cfg dpu_sm6115_cfg; extern const struct dpu_mdss_cfg dpu_sm6125_cfg; +extern const struct dpu_mdss_cfg dpu_sm6150_cfg; extern const struct dpu_mdss_cfg dpu_sm6350_cfg; extern const struct dpu_mdss_cfg dpu_qcm2290_cfg; extern const struct dpu_mdss_cfg dpu_sm6375_cfg; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.c new file mode 100644 index 000000000000..ae785f4ff0d4 --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved + */ + +#include <drm/drm_managed.h> +#include "dpu_hw_cwb.h" + +#include <linux/bitfield.h> + +#define CWB_MUX 0x000 +#define CWB_MODE 0x004 + +/* CWB mux block bit definitions */ +#define CWB_MUX_MASK GENMASK(3, 0) +#define CWB_MODE_MASK GENMASK(2, 0) + +static void dpu_hw_cwb_config(struct dpu_hw_cwb *ctx, + struct dpu_hw_cwb_setup_cfg *cwb_cfg) +{ + struct dpu_hw_blk_reg_map *c = &ctx->hw; + int cwb_mux_cfg = 0xF; + enum dpu_pingpong pp; + enum cwb_mode_input input; + + if (!cwb_cfg) + return; + + input = cwb_cfg->input; + pp = cwb_cfg->pp_idx; + + if (input >= INPUT_MODE_MAX) + return; + + /* + * The CWB_MUX register takes the pingpong index for the real-time + * display + */ + if ((pp != PINGPONG_NONE) && (pp < PINGPONG_MAX)) + cwb_mux_cfg = FIELD_PREP(CWB_MUX_MASK, pp - PINGPONG_0); + + input = FIELD_PREP(CWB_MODE_MASK, input); + + DPU_REG_WRITE(c, CWB_MUX, cwb_mux_cfg); + DPU_REG_WRITE(c, CWB_MODE, input); +} + +/** + * dpu_hw_cwb_init() - Initializes the CWB mux hw driver object. + * @dev: Corresponding device for devres management + * @cfg: CWB catalog entry for which driver object is required + * @addr: mapped register io address of MDP + * Return: Error code or allocated dpu_hw_cwb context + */ +struct dpu_hw_cwb *dpu_hw_cwb_init(struct drm_device *dev, + const struct dpu_cwb_cfg *cfg, + void __iomem *addr) +{ + struct dpu_hw_cwb *c; + + if (!addr) + return ERR_PTR(-EINVAL); + + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); + if (!c) + return ERR_PTR(-ENOMEM); + + c->hw.blk_addr = addr + cfg->base; + c->hw.log_mask = DPU_DBG_MASK_CWB; + + c->idx = cfg->id; + c->ops.config_cwb = dpu_hw_cwb_config; + + return c; +} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.h new file mode 100644 index 000000000000..96b6edf6b2bb --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc.
All rights reserved + */ + +#ifndef _DPU_HW_CWB_H +#define _DPU_HW_CWB_H + +#include "dpu_hw_util.h" + +struct dpu_hw_cwb; + +enum cwb_mode_input { + INPUT_MODE_LM_OUT, + INPUT_MODE_DSPP_OUT, + INPUT_MODE_MAX +}; + +/** + * struct dpu_hw_cwb_setup_cfg : Describes configuration for CWB mux + * @pp_idx: Index of the real-time pingpong that feeds + * the CWB mux + * @input: Input tap point + */ +struct dpu_hw_cwb_setup_cfg { + enum dpu_pingpong pp_idx; + enum cwb_mode_input input; +}; + +/** + * struct dpu_hw_cwb_ops : Interface to the cwb hw driver functions + * @config_cwb: configure CWB mux + */ +struct dpu_hw_cwb_ops { + void (*config_cwb)(struct dpu_hw_cwb *ctx, + struct dpu_hw_cwb_setup_cfg *cwb_cfg); +}; + +/** + * struct dpu_hw_cwb : CWB mux driver object + * @base: Hardware block base structure + * @hw: Block hardware details + * @idx: CWB index + * @ops: handle to operations possible for this CWB + */ +struct dpu_hw_cwb { + struct dpu_hw_blk base; + struct dpu_hw_blk_reg_map hw; + + enum dpu_cwb idx; + + struct dpu_hw_cwb_ops ops; +}; + +/** + * to_dpu_hw_cwb - convert base object dpu_hw_blk to container + * @hw: Pointer to base hardware block + * Return: Pointer to hardware block container + */ +static inline struct dpu_hw_cwb *to_dpu_hw_cwb(struct dpu_hw_blk *hw) +{ + return container_of(hw, struct dpu_hw_cwb, base); +} + +struct dpu_hw_cwb *dpu_hw_cwb_init(struct drm_device *dev, + const struct dpu_cwb_cfg *cfg, + void __iomem *addr); + +#endif /*_DPU_HW_CWB_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h index f8806a4d317b..ba7bb05efe9b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. +/* + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ #ifndef _DPU_HW_MDSS_H @@ -181,10 +183,10 @@ enum dpu_pingpong { PINGPONG_3, PINGPONG_4, PINGPONG_5, - PINGPONG_6, - PINGPONG_7, - PINGPONG_8, - PINGPONG_9, + PINGPONG_CWB_0, + PINGPONG_CWB_1, + PINGPONG_CWB_2, + PINGPONG_CWB_3, PINGPONG_S0, PINGPONG_MAX }; @@ -350,6 +352,7 @@ struct dpu_mdss_color { #define DPU_DBG_MASK_DSPP (1 << 10) #define DPU_DBG_MASK_DSC (1 << 11) #define DPU_DBG_MASK_CDM (1 << 12) +#define DPU_DBG_MASK_CWB (1 << 13) /** * struct dpu_hw_tear_check - Struct contains parameters to configure diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c index fb9f90957762..4853e516c487 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c @@ -173,7 +173,9 @@ static void dpu_hw_wb_bind_pingpong_blk( mux_cfg = DPU_REG_READ(c, WB_MUX); mux_cfg &= ~0xf; - if (pp) + if (pp >= PINGPONG_CWB_0) + mux_cfg |= (pp < PINGPONG_CWB_2) ?
0xd : 0xb; + else if (pp) mux_cfg |= (pp - PINGPONG_0) & 0x7; else mux_cfg |= 0xf; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 8b251f87a052..97e9cb8c2b09 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -51,6 +51,9 @@ #define DPU_DEBUGFS_DIR "msm_dpu" #define DPU_DEBUGFS_HWMASKNAME "hw_log_mask" +bool dpu_use_virtual_planes; +module_param(dpu_use_virtual_planes, bool, 0); + static int dpu_kms_hw_init(struct msm_kms *kms); static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms); @@ -829,8 +832,11 @@ static int _dpu_kms_drm_obj_init(struct dpu_kms *dpu_kms) type, catalog->sspp[i].features, catalog->sspp[i].features & BIT(DPU_SSPP_CURSOR)); - plane = dpu_plane_init(dev, catalog->sspp[i].id, type, - (1UL << max_crtc_count) - 1); + if (dpu_use_virtual_planes) + plane = dpu_plane_init_virtual(dev, type, (1UL << max_crtc_count) - 1); + else + plane = dpu_plane_init(dev, catalog->sspp[i].id, type, + (1UL << max_crtc_count) - 1); if (IS_ERR(plane)) { DPU_ERROR("dpu_plane_init failed\n"); ret = PTR_ERR(plane); @@ -932,12 +938,14 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k /* dump CTL sub-blocks HW regs info */ for (i = 0; i < cat->ctl_count; i++) msm_disp_snapshot_add_block(disp_state, cat->ctl[i].len, - dpu_kms->mmio + cat->ctl[i].base, cat->ctl[i].name); + dpu_kms->mmio + cat->ctl[i].base, "%s", + cat->ctl[i].name); /* dump DSPP sub-blocks HW regs info */ for (i = 0; i < cat->dspp_count; i++) { base = dpu_kms->mmio + cat->dspp[i].base; - msm_disp_snapshot_add_block(disp_state, cat->dspp[i].len, base, cat->dspp[i].name); + msm_disp_snapshot_add_block(disp_state, cat->dspp[i].len, base, + "%s", cat->dspp[i].name); if (cat->dspp[i].sblk && cat->dspp[i].sblk->pcc.len > 0) msm_disp_snapshot_add_block(disp_state, cat->dspp[i].sblk->pcc.len, @@ -949,13 +957,14 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k /* dump INTF sub-blocks HW regs info */ for (i = 0; i < cat->intf_count; i++) msm_disp_snapshot_add_block(disp_state, cat->intf[i].len, - dpu_kms->mmio + cat->intf[i].base, cat->intf[i].name); + dpu_kms->mmio + cat->intf[i].base, "%s", + cat->intf[i].name); /* dump PP sub-blocks HW regs info */ for (i = 0; i < cat->pingpong_count; i++) { base = dpu_kms->mmio + cat->pingpong[i].base; msm_disp_snapshot_add_block(disp_state, cat->pingpong[i].len, base, - cat->pingpong[i].name); + "%s", cat->pingpong[i].name); /* TE2 sub-block has length of 0, so will not print it */ @@ -969,7 +978,8 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k /* dump SSPP sub-blocks HW regs info */ for (i = 0; i < cat->sspp_count; i++) { base = dpu_kms->mmio + cat->sspp[i].base; - msm_disp_snapshot_add_block(disp_state, cat->sspp[i].len, base, cat->sspp[i].name); + msm_disp_snapshot_add_block(disp_state, cat->sspp[i].len, base, + "%s", cat->sspp[i].name); if (cat->sspp[i].sblk && cat->sspp[i].sblk->scaler_blk.len > 0) msm_disp_snapshot_add_block(disp_state, cat->sspp[i].sblk->scaler_blk.len, @@ -987,12 +997,14 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k /* dump LM sub-blocks HW regs info */ for (i = 0; i < cat->mixer_count; i++) msm_disp_snapshot_add_block(disp_state, cat->mixer[i].len, - dpu_kms->mmio + cat->mixer[i].base, cat->mixer[i].name); + dpu_kms->mmio + cat->mixer[i].base, + "%s", cat->mixer[i].name); /* dump WB sub-blocks HW regs info */ for (i = 0; i < 
cat->wb_count; i++) msm_disp_snapshot_add_block(disp_state, cat->wb[i].len, - dpu_kms->mmio + cat->wb[i].base, cat->wb[i].name); + dpu_kms->mmio + cat->wb[i].base, "%s", + cat->wb[i].name); if (cat->mdp[0].features & BIT(DPU_MDP_PERIPH_0_REMOVED)) { msm_disp_snapshot_add_block(disp_state, MDP_PERIPH_TOP0, @@ -1004,10 +1016,16 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k dpu_kms->mmio + cat->mdp[0].base, "top"); } + /* dump CWB sub-blocks HW regs info */ + for (i = 0; i < cat->cwb_count; i++) + msm_disp_snapshot_add_block(disp_state, cat->cwb[i].len, + dpu_kms->mmio + cat->cwb[i].base, cat->cwb[i].name); + /* dump DSC sub-blocks HW regs info */ for (i = 0; i < cat->dsc_count; i++) { base = dpu_kms->mmio + cat->dsc[i].base; - msm_disp_snapshot_add_block(disp_state, cat->dsc[i].len, base, cat->dsc[i].name); + msm_disp_snapshot_add_block(disp_state, cat->dsc[i].len, base, + "%s", cat->dsc[i].name); if (cat->dsc[i].features & BIT(DPU_DSC_HW_REV_1_2)) { struct dpu_dsc_blk enc = cat->dsc[i].sblk->enc; @@ -1022,7 +1040,16 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k if (cat->cdm) msm_disp_snapshot_add_block(disp_state, cat->cdm->len, - dpu_kms->mmio + cat->cdm->base, cat->cdm->name); + dpu_kms->mmio + cat->cdm->base, + "%s", cat->cdm->name); + + for (i = 0; i < dpu_kms->catalog->vbif_count; i++) { + const struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i]; + + msm_disp_snapshot_add_block(disp_state, vbif->len, + dpu_kms->vbif[vbif->id] + vbif->base, + "%s", vbif->name); + } pm_runtime_put_sync(&dpu_kms->pdev->dev); } @@ -1478,6 +1505,7 @@ static const struct of_device_id dpu_dt_match[] = { { .compatible = "qcom,sc8280xp-dpu", .data = &dpu_sc8280xp_cfg, }, { .compatible = "qcom,sm6115-dpu", .data = &dpu_sm6115_cfg, }, { .compatible = "qcom,sm6125-dpu", .data = &dpu_sm6125_cfg, }, + { .compatible = "qcom,sm6150-dpu", .data = &dpu_sm6150_cfg, }, { .compatible = "qcom,sm6350-dpu", .data = &dpu_sm6350_cfg, }, { .compatible = "qcom,sm6375-dpu", .data = &dpu_sm6375_cfg, }, { .compatible = "qcom,sm7150-dpu", .data = &dpu_sm7150_cfg, }, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index 88d64d43ea1a..547cdb2c0c78 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -54,6 +54,8 @@ #define ktime_compare_safe(A, B) \ ktime_compare(ktime_sub((A), (B)), ktime_set(0, 0)) +extern bool dpu_use_virtual_planes; + struct dpu_kms { struct msm_kms base; struct drm_device *dev; @@ -128,6 +130,8 @@ struct dpu_global_state { uint32_t dspp_to_enc_id[DSPP_MAX - DSPP_0]; uint32_t dsc_to_enc_id[DSC_MAX - DSC_0]; uint32_t cdm_to_enc_id; + + uint32_t sspp_to_crtc_id[SSPP_MAX - SSPP_NONE]; }; struct dpu_global_state diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 3ffac24333a2..098abc2c0003 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -20,7 +20,6 @@ #include "msm_drv.h" #include "msm_mdss.h" #include "dpu_kms.h" -#include "dpu_formats.h" #include "dpu_hw_sspp.h" #include "dpu_hw_util.h" #include "dpu_trace.h" @@ -878,7 +877,7 @@ static int dpu_plane_atomic_check_nosspp(struct drm_plane *plane, drm_rect_rotate_inv(&pipe_cfg->src_rect, new_plane_state->fb->width, new_plane_state->fb->height, new_plane_state->rotation); - if (r_pipe_cfg->src_rect.x1 != 0) + if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) 
drm_rect_rotate_inv(&r_pipe_cfg->src_rect, new_plane_state->fb->width, new_plane_state->fb->height, new_plane_state->rotation); @@ -888,6 +887,32 @@ static int dpu_plane_atomic_check_nosspp(struct drm_plane *plane, return 0; } +static int dpu_plane_is_multirect_parallel_capable(struct dpu_hw_sspp *sspp, + struct dpu_sw_pipe_cfg *pipe_cfg, + const struct msm_format *fmt, + uint32_t max_linewidth) +{ + if (drm_rect_width(&pipe_cfg->src_rect) != drm_rect_width(&pipe_cfg->dst_rect) || + drm_rect_height(&pipe_cfg->src_rect) != drm_rect_height(&pipe_cfg->dst_rect)) + return false; + + if (pipe_cfg->rotation & DRM_MODE_ROTATE_90) + return false; + + if (MSM_FORMAT_IS_YUV(fmt)) + return false; + + if (MSM_FORMAT_IS_UBWC(fmt) && + drm_rect_width(&pipe_cfg->src_rect) > max_linewidth / 2) + return false; + + if (!test_bit(DPU_SSPP_SMART_DMA_V1, &sspp->cap->features) && + !test_bit(DPU_SSPP_SMART_DMA_V2, &sspp->cap->features)) + return false; + + return true; +} + static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, struct drm_atomic_state *state, const struct drm_crtc_state *crtc_state) @@ -901,7 +926,6 @@ static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, const struct msm_format *fmt; struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg; struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg; - uint32_t max_linewidth; uint32_t supported_rotations; const struct dpu_sspp_cfg *pipe_hw_caps; const struct dpu_sspp_sub_blks *sblk; @@ -923,8 +947,6 @@ static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, fmt = msm_framebuffer_format(new_plane_state->fb); - max_linewidth = pdpu->catalog->caps->max_linewidth; - supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) @@ -940,48 +962,43 @@ static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, return ret; if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) { - /* - * In parallel multirect case only the half of the usual width - * is supported for tiled formats. If we are here, we know that - * full width is more than max_linewidth, thus each rect is - * wider than allowed. 
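The new dpu_plane_is_multirect_parallel_capable() boils down to a few pure geometry checks. The following is a minimal standalone sketch of those rules, using stand-in types rather than the driver's struct drm_rect and struct msm_format, and omitting the SmartDMA feature test; the max_linewidth value in main() is hypothetical:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for the driver's drm_rect; x2/y2 are exclusive. */
    struct rect { int x1, y1, x2, y2; };

    static int rect_width(const struct rect *r)  { return r->x2 - r->x1; }
    static int rect_height(const struct rect *r) { return r->y2 - r->y1; }

    /*
     * Same rules as the hunk above: no scaling (src and dst extents match),
     * no 90-degree rotation, no YUV, and a UBWC (tiled) source may only use
     * half the SSPP line buffer.
     */
    static bool multirect_parallel_ok(const struct rect *src, const struct rect *dst,
                                      bool rot90, bool yuv, bool ubwc,
                                      unsigned int max_linewidth)
    {
        if (rect_width(src) != rect_width(dst) ||
            rect_height(src) != rect_height(dst))
            return false;

        if (rot90 || yuv)
            return false;

        if (ubwc && rect_width(src) > (int)(max_linewidth / 2))
            return false;

        return true;
    }

    int main(void)
    {
        struct rect src = { 0, 0, 1440, 1080 };
        struct rect dst = { 0, 0, 1440, 1080 };

        /* 1440 > 2560 / 2, so a UBWC source of this width is rejected. */
        printf("ubwc: %d\n", multirect_parallel_ok(&src, &dst, false, false,
                                                   true, 2560));
        /* A linear source of the same geometry is accepted. */
        printf("linear: %d\n", multirect_parallel_ok(&src, &dst, false, false,
                                                     false, 2560));
        return 0;
    }

With the assumed line buffer of 2560 this prints 0 then 1, matching the UBWC half-width rule above.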
- */ - if (MSM_FORMAT_IS_UBWC(fmt) && - drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) { - DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u, tiled format\n", - DRM_RECT_ARG(&pipe_cfg->src_rect), max_linewidth); - return -E2BIG; - } + ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, + &crtc_state->adjusted_mode); + if (ret) + return ret; + } - if (drm_rect_width(&pipe_cfg->src_rect) != drm_rect_width(&pipe_cfg->dst_rect) || - drm_rect_height(&pipe_cfg->src_rect) != drm_rect_height(&pipe_cfg->dst_rect) || - (!test_bit(DPU_SSPP_SMART_DMA_V1, &pipe->sspp->cap->features) && - !test_bit(DPU_SSPP_SMART_DMA_V2, &pipe->sspp->cap->features)) || - pipe_cfg->rotation & DRM_MODE_ROTATE_90 || - MSM_FORMAT_IS_YUV(fmt)) { - DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u, can't use split source\n", - DRM_RECT_ARG(&pipe_cfg->src_rect), max_linewidth); - return -E2BIG; - } + return 0; +} + +static bool dpu_plane_try_multirect_parallel(struct dpu_sw_pipe *pipe, struct dpu_sw_pipe_cfg *pipe_cfg, + struct dpu_sw_pipe *r_pipe, struct dpu_sw_pipe_cfg *r_pipe_cfg, + struct dpu_hw_sspp *sspp, const struct msm_format *fmt, + uint32_t max_linewidth) +{ + r_pipe->sspp = NULL; + + pipe->multirect_index = DPU_SSPP_RECT_SOLO; + pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + + r_pipe->multirect_index = DPU_SSPP_RECT_SOLO; + r_pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + + if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) { + if (!dpu_plane_is_multirect_parallel_capable(pipe->sspp, pipe_cfg, fmt, max_linewidth) || + !dpu_plane_is_multirect_parallel_capable(pipe->sspp, r_pipe_cfg, fmt, max_linewidth)) + return false; + + r_pipe->sspp = pipe->sspp; - /* - * Use multirect for wide plane. We do not support dynamic - * assignment of SSPPs, so we know the configuration. 
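The deleted comment describes the wide-plane case: a source wider than the SSPP line buffer is halved so the two halves can feed RECT_0 and RECT_1 of one pipe. The actual split is performed on drm_rects in dpu_plane_atomic_check_nosspp(), outside this hunk; this standalone sketch with stand-in types only illustrates the halving:

    #include <stdio.h>

    struct rect { int x1, y1, x2, y2; };

    /*
     * Cut a source rect into two halves; in the driver the halves are then
     * programmed into RECT_0 and RECT_1 of the same pipe in parallel
     * multirect mode.
     */
    static void split_wide_rect(const struct rect *src, struct rect *left,
                                struct rect *right)
    {
        int mid = src->x1 + (src->x2 - src->x1) / 2;

        *left = *src;
        *right = *src;
        left->x2 = mid;
        right->x1 = mid;
    }

    int main(void)
    {
        struct rect src = { 0, 0, 3840, 2160 }, l, r;

        split_wide_rect(&src, &l, &r);  /* e.g. 3840 wide vs. a 2560 line buffer */
        printf("RECT_0: %d..%d  RECT_1: %d..%d\n", l.x1, l.x2, r.x1, r.x2);
        return 0;
    }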
- */ pipe->multirect_index = DPU_SSPP_RECT_0; pipe->multirect_mode = DPU_SSPP_MULTIRECT_PARALLEL; - r_pipe->sspp = pipe->sspp; r_pipe->multirect_index = DPU_SSPP_RECT_1; r_pipe->multirect_mode = DPU_SSPP_MULTIRECT_PARALLEL; - - ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, - &crtc_state->adjusted_mode); - if (ret) - return ret; } - return 0; + return true; } static int dpu_plane_atomic_check(struct drm_plane *plane, @@ -995,14 +1012,19 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane); struct dpu_sw_pipe *pipe = &pstate->pipe; struct dpu_sw_pipe *r_pipe = &pstate->r_pipe; + struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg; + struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg; const struct drm_crtc_state *crtc_state = NULL; + uint32_t max_linewidth = dpu_kms->catalog->caps->max_linewidth; if (new_plane_state->crtc) crtc_state = drm_atomic_get_new_crtc_state(state, new_plane_state->crtc); pipe->sspp = dpu_rm_get_sspp(&dpu_kms->rm, pdpu->pipe); - r_pipe->sspp = NULL; + + if (!pipe->sspp) + return -EINVAL; ret = dpu_plane_atomic_check_nosspp(plane, new_plane_state, crtc_state); if (ret) @@ -1011,14 +1033,155 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, if (!new_plane_state->visible) return 0; - pipe->multirect_index = DPU_SSPP_RECT_SOLO; - pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; - r_pipe->multirect_index = DPU_SSPP_RECT_SOLO; - r_pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + if (!dpu_plane_try_multirect_parallel(pipe, pipe_cfg, r_pipe, r_pipe_cfg, + pipe->sspp, + msm_framebuffer_format(new_plane_state->fb), + max_linewidth)) { + DPU_DEBUG_PLANE(pdpu, "invalid " DRM_RECT_FMT " /" DRM_RECT_FMT + " max_line:%u, can't use split source\n", + DRM_RECT_ARG(&pipe_cfg->src_rect), + DRM_RECT_ARG(&r_pipe_cfg->src_rect), + max_linewidth); + return -E2BIG; + } return dpu_plane_atomic_check_sspp(plane, state, crtc_state); } +static int dpu_plane_virtual_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *plane_state = + drm_atomic_get_plane_state(state, plane); + struct drm_plane_state *old_plane_state = + drm_atomic_get_old_plane_state(state, plane); + struct dpu_plane_state *pstate = to_dpu_plane_state(plane_state); + struct drm_crtc_state *crtc_state = NULL; + int ret; + + if (plane_state->crtc) + crtc_state = drm_atomic_get_new_crtc_state(state, + plane_state->crtc); + + ret = dpu_plane_atomic_check_nosspp(plane, plane_state, crtc_state); + if (ret) + return ret; + + if (!plane_state->visible) { + /* + * resources are freed by dpu_crtc_assign_plane_resources(), + * but clean them here. + */ + pstate->pipe.sspp = NULL; + pstate->r_pipe.sspp = NULL; + + return 0; + } + + /* + * Force resource reallocation if the format of FB or src/dst have + * changed. We might need to allocate different SSPP or SSPPs for this + * plane than the one used previously. + */
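The comparison that follows implements the comment above. As a rough standalone sketch of the same idea (stand-in types; the driver compares msm_format pointers returned by msm_framebuffer_format()):

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for the few drm_plane_state fields compared here. */
    struct pstate {
        unsigned int src_w, src_h;    /* DRM stores these as 16.16 fixed point */
        unsigned int crtc_w, crtc_h;
        const char *format;
    };

    /*
     * Any change of source size, destination size or framebuffer format may
     * call for a different SSPP (or a second one), so plane resources must
     * be assigned again.
     */
    static bool needs_realloc(const struct pstate *old, const struct pstate *cur)
    {
        if (!old || !old->format)
            return true;

        return old->src_w != cur->src_w || old->src_h != cur->src_h ||
               old->crtc_w != cur->crtc_w || old->crtc_h != cur->crtc_h ||
               old->format != cur->format;
    }

    int main(void)
    {
        struct pstate old = { 1920 << 16, 1080 << 16, 1920, 1080, "XR24" };
        struct pstate cur = old;

        cur.crtc_w = 960;    /* now downscaling: may need a scaler-capable pipe */
        printf("needs_realloc: %d\n", needs_realloc(&old, &cur));
        return 0;
    }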
+ if (!old_plane_state || !old_plane_state->fb || + old_plane_state->src_w != plane_state->src_w || + old_plane_state->src_h != plane_state->src_h || + old_plane_state->crtc_w != plane_state->crtc_w || + old_plane_state->crtc_h != plane_state->crtc_h || + msm_framebuffer_format(old_plane_state->fb) != + msm_framebuffer_format(plane_state->fb)) + crtc_state->planes_changed = true; + + return 0; +} + +static int dpu_plane_virtual_assign_resources(struct drm_crtc *crtc, + struct dpu_global_state *global_state, + struct drm_atomic_state *state, + struct drm_plane_state *plane_state) +{ + const struct drm_crtc_state *crtc_state = NULL; + struct drm_plane *plane = plane_state->plane; + struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane); + struct dpu_rm_sspp_requirements reqs; + struct dpu_plane_state *pstate; + struct dpu_sw_pipe *pipe; + struct dpu_sw_pipe *r_pipe; + struct dpu_sw_pipe_cfg *pipe_cfg; + struct dpu_sw_pipe_cfg *r_pipe_cfg; + const struct msm_format *fmt; + + if (plane_state->crtc) + crtc_state = drm_atomic_get_new_crtc_state(state, + plane_state->crtc); + + pstate = to_dpu_plane_state(plane_state); + pipe = &pstate->pipe; + r_pipe = &pstate->r_pipe; + pipe_cfg = &pstate->pipe_cfg; + r_pipe_cfg = &pstate->r_pipe_cfg; + + pipe->sspp = NULL; + r_pipe->sspp = NULL; + + if (!plane_state->fb) + return -EINVAL; + + fmt = msm_framebuffer_format(plane_state->fb); + reqs.yuv = MSM_FORMAT_IS_YUV(fmt); + reqs.scale = (plane_state->src_w >> 16 != plane_state->crtc_w) || + (plane_state->src_h >> 16 != plane_state->crtc_h); + + reqs.rot90 = drm_rotation_90_or_270(plane_state->rotation); + + pipe->sspp = dpu_rm_reserve_sspp(&dpu_kms->rm, global_state, crtc, &reqs); + if (!pipe->sspp) + return -ENODEV; + + if (!dpu_plane_try_multirect_parallel(pipe, pipe_cfg, r_pipe, r_pipe_cfg, + pipe->sspp, + msm_framebuffer_format(plane_state->fb), + dpu_kms->catalog->caps->max_linewidth)) { + /* multirect is not possible, use two SSPP blocks */ + r_pipe->sspp = dpu_rm_reserve_sspp(&dpu_kms->rm, global_state, crtc, &reqs); + if (!r_pipe->sspp) + return -ENODEV; + + pipe->multirect_index = DPU_SSPP_RECT_SOLO; + pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + + r_pipe->multirect_index = DPU_SSPP_RECT_SOLO; + r_pipe->multirect_mode = DPU_SSPP_MULTIRECT_NONE; + } + + return dpu_plane_atomic_check_sspp(plane, state, crtc_state); +} + +int dpu_assign_plane_resources(struct dpu_global_state *global_state, + struct drm_atomic_state *state, + struct drm_crtc *crtc, + struct drm_plane_state **states, + unsigned int num_planes) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < num_planes; i++) { + struct drm_plane_state *plane_state = states[i]; + + if (!plane_state || + !plane_state->visible) + continue; + + ret = dpu_plane_virtual_assign_resources(crtc, global_state, + state, plane_state); + if (ret) + break; + } + + return ret; +} + static void dpu_plane_flush_csc(struct dpu_plane *pdpu, struct dpu_sw_pipe *pipe) { const struct msm_format *format = @@ -1335,12 +1498,15 @@ static void dpu_plane_atomic_print_state(struct drm_printer *p, drm_printf(p, "\tstage=%d\n", pstate->stage); - drm_printf(p, "\tsspp[0]=%s\n", pipe->sspp->cap->name); - drm_printf(p, "\tmultirect_mode[0]=%s\n", dpu_get_multirect_mode(pipe->multirect_mode)); - drm_printf(p, "\tmultirect_index[0]=%s\n", - dpu_get_multirect_index(pipe->multirect_index)); - drm_printf(p, "\tsrc[0]=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&pipe_cfg->src_rect)); - drm_printf(p, "\tdst[0]=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&pipe_cfg->dst_rect)); + if (pipe->sspp) { +
drm_printf(p, "\tsspp[0]=%s\n", pipe->sspp->cap->name); + drm_printf(p, "\tmultirect_mode[0]=%s\n", + dpu_get_multirect_mode(pipe->multirect_mode)); + drm_printf(p, "\tmultirect_index[0]=%s\n", + dpu_get_multirect_index(pipe->multirect_index)); + drm_printf(p, "\tsrc[0]=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&pipe_cfg->src_rect)); + drm_printf(p, "\tdst[0]=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&pipe_cfg->dst_rect)); + } if (r_pipe->sspp) { drm_printf(p, "\tsspp[1]=%s\n", r_pipe->sspp->cap->name); @@ -1433,39 +1599,29 @@ static const struct drm_plane_helper_funcs dpu_plane_helper_funcs = { .atomic_update = dpu_plane_atomic_update, }; -/** - * dpu_plane_init - create new dpu plane for the given pipe - * @dev: Pointer to DRM device - * @pipe: dpu hardware pipe identifier - * @type: Plane type - PRIMARY/OVERLAY/CURSOR - * @possible_crtcs: bitmask of crtc that can be attached to the given pipe - * - * Initialize the plane. - */ -struct drm_plane *dpu_plane_init(struct drm_device *dev, - uint32_t pipe, enum drm_plane_type type, - unsigned long possible_crtcs) +static const struct drm_plane_helper_funcs dpu_plane_virtual_helper_funcs = { + .prepare_fb = dpu_plane_prepare_fb, + .cleanup_fb = dpu_plane_cleanup_fb, + .atomic_check = dpu_plane_virtual_atomic_check, + .atomic_update = dpu_plane_atomic_update, +}; + +/* initialize plane */ +static struct drm_plane *dpu_plane_init_common(struct drm_device *dev, + enum drm_plane_type type, + unsigned long possible_crtcs, + bool inline_rotation, + const uint32_t *format_list, + uint32_t num_formats, + enum dpu_sspp pipe) { struct drm_plane *plane = NULL; - const uint32_t *format_list; struct dpu_plane *pdpu; struct msm_drm_private *priv = dev->dev_private; struct dpu_kms *kms = to_dpu_kms(priv->kms); - struct dpu_hw_sspp *pipe_hw; - uint32_t num_formats; uint32_t supported_rotations; int ret; - /* initialize underlying h/w driver */ - pipe_hw = dpu_rm_get_sspp(&kms->rm, pipe); - if (!pipe_hw || !pipe_hw->cap || !pipe_hw->cap->sblk) { - DPU_ERROR("[%u]SSPP is invalid\n", pipe); - return ERR_PTR(-EINVAL); - } - - format_list = pipe_hw->cap->sblk->format_list; - num_formats = pipe_hw->cap->sblk->num_formats; - pdpu = drmm_universal_plane_alloc(dev, struct dpu_plane, base, 0xff, &dpu_plane_funcs, format_list, num_formats, @@ -1491,7 +1647,7 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180; - if (pipe_hw->cap->features & BIT(DPU_SSPP_INLINE_ROTATION)) + if (inline_rotation) supported_rotations |= DRM_MODE_ROTATE_MASK; drm_plane_create_rotation_property(plane, @@ -1499,10 +1655,98 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, drm_plane_enable_fb_damage_clips(plane); - /* success! finalize initialization */ + DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name, + pipe, plane->base.id); + return plane; +} + +/** + * dpu_plane_init - create new dpu plane for the given pipe + * @dev: Pointer to DRM device + * @pipe: dpu hardware pipe identifier + * @type: Plane type - PRIMARY/OVERLAY/CURSOR + * @possible_crtcs: bitmask of crtc that can be attached to the given pipe + * + * Initialize the plane. 
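As the kernel-doc above notes, possible_crtcs is a plain bitmask with one bit per CRTC index; both plane-init paths in this patch pass (1UL << max_crtc_count) - 1, meaning the plane may attach to any CRTC. A trivial standalone illustration (the max_crtc_count value here is hypothetical):

    #include <stdio.h>

    int main(void)
    {
        unsigned int max_crtc_count = 4;
        unsigned long possible_crtcs = (1UL << max_crtc_count) - 1;

        printf("possible_crtcs = 0x%lx\n", possible_crtcs);  /* prints 0xf */
        return 0;
    }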
+ */ +struct drm_plane *dpu_plane_init(struct drm_device *dev, + uint32_t pipe, enum drm_plane_type type, + unsigned long possible_crtcs) +{ + struct drm_plane *plane = NULL; + struct msm_drm_private *priv = dev->dev_private; + struct dpu_kms *kms = to_dpu_kms(priv->kms); + struct dpu_hw_sspp *pipe_hw; + + /* initialize underlying h/w driver */ + pipe_hw = dpu_rm_get_sspp(&kms->rm, pipe); + if (!pipe_hw || !pipe_hw->cap || !pipe_hw->cap->sblk) { + DPU_ERROR("[%u]SSPP is invalid\n", pipe); + return ERR_PTR(-EINVAL); + } + + + plane = dpu_plane_init_common(dev, type, possible_crtcs, + pipe_hw->cap->features & BIT(DPU_SSPP_INLINE_ROTATION), + pipe_hw->cap->sblk->format_list, + pipe_hw->cap->sblk->num_formats, + pipe); + if (IS_ERR(plane)) + return plane; + drm_plane_helper_add(plane, &dpu_plane_helper_funcs); DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name, pipe, plane->base.id); + + return plane; +} + +/** + * dpu_plane_init_virtual - create new virtualized DPU plane + * @dev: Pointer to DRM device + * @type: Plane type - PRIMARY/OVERLAY/CURSOR + * @possible_crtcs: bitmask of crtc that can be attached to the given pipe + * + * Initialize the virtual plane with no backing SSPP / pipe. + */ +struct drm_plane *dpu_plane_init_virtual(struct drm_device *dev, + enum drm_plane_type type, + unsigned long possible_crtcs) +{ + struct drm_plane *plane = NULL; + struct msm_drm_private *priv = dev->dev_private; + struct dpu_kms *kms = to_dpu_kms(priv->kms); + bool has_inline_rotation = false; + const u32 *format_list = NULL; + u32 num_formats = 0; + int i; + + /* Determine the largest configuration that we can implement */ + for (i = 0; i < kms->catalog->sspp_count; i++) { + const struct dpu_sspp_cfg *cfg = &kms->catalog->sspp[i]; + + if (test_bit(DPU_SSPP_INLINE_ROTATION, &cfg->features)) + has_inline_rotation = true; + + if (!format_list || + cfg->sblk->csc_blk.len) { + format_list = cfg->sblk->format_list; + num_formats = cfg->sblk->num_formats; + } + } + + plane = dpu_plane_init_common(dev, type, possible_crtcs, + has_inline_rotation, + format_list, + num_formats, + SSPP_NONE); + if (IS_ERR(plane)) + return plane; + + drm_plane_helper_add(plane, &dpu_plane_virtual_helper_funcs); + + DPU_DEBUG("%s created virtual id:%u\n", plane->name, plane->base.id); + return plane; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h index 97090ca7842b..acd5725175cd 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h @@ -62,10 +62,23 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, uint32_t pipe, enum drm_plane_type type, unsigned long possible_crtcs); +struct drm_plane *dpu_plane_init_virtual(struct drm_device *dev, + enum drm_plane_type type, + unsigned long possible_crtcs); + +int dpu_plane_color_fill(struct drm_plane *plane, + uint32_t color, uint32_t alpha); + #ifdef CONFIG_DEBUG_FS void dpu_plane_danger_signal_ctrl(struct drm_plane *plane, bool enable); #else static inline void dpu_plane_danger_signal_ctrl(struct drm_plane *plane, bool enable) {} #endif +int dpu_assign_plane_resources(struct dpu_global_state *global_state, + struct drm_atomic_state *state, + struct drm_crtc *crtc, + struct drm_plane_state **states, + unsigned int num_planes); + #endif /* _DPU_PLANE_H_ */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index c247af03dc8e..5baf9df702b8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ 
-1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #define pr_fmt(fmt) "[drm:%s] " fmt, __func__ @@ -9,6 +9,7 @@ #include "dpu_hw_lm.h" #include "dpu_hw_ctl.h" #include "dpu_hw_cdm.h" +#include "dpu_hw_cwb.h" #include "dpu_hw_pingpong.h" #include "dpu_hw_sspp.h" #include "dpu_hw_intf.h" @@ -27,14 +28,6 @@ static inline bool reserved_by_other(uint32_t *res_map, int idx, } /** - * struct dpu_rm_requirements - Reservation requirements parameter bundle - * @topology: selected topology for the display - */ -struct dpu_rm_requirements { - struct msm_display_topology topology; -}; - -/** * dpu_rm_init - Read hardware catalog and create reservation tracking objects * for all HW blocks. * @dev: Corresponding device for devres management @@ -130,6 +123,19 @@ int dpu_rm_init(struct drm_device *dev, rm->hw_wb[wb->id - WB_0] = hw; } + for (i = 0; i < cat->cwb_count; i++) { + struct dpu_hw_cwb *hw; + const struct dpu_cwb_cfg *cwb = &cat->cwb[i]; + + hw = dpu_hw_cwb_init(dev, cwb, mmio); + if (IS_ERR(hw)) { + rc = PTR_ERR(hw); + DPU_ERROR("failed cwb object creation: err %d\n", rc); + goto fail; + } + rm->cwb_blks[cwb->id - CWB_0] = &hw->base; + } + for (i = 0; i < cat->ctl_count; i++) { struct dpu_hw_ctl *hw; const struct dpu_ctl_cfg *ctl = &cat->ctl[i]; @@ -241,14 +247,13 @@ static int _dpu_rm_get_lm_peer(struct dpu_rm *rm, int primary_idx) * mixer in rm->pingpong_blks[]. * @dspp_idx: output parameter, index of dspp block attached to the layer * mixer in rm->dspp_blks[]. - * @reqs: input parameter, rm requirements for HW blocks needed in the - * datapath. 
+ * @topology: selected topology for the display * Return: true if lm matches all requirements, false otherwise */ static bool _dpu_rm_check_lm_and_get_connected_blks(struct dpu_rm *rm, struct dpu_global_state *global_state, uint32_t enc_id, int lm_idx, int *pp_idx, int *dspp_idx, - struct dpu_rm_requirements *reqs) + struct msm_display_topology *topology) { const struct dpu_lm_cfg *lm_cfg; int idx; @@ -273,7 +278,7 @@ static bool _dpu_rm_check_lm_and_get_connected_blks(struct dpu_rm *rm, } *pp_idx = idx; - if (!reqs->topology.num_dspp) + if (!topology->num_dspp) return true; idx = lm_cfg->dspp - DSPP_0; @@ -295,7 +300,7 @@ static bool _dpu_rm_check_lm_and_get_connected_blks(struct dpu_rm *rm, static int _dpu_rm_reserve_lms(struct dpu_rm *rm, struct dpu_global_state *global_state, uint32_t enc_id, - struct dpu_rm_requirements *reqs) + struct msm_display_topology *topology) { int lm_idx[MAX_BLOCKS]; @@ -303,14 +308,14 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, int dspp_idx[MAX_BLOCKS] = {0}; int i, lm_count = 0; - if (!reqs->topology.num_lm) { - DPU_ERROR("invalid number of lm: %d\n", reqs->topology.num_lm); + if (!topology->num_lm) { + DPU_ERROR("invalid number of lm: %d\n", topology->num_lm); return -EINVAL; } /* Find a primary mixer */ for (i = 0; i < ARRAY_SIZE(rm->mixer_blks) && - lm_count < reqs->topology.num_lm; i++) { + lm_count < topology->num_lm; i++) { if (!rm->mixer_blks[i]) continue; @@ -319,14 +324,14 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, if (!_dpu_rm_check_lm_and_get_connected_blks(rm, global_state, enc_id, i, &pp_idx[lm_count], - &dspp_idx[lm_count], reqs)) { + &dspp_idx[lm_count], topology)) { continue; } ++lm_count; /* Valid primary mixer found, find matching peers */ - if (lm_count < reqs->topology.num_lm) { + if (lm_count < topology->num_lm) { int j = _dpu_rm_get_lm_peer(rm, i); /* ignore the peer if there is an error or if the peer was already processed */ @@ -339,7 +344,7 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, if (!_dpu_rm_check_lm_and_get_connected_blks(rm, global_state, enc_id, j, &pp_idx[lm_count], &dspp_idx[lm_count], - reqs)) { + topology)) { continue; } @@ -348,7 +353,7 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, } } - if (lm_count != reqs->topology.num_lm) { + if (lm_count != topology->num_lm) { DPU_DEBUG("unable to find appropriate mixers\n"); return -ENAVAIL; } @@ -357,7 +362,7 @@ static int _dpu_rm_reserve_lms(struct dpu_rm *rm, global_state->mixer_to_enc_id[lm_idx[i]] = enc_id; global_state->pingpong_to_enc_id[pp_idx[i]] = enc_id; global_state->dspp_to_enc_id[dspp_idx[i]] = - reqs->topology.num_dspp ? enc_id : 0; + topology->num_dspp ? 
enc_id : 0; trace_dpu_rm_reserve_lms(lm_idx[i] + LM_0, enc_id, pp_idx[i] + PINGPONG_0); @@ -594,28 +599,28 @@ static int _dpu_rm_make_reservation( struct dpu_rm *rm, struct dpu_global_state *global_state, struct drm_encoder *enc, - struct dpu_rm_requirements *reqs) + struct msm_display_topology *topology) { int ret; - ret = _dpu_rm_reserve_lms(rm, global_state, enc->base.id, reqs); + ret = _dpu_rm_reserve_lms(rm, global_state, enc->base.id, topology); if (ret) { DPU_ERROR("unable to find appropriate mixers\n"); return ret; } ret = _dpu_rm_reserve_ctls(rm, global_state, enc->base.id, - &reqs->topology); + topology); if (ret) { DPU_ERROR("unable to find appropriate CTL\n"); return ret; } - ret = _dpu_rm_reserve_dsc(rm, global_state, enc, &reqs->topology); + ret = _dpu_rm_reserve_dsc(rm, global_state, enc, topology); if (ret) return ret; - if (reqs->topology.needs_cdm) { + if (topology->needs_cdm) { ret = _dpu_rm_reserve_cdm(rm, global_state, enc); if (ret) { DPU_ERROR("unable to find CDM blk\n"); @@ -626,20 +631,6 @@ static int _dpu_rm_make_reservation( return ret; } -static int _dpu_rm_populate_requirements( - struct drm_encoder *enc, - struct dpu_rm_requirements *reqs, - struct msm_display_topology req_topology) -{ - reqs->topology = req_topology; - - DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d cdm: %d\n", - reqs->topology.num_lm, reqs->topology.num_dsc, - reqs->topology.num_intf, reqs->topology.needs_cdm); - - return 0; -} - static void _dpu_rm_clear_mapping(uint32_t *res_mapping, int cnt, uint32_t enc_id) { @@ -693,9 +684,8 @@ int dpu_rm_reserve( struct dpu_global_state *global_state, struct drm_encoder *enc, struct drm_crtc_state *crtc_state, - struct msm_display_topology topology) + struct msm_display_topology *topology) { - struct dpu_rm_requirements reqs; int ret; /* Check if this is just a page-flip */ @@ -710,13 +700,11 @@ int dpu_rm_reserve( DRM_DEBUG_KMS("reserving hw for enc %d crtc %d\n", enc->base.id, crtc_state->crtc->base.id); - ret = _dpu_rm_populate_requirements(enc, &reqs, topology); - if (ret) { - DPU_ERROR("failed to populate hw requirements\n"); - return ret; - } + DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d\n", + topology->num_lm, topology->num_dsc, + topology->num_intf); - ret = _dpu_rm_make_reservation(rm, global_state, enc, &reqs); + ret = _dpu_rm_make_reservation(rm, global_state, enc, topology); if (ret) DPU_ERROR("failed to reserve hw resources: %d\n", ret); @@ -725,6 +713,88 @@ int dpu_rm_reserve( return ret; } +static struct dpu_hw_sspp *dpu_rm_try_sspp(struct dpu_rm *rm, + struct dpu_global_state *global_state, + struct drm_crtc *crtc, + struct dpu_rm_sspp_requirements *reqs, + unsigned int type) +{ + uint32_t crtc_id = crtc->base.id; + struct dpu_hw_sspp *hw_sspp; + int i; + + for (i = 0; i < ARRAY_SIZE(rm->hw_sspp); i++) { + if (!rm->hw_sspp[i]) + continue; + + if (global_state->sspp_to_crtc_id[i]) + continue; + + hw_sspp = rm->hw_sspp[i]; + + if (hw_sspp->cap->type != type) + continue; + + if (reqs->scale && !hw_sspp->cap->sblk->scaler_blk.len) + continue; + + /* TODO: QSEED2 and RGB scalers are not yet supported */ + if (reqs->scale && !hw_sspp->ops.setup_scaler) + continue; + + if (reqs->yuv && !hw_sspp->cap->sblk->csc_blk.len) + continue; + + if (reqs->rot90 && !(hw_sspp->cap->features & BIT(DPU_SSPP_INLINE_ROTATION))) + continue; + + global_state->sspp_to_crtc_id[i] = crtc_id; + + return rm->hw_sspp[i]; + } + + return NULL; +} + +/** + * dpu_rm_reserve_sspp - Reserve the required SSPP for the provided CRTC + * @rm: DPU Resource Manager handle + *
@global_state: private global state + * @crtc: DRM CRTC handle + * @reqs: SSPP required features + */ +struct dpu_hw_sspp *dpu_rm_reserve_sspp(struct dpu_rm *rm, + struct dpu_global_state *global_state, + struct drm_crtc *crtc, + struct dpu_rm_sspp_requirements *reqs) +{ + struct dpu_hw_sspp *hw_sspp = NULL; + + if (!reqs->scale && !reqs->yuv) + hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_DMA); + if (!hw_sspp && reqs->scale) + hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_RGB); + if (!hw_sspp) + hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_VIG); + + return hw_sspp; +} + +/** + * dpu_rm_release_all_sspp - Given the CRTC, release all SSPP + * blocks previously reserved for that use case. + * @global_state: resources shared across multiple kms objects + * @crtc: DRM CRTC handle + */ +void dpu_rm_release_all_sspp(struct dpu_global_state *global_state, + struct drm_crtc *crtc) +{ + uint32_t crtc_id = crtc->base.id; + + _dpu_rm_clear_mapping(global_state->sspp_to_crtc_id, + ARRAY_SIZE(global_state->sspp_to_crtc_id), crtc_id); +} + /** * dpu_rm_get_assigned_resources - Get hw resources of the given type that are * assigned to this encoder @@ -859,4 +929,11 @@ void dpu_rm_print_state(struct drm_printer *p, dpu_rm_print_state_helper(p, rm->cdm_blk, global_state->cdm_to_enc_id); drm_puts(p, "\n"); + + drm_puts(p, "\tsspp="); + /* skip SSPP_NONE and start from the next index */ + for (i = SSPP_NONE + 1; i < ARRAY_SIZE(global_state->sspp_to_crtc_id); i++) + dpu_rm_print_state_helper(p, rm->hw_sspp[i] ? &rm->hw_sspp[i]->base : NULL, + global_state->sspp_to_crtc_id[i]); + drm_puts(p, "\n"); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h index ea0e49cb7b0d..99bd594ee0d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h @@ -20,6 +20,7 @@ struct dpu_global_state; * @ctl_blks: array of ctl hardware resources * @hw_intf: array of intf hardware resources * @hw_wb: array of wb hardware resources + * @hw_cwb: array of cwb hardware resources * @dspp_blks: array of dspp hardware resources * @hw_sspp: array of sspp hardware resources * @cdm_blk: cdm hardware resource @@ -30,6 +31,7 @@ struct dpu_rm { struct dpu_hw_blk *ctl_blks[CTL_MAX - CTL_0]; struct dpu_hw_intf *hw_intf[INTF_MAX - INTF_0]; struct dpu_hw_wb *hw_wb[WB_MAX - WB_0]; + struct dpu_hw_blk *cwb_blks[CWB_MAX - CWB_0]; struct dpu_hw_blk *dspp_blks[DSPP_MAX - DSPP_0]; struct dpu_hw_blk *merge_3d_blks[MERGE_3D_MAX - MERGE_3D_0]; struct dpu_hw_blk *dsc_blks[DSC_MAX - DSC_0]; @@ -37,6 +39,12 @@ struct dpu_rm { struct dpu_hw_blk *cdm_blk; }; +struct dpu_rm_sspp_requirements { + bool yuv; + bool scale; + bool rot90; +}; + /** * struct msm_display_topology - defines a display topology pipeline * @num_lm: number of layer mixers used @@ -63,11 +71,19 @@ int dpu_rm_reserve(struct dpu_rm *rm, struct dpu_global_state *global_state, struct drm_encoder *drm_enc, struct drm_crtc_state *crtc_state, - struct msm_display_topology topology); + struct msm_display_topology *topology); void dpu_rm_release(struct dpu_global_state *global_state, struct drm_encoder *enc); +struct dpu_hw_sspp *dpu_rm_reserve_sspp(struct dpu_rm *rm, + struct dpu_global_state *global_state, + struct drm_crtc *crtc, + struct dpu_rm_sspp_requirements *reqs); + +void dpu_rm_release_all_sspp(struct dpu_global_state *global_state, + struct drm_crtc *crtc); + int dpu_rm_get_assigned_resources(struct dpu_rm *rm, struct dpu_global_state *global_state, 
uint32_t enc_id, enum dpu_hw_blk_type type, struct dpu_hw_blk **blks, int blks_size); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 16f144cbc0c9..8ff496082902 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -42,9 +42,6 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector, if (!conn_state || !conn_state->connector) { DPU_ERROR("invalid connector state\n"); return -EINVAL; - } else if (conn_state->connector->status != connector_status_connected) { - DPU_ERROR("connector not connected %d\n", conn_state->connector->status); - return -EINVAL; } crtc = conn_state->crtc; diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c index 576995ddce37..8bbc7fb881d5 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c @@ -389,7 +389,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, /* TODO: different regulators in other cases? */ mdp4_lcdc_encoder->regs[0].supply = "lvds-vccs-3p3v"; - mdp4_lcdc_encoder->regs[1].supply = "lvds-vccs-3p3v"; + mdp4_lcdc_encoder->regs[1].supply = "lvds-pll-vdda"; mdp4_lcdc_encoder->regs[2].supply = "lvds-vdda"; ret = devm_regulator_bulk_get(dev->dev, diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c index e75b97127c0d..2be00b11e557 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c @@ -109,7 +109,7 @@ int msm_disp_snapshot_init(struct drm_device *drm_dev) mutex_init(&kms->dump_mutex); - kms->dump_worker = kthread_create_worker(0, "%s", "disp_snapshot"); + kms->dump_worker = kthread_run_worker(0, "%s", "disp_snapshot"); if (IS_ERR(kms->dump_worker)) DRM_ERROR("failed to create disp state task\n"); diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c index 0fd5e0abaf07..70fdc9fe228a 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.c +++ b/drivers/gpu/drm/msm/dp/dp_audio.c @@ -14,6 +14,7 @@ #include "dp_catalog.h" #include "dp_audio.h" #include "dp_panel.h" +#include "dp_reg.h" #include "dp_display.h" #include "dp_utils.h" @@ -28,251 +29,64 @@ struct msm_dp_audio_private { struct msm_dp_audio msm_dp_audio; }; -static u32 msm_dp_audio_get_header(struct msm_dp_catalog *catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header) -{ - return msm_dp_catalog_audio_get_header(catalog, sdp, header); -} - -static void msm_dp_audio_set_header(struct msm_dp_catalog *catalog, - u32 data, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header) -{ - msm_dp_catalog_audio_set_header(catalog, sdp, header, data); -} - static void msm_dp_audio_stream_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x02; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_1); - - /* Config header and 
parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_2); - new_value = value; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_2); - - /* Config header and parity byte 3 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_3); - - new_value = audio->channels - 1; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_3_BIT) - | (parity_byte << PARITY_BYTE_3_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 3: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_STREAM, DP_AUDIO_SDP_HEADER_3); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x02, + .HB2 = 0x00, + .HB3 = audio->channels - 1, + }; + + msm_dp_catalog_write_audio_stream(audio->catalog, &sdp_hdr); } static void msm_dp_audio_timestamp_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x1; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_1); - - /* Config header and parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_2); - - new_value = 0x17; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_2); - - /* Config header and parity byte 3 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_3); - - new_value = (0x0 | (0x11 << 2)); - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_3_BIT) - | (parity_byte << PARITY_BYTE_3_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 3: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_TIMESTAMP, DP_AUDIO_SDP_HEADER_3); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x01, + .HB2 = 0x17, + .HB3 = 0x0 | (0x11 << 2), + }; + + msm_dp_catalog_write_audio_timestamp(audio->catalog, &sdp_hdr); } static void msm_dp_audio_infoframe_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x84; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value 
= 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_1); - - /* Config header and parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_2); - - new_value = 0x1b; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_2); - - /* Config header and parity byte 3 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_3); - - new_value = (0x0 | (0x11 << 2)); - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_3_BIT) - | (parity_byte << PARITY_BYTE_3_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 3: value = 0x%x, parity_byte = 0x%x\n", - new_value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_INFOFRAME, DP_AUDIO_SDP_HEADER_3); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x84, + .HB2 = 0x1b, + .HB3 = 0x0 | (0x11 << 2), + }; + + msm_dp_catalog_write_audio_infoframe(audio->catalog, &sdp_hdr); } static void msm_dp_audio_copy_management_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x05; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_1); - - /* Config header and parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_2); - - new_value = 0x0F; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_2); - - /* Config header and parity byte 3 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_3); - - new_value = 0x0; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_3_BIT) - | (parity_byte << PARITY_BYTE_3_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 3: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_COPYMANAGEMENT, DP_AUDIO_SDP_HEADER_3); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x05, + .HB2 = 0x0f, + .HB3 = 0x00, + }; + + msm_dp_catalog_write_audio_copy_mgmt(audio->catalog, &sdp_hdr); } static void msm_dp_audio_isrc_sdp(struct msm_dp_audio_private *audio) { - struct msm_dp_catalog *catalog = audio->catalog; - u32 value, new_value; - u8 parity_byte; - - /* Config header and parity byte 1 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_ISRC, DP_AUDIO_SDP_HEADER_1); - - new_value = 0x06; - parity_byte 
= msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_1_BIT) - | (parity_byte << PARITY_BYTE_1_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 1: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_ISRC, DP_AUDIO_SDP_HEADER_1); - - /* Config header and parity byte 2 */ - value = msm_dp_audio_get_header(catalog, - DP_AUDIO_SDP_ISRC, DP_AUDIO_SDP_HEADER_2); - - new_value = 0x0F; - parity_byte = msm_dp_utils_calculate_parity(new_value); - value |= ((new_value << HEADER_BYTE_2_BIT) - | (parity_byte << PARITY_BYTE_2_BIT)); - drm_dbg_dp(audio->drm_dev, - "Header Byte 2: value = 0x%x, parity_byte = 0x%x\n", - value, parity_byte); - msm_dp_audio_set_header(catalog, value, - DP_AUDIO_SDP_ISRC, DP_AUDIO_SDP_HEADER_2); + struct dp_sdp_header sdp_hdr = { + .HB0 = 0x00, + .HB1 = 0x06, + .HB2 = 0x0f, + .HB3 = 0x00, + }; + + msm_dp_catalog_write_audio_isrc(audio->catalog, &sdp_hdr); } static void msm_dp_audio_setup_sdp(struct msm_dp_audio_private *audio) @@ -329,10 +143,10 @@ static void msm_dp_audio_safe_to_exit_level(struct msm_dp_audio_private *audio) safe_to_exit_level = 5; break; default: + safe_to_exit_level = 14; drm_dbg_dp(audio->drm_dev, "setting the default safe_to_exit_level = %u\n", safe_to_exit_level); - safe_to_exit_level = 14; break; } @@ -539,14 +353,13 @@ int msm_dp_register_audio_driver(struct device *dev, } struct msm_dp_audio *msm_dp_audio_get(struct platform_device *pdev, - struct msm_dp_panel *panel, struct msm_dp_catalog *catalog) { int rc = 0; struct msm_dp_audio_private *audio; struct msm_dp_audio *msm_dp_audio; - if (!pdev || !panel || !catalog) { + if (!pdev || !catalog) { DRM_ERROR("invalid input\n"); rc = -EINVAL; goto error; @@ -563,8 +376,6 @@ struct msm_dp_audio *msm_dp_audio_get(struct platform_device *pdev, msm_dp_audio = &audio->msm_dp_audio; - msm_dp_catalog_audio_init(catalog); - return msm_dp_audio; error: return ERR_PTR(rc); diff --git a/drivers/gpu/drm/msm/dp/dp_audio.h b/drivers/gpu/drm/msm/dp/dp_audio.h index 1c9efaaa40e5..beea34cbab77 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.h +++ b/drivers/gpu/drm/msm/dp/dp_audio.h @@ -8,7 +8,6 @@ #include <linux/platform_device.h> -#include "dp_panel.h" #include "dp_catalog.h" #include <sound/hdmi-codec.h> @@ -28,14 +27,12 @@ struct msm_dp_audio { * Creates an instance of dp audio. * * @pdev: caller's platform device instance. - * @panel: an instance of msm_dp_panel module. * @catalog: an instance of msm_dp_catalog module. * * Returns the error code in case of failure, otherwise * an instance of newly created msm_dp_module.
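The pattern used throughout the rewritten audio functions above is to fill a struct dp_sdp_header with HB0..HB3 and let the msm_dp_catalog_write_audio_*() helpers pack and write it. A standalone sketch of the packing step follows; the byte layout mirrors the HEADER_*/PARITY_* masks in dp_utils.h, and the parity() stub is a placeholder, not the driver's real ECC parity from dp_utils.c:

    #include <stdint.h>
    #include <stdio.h>

    struct sdp_header { uint8_t HB0, HB1, HB2, HB3; };

    /* Placeholder only -- the driver derives a real ECC parity byte. */
    static uint8_t parity(uint8_t b)
    {
        return b ^ 0xff;
    }

    /*
     * Each header byte is paired with its parity byte; HB0/HB1 land in the
     * first 32-bit register word, HB2/HB3 in the second.
     */
    static void pack_sdp_header(const struct sdp_header *h, uint32_t out[2])
    {
        out[0] = (uint32_t)h->HB0 |
                 (uint32_t)parity(h->HB0) << 8 |
                 (uint32_t)h->HB1 << 16 |
                 (uint32_t)parity(h->HB1) << 24;
        out[1] = (uint32_t)h->HB2 |
                 (uint32_t)parity(h->HB2) << 8 |
                 (uint32_t)h->HB3 << 16 |
                 (uint32_t)parity(h->HB3) << 24;
    }

    int main(void)
    {
        /* Audio stream SDP, two channels: HB1 = 0x02, HB3 = channels - 1. */
        struct sdp_header h = { 0x00, 0x02, 0x00, 2 - 1 };
        uint32_t words[2];

        pack_sdp_header(&h, words);
        printf("0x%08x 0x%08x\n", words[0], words[1]);
        return 0;
    }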
*/ struct msm_dp_audio *msm_dp_audio_get(struct platform_device *pdev, - struct msm_dp_panel *panel, struct msm_dp_catalog *catalog); /** diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index b4c8856fb25d..7b7eadb2f83b 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -79,7 +79,6 @@ struct msm_dp_catalog_private { struct device *dev; struct drm_device *drm_dev; struct dss_io_data io; - u32 (*audio_map)[DP_AUDIO_SDP_HEADER_MAX]; struct msm_dp_catalog msm_dp_catalog; }; @@ -276,43 +275,6 @@ int msm_dp_catalog_aux_wait_for_hpd_connect_state(struct msm_dp_catalog *msm_dp_ min(wait_us, 2000), wait_us); } -static void dump_regs(void __iomem *base, int len) -{ - int i; - u32 x0, x4, x8, xc; - u32 addr_off = 0; - - len = DIV_ROUND_UP(len, 16); - for (i = 0; i < len; i++) { - x0 = readl_relaxed(base + addr_off); - x4 = readl_relaxed(base + addr_off + 0x04); - x8 = readl_relaxed(base + addr_off + 0x08); - xc = readl_relaxed(base + addr_off + 0x0c); - - pr_info("%08x: %08x %08x %08x %08x", addr_off, x0, x4, x8, xc); - addr_off += 16; - } -} - -void msm_dp_catalog_dump_regs(struct msm_dp_catalog *msm_dp_catalog) -{ - struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, - struct msm_dp_catalog_private, msm_dp_catalog); - struct dss_io_data *io = &catalog->io; - - pr_info("AHB regs\n"); - dump_regs(io->ahb.base, io->ahb.len); - - pr_info("AUXCLK regs\n"); - dump_regs(io->aux.base, io->aux.len); - - pr_info("LCLK regs\n"); - dump_regs(io->link.base, io->link.len); - - pr_info("P0CLK regs\n"); - dump_regs(io->p0.base, io->p0.len); -} - u32 msm_dp_catalog_aux_get_irq(struct msm_dp_catalog *msm_dp_catalog) { struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, @@ -1036,7 +998,6 @@ void msm_dp_catalog_panel_tpg_enable(struct msm_dp_catalog *msm_dp_catalog, display_hctl = (hsync_end_x << 16) | hsync_start_x; - msm_dp_write_p0(catalog, MMSS_DP_INTF_CONFIG, 0x0); msm_dp_write_p0(catalog, MMSS_DP_INTF_HSYNC_CTL, hsync_ctl); msm_dp_write_p0(catalog, MMSS_DP_INTF_VSYNC_PERIOD_F0, vsync_period * hsync_period); @@ -1160,38 +1121,75 @@ struct msm_dp_catalog *msm_dp_catalog_get(struct device *dev) return &catalog->msm_dp_catalog; } -u32 msm_dp_catalog_audio_get_header(struct msm_dp_catalog *msm_dp_catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header) +void msm_dp_catalog_write_audio_stream(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) { - struct msm_dp_catalog_private *catalog; - u32 (*sdp_map)[DP_AUDIO_SDP_HEADER_MAX]; + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + u32 header[2]; - catalog = container_of(msm_dp_catalog, - struct msm_dp_catalog_private, msm_dp_catalog); + msm_dp_utils_pack_sdp_header(sdp_hdr, header); + + msm_dp_write_link(catalog, MMSS_DP_AUDIO_STREAM_0, header[0]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_STREAM_1, header[1]); +} + +void msm_dp_catalog_write_audio_timestamp(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) +{ + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + u32 header[2]; - sdp_map = catalog->audio_map; + msm_dp_utils_pack_sdp_header(sdp_hdr, header); - return msm_dp_read_link(catalog, sdp_map[sdp][header]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_TIMESTAMP_0, header[0]); + msm_dp_write_link(catalog, 
MMSS_DP_AUDIO_TIMESTAMP_1, header[1]); } -void msm_dp_catalog_audio_set_header(struct msm_dp_catalog *msm_dp_catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header, - u32 data) +void msm_dp_catalog_write_audio_infoframe(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) { - struct msm_dp_catalog_private *catalog; - u32 (*sdp_map)[DP_AUDIO_SDP_HEADER_MAX]; + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + u32 header[2]; - if (!msm_dp_catalog) - return; + msm_dp_utils_pack_sdp_header(sdp_hdr, header); - catalog = container_of(msm_dp_catalog, - struct msm_dp_catalog_private, msm_dp_catalog); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_INFOFRAME_0, header[0]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_INFOFRAME_1, header[1]); +} + +void msm_dp_catalog_write_audio_copy_mgmt(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) +{ + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + u32 header[2]; - sdp_map = catalog->audio_map; + msm_dp_utils_pack_sdp_header(sdp_hdr, header); - msm_dp_write_link(catalog, sdp_map[sdp][header], data); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_COPYMANAGEMENT_0, header[0]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_COPYMANAGEMENT_1, header[1]); +} + +void msm_dp_catalog_write_audio_isrc(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr) +{ + struct msm_dp_catalog_private *catalog = container_of(msm_dp_catalog, + struct msm_dp_catalog_private, msm_dp_catalog); + struct dp_sdp_header tmp = *sdp_hdr; + u32 header[2]; + u32 reg; + + /* XXX: is it necessary to preserve this field? */ + reg = msm_dp_read_link(catalog, MMSS_DP_AUDIO_ISRC_1); + tmp.HB3 = FIELD_GET(HEADER_3_MASK, reg); + + msm_dp_utils_pack_sdp_header(&tmp, header); + + msm_dp_write_link(catalog, MMSS_DP_AUDIO_ISRC_0, header[0]); + msm_dp_write_link(catalog, MMSS_DP_AUDIO_ISRC_1, header[1]); } void msm_dp_catalog_audio_config_acr(struct msm_dp_catalog *msm_dp_catalog, u32 select) @@ -1277,47 +1275,6 @@ void msm_dp_catalog_audio_config_sdp(struct msm_dp_catalog *msm_dp_catalog) msm_dp_write_link(catalog, MMSS_DP_SDP_CFG2, sdp_cfg2); } -void msm_dp_catalog_audio_init(struct msm_dp_catalog *msm_dp_catalog) -{ - struct msm_dp_catalog_private *catalog; - - static u32 sdp_map[][DP_AUDIO_SDP_HEADER_MAX] = { - { - MMSS_DP_AUDIO_STREAM_0, - MMSS_DP_AUDIO_STREAM_1, - MMSS_DP_AUDIO_STREAM_1, - }, - { - MMSS_DP_AUDIO_TIMESTAMP_0, - MMSS_DP_AUDIO_TIMESTAMP_1, - MMSS_DP_AUDIO_TIMESTAMP_1, - }, - { - MMSS_DP_AUDIO_INFOFRAME_0, - MMSS_DP_AUDIO_INFOFRAME_1, - MMSS_DP_AUDIO_INFOFRAME_1, - }, - { - MMSS_DP_AUDIO_COPYMANAGEMENT_0, - MMSS_DP_AUDIO_COPYMANAGEMENT_1, - MMSS_DP_AUDIO_COPYMANAGEMENT_1, - }, - { - MMSS_DP_AUDIO_ISRC_0, - MMSS_DP_AUDIO_ISRC_1, - MMSS_DP_AUDIO_ISRC_1, - }, - }; - - if (!msm_dp_catalog) - return; - - catalog = container_of(msm_dp_catalog, - struct msm_dp_catalog_private, msm_dp_catalog); - - catalog->audio_map = sdp_map; -} - void msm_dp_catalog_audio_sfe_level(struct msm_dp_catalog *msm_dp_catalog, u32 safe_to_exit_level) { struct msm_dp_catalog_private *catalog; diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.h b/drivers/gpu/drm/msm/dp/dp_catalog.h index e932b17eecbf..6678b0ac9a67 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.h +++ b/drivers/gpu/drm/msm/dp/dp_catalog.h @@ -31,22 +31,6 @@ #define DP_HW_VERSION_1_0 0x10000000 #define DP_HW_VERSION_1_2 
0x10020000 -enum msm_dp_catalog_audio_sdp_type { - DP_AUDIO_SDP_STREAM, - DP_AUDIO_SDP_TIMESTAMP, - DP_AUDIO_SDP_INFOFRAME, - DP_AUDIO_SDP_COPYMANAGEMENT, - DP_AUDIO_SDP_ISRC, - DP_AUDIO_SDP_MAX, -}; - -enum msm_dp_catalog_audio_header_type { - DP_AUDIO_SDP_HEADER_1, - DP_AUDIO_SDP_HEADER_2, - DP_AUDIO_SDP_HEADER_3, - DP_AUDIO_SDP_HEADER_MAX, -}; - struct msm_dp_catalog { bool wide_bus_en; }; @@ -104,7 +88,6 @@ int msm_dp_catalog_panel_timing_cfg(struct msm_dp_catalog *msm_dp_catalog, u32 t u32 sync_start, u32 width_blanking, u32 msm_dp_active); void msm_dp_catalog_panel_enable_vsc_sdp(struct msm_dp_catalog *msm_dp_catalog, struct dp_sdp *vsc_sdp); void msm_dp_catalog_panel_disable_vsc_sdp(struct msm_dp_catalog *msm_dp_catalog); -void msm_dp_catalog_dump_regs(struct msm_dp_catalog *msm_dp_catalog); void msm_dp_catalog_panel_tpg_enable(struct msm_dp_catalog *msm_dp_catalog, struct drm_display_mode *drm_mode); void msm_dp_catalog_panel_tpg_disable(struct msm_dp_catalog *msm_dp_catalog); @@ -112,17 +95,19 @@ void msm_dp_catalog_panel_tpg_disable(struct msm_dp_catalog *msm_dp_catalog); struct msm_dp_catalog *msm_dp_catalog_get(struct device *dev); /* DP Audio APIs */ -u32 msm_dp_catalog_audio_get_header(struct msm_dp_catalog *msm_dp_catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header); -void msm_dp_catalog_audio_set_header(struct msm_dp_catalog *msm_dp_catalog, - enum msm_dp_catalog_audio_sdp_type sdp, - enum msm_dp_catalog_audio_header_type header, - u32 data); +void msm_dp_catalog_write_audio_stream(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); +void msm_dp_catalog_write_audio_timestamp(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); +void msm_dp_catalog_write_audio_infoframe(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); +void msm_dp_catalog_write_audio_copy_mgmt(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); +void msm_dp_catalog_write_audio_isrc(struct msm_dp_catalog *msm_dp_catalog, + struct dp_sdp_header *sdp_hdr); void msm_dp_catalog_audio_config_acr(struct msm_dp_catalog *catalog, u32 select); void msm_dp_catalog_audio_enable(struct msm_dp_catalog *catalog, bool enable); void msm_dp_catalog_audio_config_sdp(struct msm_dp_catalog *catalog); -void msm_dp_catalog_audio_init(struct msm_dp_catalog *catalog); void msm_dp_catalog_audio_sfe_level(struct msm_dp_catalog *catalog, u32 safe_to_exit_level); #endif /* _DP_CATALOG_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index bc2ca8133b79..9c463ae2f8fa 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -178,7 +178,6 @@ static void msm_dp_ctrl_configure_source_params(struct msm_dp_ctrl_private *ctrl u32 cc, tb; msm_dp_catalog_ctrl_lane_mapping(ctrl->catalog); - msm_dp_catalog_ctrl_mainlink_ctrl(ctrl->catalog, true); msm_dp_catalog_setup_peripheral_flush(ctrl->catalog); msm_dp_ctrl_config_ctrl(ctrl); @@ -2071,6 +2070,7 @@ void msm_dp_ctrl_off_link(struct msm_dp_ctrl *msm_dp_ctrl) msm_dp_catalog_ctrl_mainlink_ctrl(ctrl->catalog, false); + dev_pm_opp_set_rate(ctrl->dev, 0); msm_dp_ctrl_link_clk_disable(&ctrl->msm_dp_ctrl); DRM_DEBUG_DP("Before, phy=%p init_count=%d power_on=%d\n", diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index aff51bb973eb..24dd37f1682b 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -722,9 +722,6 @@ static int 
msm_dp_init_sub_modules(struct msm_dp_display_private *dp) { int rc = 0; struct device *dev = &dp->msm_dp_display.pdev->dev; - struct msm_dp_panel_in panel_in = { - .dev = dev, - }; struct phy *phy; phy = devm_phy_get(dev, "dp"); @@ -765,11 +762,7 @@ static int msm_dp_init_sub_modules(struct msm_dp_display_private *dp) goto error_link; } - panel_in.aux = dp->aux; - panel_in.catalog = dp->catalog; - panel_in.link = dp->link; - - dp->panel = msm_dp_panel_get(&panel_in); + dp->panel = msm_dp_panel_get(dev, dp->aux, dp->link, dp->catalog); if (IS_ERR(dp->panel)) { rc = PTR_ERR(dp->panel); DRM_ERROR("failed to initialize panel, rc = %d\n", rc); @@ -787,7 +780,7 @@ static int msm_dp_init_sub_modules(struct msm_dp_display_private *dp) goto error_ctrl; } - dp->audio = msm_dp_audio_get(dp->msm_dp_display.pdev, dp->panel, dp->catalog); + dp->audio = msm_dp_audio_get(dp->msm_dp_display.pdev, dp->catalog); if (IS_ERR(dp->audio)) { rc = PTR_ERR(dp->audio); pr_err("failed to initialize audio, rc = %d\n", rc); diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 5d7eaa31bf31..92415bf8aa16 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -317,17 +317,6 @@ static int msm_dp_panel_setup_vsc_sdp_yuv_420(struct msm_dp_panel *msm_dp_panel) return 0; } -void msm_dp_panel_dump_regs(struct msm_dp_panel *msm_dp_panel) -{ - struct msm_dp_catalog *catalog; - struct msm_dp_panel_private *panel; - - panel = container_of(msm_dp_panel, struct msm_dp_panel_private, msm_dp_panel); - catalog = panel->catalog; - - msm_dp_catalog_dump_regs(catalog); -} - int msm_dp_panel_timing_cfg(struct msm_dp_panel *msm_dp_panel) { u32 data, total_ver, total_hor; @@ -486,25 +475,26 @@ static int msm_dp_panel_parse_dt(struct msm_dp_panel *msm_dp_panel) return 0; } -struct msm_dp_panel *msm_dp_panel_get(struct msm_dp_panel_in *in) +struct msm_dp_panel *msm_dp_panel_get(struct device *dev, struct drm_dp_aux *aux, + struct msm_dp_link *link, struct msm_dp_catalog *catalog) { struct msm_dp_panel_private *panel; struct msm_dp_panel *msm_dp_panel; int ret; - if (!in->dev || !in->catalog || !in->aux || !in->link) { + if (!dev || !catalog || !aux || !link) { DRM_ERROR("invalid input\n"); return ERR_PTR(-EINVAL); } - panel = devm_kzalloc(in->dev, sizeof(*panel), GFP_KERNEL); + panel = devm_kzalloc(dev, sizeof(*panel), GFP_KERNEL); if (!panel) return ERR_PTR(-ENOMEM); - panel->dev = in->dev; - panel->aux = in->aux; - panel->catalog = in->catalog; - panel->link = in->link; + panel->dev = dev; + panel->aux = aux; + panel->catalog = catalog; + panel->link = link; msm_dp_panel = &panel->msm_dp_panel; msm_dp_panel->max_bw_code = DP_LINK_BW_8_1; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.h b/drivers/gpu/drm/msm/dp/dp_panel.h index 0e944db3adf2..4906f4f09f24 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.h +++ b/drivers/gpu/drm/msm/dp/dp_panel.h @@ -21,13 +21,6 @@ struct msm_dp_display_mode { bool out_fmt_is_yuv_420; }; -struct msm_dp_panel_in { - struct device *dev; - struct drm_dp_aux *aux; - struct msm_dp_link *link; - struct msm_dp_catalog *catalog; -}; - struct msm_dp_panel_psr { u8 version; u8 capabilities; @@ -55,7 +48,6 @@ struct msm_dp_panel { int msm_dp_panel_init_panel_info(struct msm_dp_panel *msm_dp_panel); int msm_dp_panel_deinit(struct msm_dp_panel *msm_dp_panel); int msm_dp_panel_timing_cfg(struct msm_dp_panel *msm_dp_panel); -void msm_dp_panel_dump_regs(struct msm_dp_panel *msm_dp_panel); int msm_dp_panel_read_sink_caps(struct msm_dp_panel *msm_dp_panel, struct 
drm_connector *connector); u32 msm_dp_panel_get_mode_bpp(struct msm_dp_panel *msm_dp_panel, u32 mode_max_bpp, @@ -92,6 +84,7 @@ static inline bool is_lane_count_valid(u32 lane_count) lane_count == 4); } -struct msm_dp_panel *msm_dp_panel_get(struct msm_dp_panel_in *in); +struct msm_dp_panel *msm_dp_panel_get(struct device *dev, struct drm_dp_aux *aux, + struct msm_dp_link *link, struct msm_dp_catalog *catalog); void msm_dp_panel_put(struct msm_dp_panel *msm_dp_panel); #endif /* _DP_PANEL_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_utils.c b/drivers/gpu/drm/msm/dp/dp_utils.c index 2a40f07fe2d5..4a5ebb0c33b8 100644 --- a/drivers/gpu/drm/msm/dp/dp_utils.c +++ b/drivers/gpu/drm/msm/dp/dp_utils.c @@ -74,14 +74,8 @@ u8 msm_dp_utils_calculate_parity(u32 data) return parity_byte; } -ssize_t msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 *header_buff) +void msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 header_buff[2]) { - size_t length; - - length = sizeof(header_buff); - if (length < DP_SDP_HEADER_SIZE) - return -ENOSPC; - header_buff[0] = FIELD_PREP(HEADER_0_MASK, sdp_header->HB0) | FIELD_PREP(PARITY_0_MASK, msm_dp_utils_calculate_parity(sdp_header->HB0)) | FIELD_PREP(HEADER_1_MASK, sdp_header->HB1) | @@ -91,6 +85,4 @@ ssize_t msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 *head FIELD_PREP(PARITY_2_MASK, msm_dp_utils_calculate_parity(sdp_header->HB2)) | FIELD_PREP(HEADER_3_MASK, sdp_header->HB3) | FIELD_PREP(PARITY_3_MASK, msm_dp_utils_calculate_parity(sdp_header->HB3)); - - return length; } diff --git a/drivers/gpu/drm/msm/dp/dp_utils.h b/drivers/gpu/drm/msm/dp/dp_utils.h index 88d53157f5b5..2e4f98a863c4 100644 --- a/drivers/gpu/drm/msm/dp/dp_utils.h +++ b/drivers/gpu/drm/msm/dp/dp_utils.h @@ -31,6 +31,6 @@ u8 msm_dp_utils_get_g0_value(u8 data); u8 msm_dp_utils_get_g1_value(u8 data); u8 msm_dp_utils_calculate_parity(u32 data); -ssize_t msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 *header_buff); +void msm_dp_utils_pack_sdp_header(struct dp_sdp_header *sdp_header, u32 header_buff[2]); #endif /* _DP_UTILS_H_ */ diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index 10ba7d153d1c..7754dcec33d0 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -286,6 +286,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_3_0, &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, + {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_3_1, + &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_4_0, &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_4_1, diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index 4c9b4b37681b..120cb65164c1 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -23,6 +23,7 @@ #define MSM_DSI_6G_VER_MINOR_V2_2_0 0x20000000 #define MSM_DSI_6G_VER_MINOR_V2_2_1 0x20020001 #define MSM_DSI_6G_VER_MINOR_V2_3_0 0x20030000 +#define MSM_DSI_6G_VER_MINOR_V2_3_1 0x20030001 #define MSM_DSI_6G_VER_MINOR_V2_4_0 0x20040000 #define MSM_DSI_6G_VER_MINOR_V2_4_1 0x20040001 #define MSM_DSI_6G_VER_MINOR_V2_5_0 0x20050000 diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index a98d24b7cb00..007311c21fda 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ 
-1831,7 +1831,7 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host) msm_dsi->te_source = devm_kstrdup(dev, te_source, GFP_KERNEL); ret = 0; - if (of_property_read_bool(np, "syscon-sfpb")) { + if (of_property_present(np, "syscon-sfpb")) { msm_host->sfpb = syscon_regmap_lookup_by_phandle(np, "syscon-sfpb"); if (IS_ERR(msm_host->sfpb)) { diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index dd58bc0a49eb..c0bcc6828963 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -567,6 +567,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { .data = &dsi_phy_14nm_8953_cfgs }, { .compatible = "qcom,sm6125-dsi-phy-14nm", .data = &dsi_phy_14nm_2290_cfgs }, + { .compatible = "qcom,sm6150-dsi-phy-14nm", + .data = &dsi_phy_14nm_6150_cfgs }, #endif #ifdef CONFIG_DRM_MSM_DSI_10NM_PHY { .compatible = "qcom,dsi-phy-10nm", diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index 4953459edd63..8985818bb2e0 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -46,6 +46,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8937_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_14nm_6150_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_660_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index 1723f0e4faa4..2c3cbe0f2870 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -1032,6 +1032,10 @@ static const struct regulator_bulk_data dsi_phy_14nm_73p4mA_regulators[] = { { .supply = "vcca", .init_load_uA = 73400 }, }; +static const struct regulator_bulk_data dsi_phy_14nm_36mA_regulators[] = { + { .supply = "vdda", .init_load_uA = 36000 }, +}; + const struct msm_dsi_phy_cfg dsi_phy_14nm_cfgs = { .has_phy_lane = true, .regulator_data = dsi_phy_14nm_17mA_regulators, @@ -1097,3 +1101,20 @@ const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs = { .io_start = { 0x5e94400 }, .num_dsi_phy = 1, }; + +const struct msm_dsi_phy_cfg dsi_phy_14nm_6150_cfgs = { + .has_phy_lane = true, + .regulator_data = dsi_phy_14nm_36mA_regulators, + .num_regulators = ARRAY_SIZE(dsi_phy_14nm_36mA_regulators), + .ops = { + .enable = dsi_14nm_phy_enable, + .disable = dsi_14nm_phy_disable, + .pll_init = dsi_pll_14nm_init, + .save_pll_state = dsi_14nm_pll_save_state, + .restore_pll_state = dsi_14nm_pll_restore_state, + }, + .min_pll_rate = VCO_MIN_RATE, + .max_pll_rate = VCO_MAX_RATE, + .io_start = { 0xae94400 }, + .num_dsi_phy = 1, +}; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c index a719fd33d9d8..33bb48ae58a2 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c @@ -137,7 +137,7 @@ static inline u32 pll_get_integloop_gain(u64 frac_start, u64 bclk, u32 ref_clk, base <<= (digclk_divsel == 2 ? 1 : 0); - return (base <= 2046 ? 
base : 2046); + return base; } static inline u32 pll_get_pll_cmp(u64 fdata, unsigned long ref_clk) diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 9c45d641b521..a7a2384044ff 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -115,7 +115,7 @@ int msm_atomic_init_pending_timer(struct msm_pending_timer *timer, timer->kms = kms; timer->crtc_idx = crtc_idx; - timer->worker = kthread_create_worker(0, "atomic-worker-%d", crtc_idx); + timer->worker = kthread_run_worker(0, "atomic-worker-%d", crtc_idx); if (IS_ERR(timer->worker)) { int ret = PTR_ERR(timer->worker); timer->worker = NULL; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 34b971490f10..ff7a7a9f7b0d 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -538,7 +538,7 @@ static int msm_ioctl_gem_info_set_iova(struct drm_device *dev, /* Only supported if per-process address space is supported: */ if (priv->gpu->aspace == ctx->aspace) - return -EOPNOTSUPP; + return UERR(EOPNOTSUPP, dev, "requires per-process pgtables"); if (should_fail(&fail_gem_iova, obj->size)) return -ENOMEM; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index d8c9a1b19263..fee31680a6d5 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -28,6 +28,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include <drm/display/drm_dsc.h> #include <drm/msm_drm.h> @@ -506,6 +507,12 @@ void msm_hrtimer_work_init(struct msm_hrtimer_work *work, clockid_t clock_id, enum hrtimer_mode mode); +/* Helper for returning a UABI error with optional logging which can make + * it easier for userspace to understand what it is doing wrong. + */ +#define UERR(err, drm, fmt, ...) \ + ({ DRM_DEV_DEBUG_DRIVER((drm)->dev, fmt, ##__VA_ARGS__); -(err); }) + #define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) #define VERB(fmt, ...) if (0) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index fba78193127d..dee470403036 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -20,8 +20,8 @@ /* For userspace errors, use DRM_UT_DRIVER.. so that userspace can enable * error msgs for debugging, but we don't spam dmesg by default */ -#define SUBMIT_ERROR(submit, fmt, ...) \ - DRM_DEV_DEBUG_DRIVER((submit)->dev->dev, fmt, ##__VA_ARGS__) +#define SUBMIT_ERROR(err, submit, fmt, ...) 
\ + UERR(err, (submit)->dev, fmt, ##__VA_ARGS__) /* * Cmdstream submission: @@ -142,8 +142,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) || !(submit_bo.flags & MANDATORY_FLAGS)) { - SUBMIT_ERROR(submit, "invalid flags: %x\n", submit_bo.flags); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid flags: %x\n", submit_bo.flags); i = 0; goto out; } @@ -162,8 +161,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, */ obj = idr_find(&file->object_idr, submit->bos[i].handle); if (!obj) { - SUBMIT_ERROR(submit, "invalid handle %u at index %u\n", submit->bos[i].handle, i); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid handle %u at index %u\n", submit->bos[i].handle, i); goto out_unlock; } @@ -206,14 +204,12 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: break; default: - SUBMIT_ERROR(submit, "invalid type: %08x\n", submit_cmd.type); - return -EINVAL; + return SUBMIT_ERROR(EINVAL, submit, "invalid type: %08x\n", submit_cmd.type); } if (submit_cmd.size % 4) { - SUBMIT_ERROR(submit, "non-aligned cmdstream buffer size: %u\n", - submit_cmd.size); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "non-aligned cmdstream buffer size: %u\n", + submit_cmd.size); goto out; } @@ -371,9 +367,8 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, struct drm_gem_object **obj, uint64_t *iova) { if (idx >= submit->nr_bos) { - SUBMIT_ERROR(submit, "invalid buffer index: %u (out of %u)\n", - idx, submit->nr_bos); - return -EINVAL; + return SUBMIT_ERROR(EINVAL, submit, "invalid buffer index: %u (out of %u)\n", + idx, submit->nr_bos); } if (obj) @@ -392,10 +387,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob uint32_t *ptr; int ret = 0; - if (offset % 4) { - SUBMIT_ERROR(submit, "non-aligned cmdstream buffer: %u\n", offset); - return -EINVAL; - } + if (offset % 4) + return SUBMIT_ERROR(EINVAL, submit, "non-aligned cmdstream buffer: %u\n", offset); /* For now, just map the entire thing. Eventually we probably * to do it page-by-page, w/ kmap() if not vmap()d.. 
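/*
 * Editor's aside, not part of the patch: a minimal sketch of the UERR() /
 * SUBMIT_ERROR() pattern the hunks above introduce.  The GNU statement
 * expression logs the message at debug level and then evaluates to the
 * negative errno, so the old two-step "log the problem, then ret = -EINVAL"
 * collapses into a single assignment or return.  The log_dbg() helper below
 * is a hypothetical stand-in for DRM_DEV_DEBUG_DRIVER(); only the shape of
 * the macro mirrors the patch.
 */
#include <errno.h>
#include <stdio.h>

#define log_dbg(fmt, ...) fprintf(stderr, fmt "\n", ##__VA_ARGS__)

/* Logs, then yields -(err) as the value of the whole expression. */
#define UERR(err, fmt, ...) \
	({ log_dbg(fmt, ##__VA_ARGS__); -(err); })

static int check_flags(unsigned int flags, unsigned int valid)
{
	if (flags & ~valid)
		return UERR(EINVAL, "invalid flags: %x", flags); /* -EINVAL */
	return 0;
}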
@@ -414,9 +407,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob uint64_t iova; if (submit_reloc.submit_offset % 4) { - SUBMIT_ERROR(submit, "non-aligned reloc offset: %u\n", - submit_reloc.submit_offset); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "non-aligned reloc offset: %u\n", + submit_reloc.submit_offset); goto out; } @@ -425,8 +417,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob if ((off >= (obj->size / 4)) || (off < last_offset)) { - SUBMIT_ERROR(submit, "invalid offset %u at reloc %u\n", off, i); - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid offset %u at reloc %u\n", off, i); goto out; } @@ -513,12 +504,12 @@ static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, if (syncobj_desc.point && !drm_core_check_feature(submit->dev, DRIVER_SYNCOBJ_TIMELINE)) { - ret = -EOPNOTSUPP; + ret = SUBMIT_ERROR(EOPNOTSUPP, submit, "syncobj timeline unsupported"); break; } if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) { - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid syncobj flags: %x", syncobj_desc.flags); break; } @@ -531,7 +522,7 @@ static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, syncobjs[i] = drm_syncobj_find(file, syncobj_desc.handle); if (!syncobjs[i]) { - ret = -EINVAL; + ret = SUBMIT_ERROR(EINVAL, submit, "invalid syncobj handle: %u", i); break; } } @@ -588,14 +579,14 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, post_deps[i].point = syncobj_desc.point; if (syncobj_desc.flags) { - ret = -EINVAL; + ret = UERR(EINVAL, dev, "invalid syncobj flags"); break; } if (syncobj_desc.point) { if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) { - ret = -EOPNOTSUPP; + ret = UERR(EOPNOTSUPP, dev, "syncobj timeline unsupported"); break; } @@ -609,7 +600,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, post_deps[i].syncobj = drm_syncobj_find(file, syncobj_desc.handle); if (!post_deps[i].syncobj) { - ret = -EINVAL; + ret = UERR(EINVAL, dev, "invalid syncobj handle"); break; } } @@ -677,10 +668,10 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, * be more clever to dispatch to appropriate gpu module: */ if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0) - return -EINVAL; + return UERR(EINVAL, dev, "invalid pipe"); if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS) - return -EINVAL; + return UERR(EINVAL, dev, "invalid flags"); if (args->flags & MSM_SUBMIT_SUDO) { if (!IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) || @@ -724,7 +715,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, in_fence = sync_file_get_fence(args->fence_fd); if (!in_fence) { - ret = -EINVAL; + ret = UERR(EINVAL, dev, "invalid in-fence"); goto out_unlock; } @@ -787,10 +778,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out; if (!submit->cmd[i].size || - ((submit->cmd[i].size + submit->cmd[i].offset) > - obj->size / 4)) { - SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4); - ret = -EINVAL; + (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) { + ret = UERR(EINVAL, dev, "invalid cmdstream size: %u\n", + submit->cmd[i].size * 4); goto out; } @@ -800,8 +790,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, continue; if (!gpu->allow_relocs) { - SUBMIT_ERROR(submit, "relocs not allowed\n"); - ret = -EINVAL; + ret = UERR(EINVAL, dev, "relocs not allowed\n"); goto out; } @@ -827,7 +816,7 @@ int 
msm_ioctl_gem_submit(struct drm_device *dev, void *data, (!args->fence || idr_find(&queue->fence_idr, args->fence))) { spin_unlock(&queue->idr_lock); idr_preload_end(); - ret = -EINVAL; + ret = UERR(EINVAL, dev, "invalid in-fence-sn"); goto out; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0d4a3744cfcb..8557998e0c92 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -859,7 +859,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->funcs = funcs; gpu->name = name; - gpu->worker = kthread_create_worker(0, "gpu-worker"); + gpu->worker = kthread_run_worker(0, "gpu-worker"); if (IS_ERR(gpu->worker)) { ret = PTR_ERR(gpu->worker); gpu->worker = NULL; diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c index f3326d09bdbc..38965e12a6bf 100644 --- a/drivers/gpu/drm/msm/msm_kms.c +++ b/drivers/gpu/drm/msm/msm_kms.c @@ -244,7 +244,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) ret = priv->kms_init(ddev); if (ret) { DRM_DEV_ERROR(dev, "failed to load kms\n"); - priv->kms = NULL; return ret; } @@ -269,7 +268,7 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) /* initialize event thread */ ev_thread = &priv->event_thread[drm_crtc_index(crtc)]; ev_thread->dev = ddev; - ev_thread->worker = kthread_create_worker(0, "crtc_event:%d", crtc->base.id); + ev_thread->worker = kthread_run_worker(0, "crtc_event:%d", crtc->base.id); if (IS_ERR(ev_thread->worker)) { ret = PTR_ERR(ev_thread->worker); DRM_DEV_ERROR(dev, "failed to create crtc_event kthread\n"); diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 76b6ae35a3cb..dcb49fd30402 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -166,22 +166,32 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss) static void msm_mdss_setup_ubwc_dec_20(struct msm_mdss *msm_mdss) { const struct msm_mdss_data *data = msm_mdss->mdss_data; + u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle) | + MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit); - writel_relaxed(data->ubwc_static, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); + if (data->ubwc_bank_spread) + value |= MDSS_UBWC_STATIC_UBWC_BANK_SPREAD; + + if (data->ubwc_enc_version == UBWC_1_0) + value |= MDSS_UBWC_STATIC_UBWC_MIN_ACC_LEN(1); + + writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); } static void msm_mdss_setup_ubwc_dec_30(struct msm_mdss *msm_mdss) { const struct msm_mdss_data *data = msm_mdss->mdss_data; - u32 value = (data->ubwc_swizzle & 0x1) | - (data->highest_bank_bit & 0x3) << 4 | - (data->macrotile_mode & 0x1) << 12; + u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle & 0x1) | + MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit); + + if (data->macrotile_mode) + value |= MDSS_UBWC_STATIC_MACROTILE_MODE; if (data->ubwc_enc_version == UBWC_3_0) - value |= BIT(10); + value |= MDSS_UBWC_STATIC_UBWC_AMSBC; if (data->ubwc_enc_version == UBWC_1_0) - value |= BIT(8); + value |= MDSS_UBWC_STATIC_UBWC_MIN_ACC_LEN(1); writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); } @@ -189,10 +199,14 @@ static void msm_mdss_setup_ubwc_dec_30(struct msm_mdss *msm_mdss) static void msm_mdss_setup_ubwc_dec_40(struct msm_mdss *msm_mdss) { const struct msm_mdss_data *data = msm_mdss->mdss_data; - u32 value = (data->ubwc_swizzle & 0x7) | - (data->ubwc_static & 0x1) << 3 | - (data->highest_bank_bit & 0x7) << 4 | - (data->macrotile_mode & 0x1) << 
12; + u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle) | + MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit); + + if (data->ubwc_bank_spread) + value |= MDSS_UBWC_STATIC_UBWC_BANK_SPREAD; + + if (data->macrotile_mode) + value |= MDSS_UBWC_STATIC_MACROTILE_MODE; writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); @@ -572,16 +586,17 @@ static const struct msm_mdss_data sa8775p_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 4, - .ubwc_static = 1, + .ubwc_bank_spread = true, .highest_bank_bit = 0, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 74000, }; static const struct msm_mdss_data sc7180_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, - .ubwc_static = 0x1e, + .ubwc_swizzle = 6, + .ubwc_bank_spread = true, .highest_bank_bit = 0x1, .reg_bus_bw = 76800, }; @@ -590,9 +605,9 @@ static const struct msm_mdss_data sc7280_data = { .ubwc_enc_version = UBWC_3_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, .highest_bank_bit = 1, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 74000, }; @@ -600,7 +615,7 @@ static const struct msm_mdss_data sc8180x_data = { .ubwc_enc_version = UBWC_3_0, .ubwc_dec_version = UBWC_3_0, .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 76800, }; @@ -608,9 +623,9 @@ static const struct msm_mdss_data sc8280xp_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 76800, }; @@ -632,7 +647,7 @@ static const struct msm_mdss_data sm6350_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .ubwc_swizzle = 6, - .ubwc_static = 0x1e, + .ubwc_bank_spread = true, .highest_bank_bit = 1, .reg_bus_bw = 76800, }; @@ -655,7 +670,7 @@ static const struct msm_mdss_data sm6115_data = { .ubwc_enc_version = UBWC_1_0, .ubwc_dec_version = UBWC_2_0, .ubwc_swizzle = 7, - .ubwc_static = 0x11f, + .ubwc_bank_spread = true, .highest_bank_bit = 0x1, .reg_bus_bw = 76800, }; @@ -667,14 +682,21 @@ static const struct msm_mdss_data sm6125_data = { .highest_bank_bit = 1, }; +static const struct msm_mdss_data sm6150_data = { + .ubwc_enc_version = UBWC_2_0, + .ubwc_dec_version = UBWC_2_0, + .highest_bank_bit = 1, + .reg_bus_bw = 76800, +}; + static const struct msm_mdss_data sm8250_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 76800, }; @@ -682,10 +704,10 @@ static const struct msm_mdss_data sm8350_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 74000, }; @@ -693,10 +715,10 @@ static const struct msm_mdss_data sm8550_data = { .ubwc_enc_version = UBWC_4_0, .ubwc_dec_version = UBWC_4_3, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, .reg_bus_bw = 57000, }; @@ -704,10 +726,10 @@ static const struct msm_mdss_data x1e80100_data = { .ubwc_enc_version = UBWC_4_0, 
.ubwc_dec_version = UBWC_4_3, .ubwc_swizzle = 6, - .ubwc_static = 1, + .ubwc_bank_spread = true, /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, - .macrotile_mode = 1, + .macrotile_mode = true, /* TODO: Add reg_bus_bw with real value */ }; @@ -724,6 +746,7 @@ static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,sc8280xp-mdss", .data = &sc8280xp_data }, { .compatible = "qcom,sm6115-mdss", .data = &sm6115_data }, { .compatible = "qcom,sm6125-mdss", .data = &sm6125_data }, + { .compatible = "qcom,sm6150-mdss", .data = &sm6150_data }, { .compatible = "qcom,sm6350-mdss", .data = &sm6350_data }, { .compatible = "qcom,sm6375-mdss", .data = &sm6350_data }, { .compatible = "qcom,sm7150-mdss", .data = &sm7150_data }, diff --git a/drivers/gpu/drm/msm/msm_mdss.h b/drivers/gpu/drm/msm/msm_mdss.h index 3afef4b1786d..14dc53704314 100644 --- a/drivers/gpu/drm/msm/msm_mdss.h +++ b/drivers/gpu/drm/msm/msm_mdss.h @@ -11,9 +11,9 @@ struct msm_mdss_data { /* can be read from register 0x58 */ u32 ubwc_dec_version; u32 ubwc_swizzle; - u32 ubwc_static; u32 highest_bank_bit; - u32 macrotile_mode; + bool ubwc_bank_spread; + bool macrotile_mode; u32 reg_bus_bw; }; diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 2fc3eaf81f44..7fed1de63b5d 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -18,7 +18,7 @@ int msm_file_private_set_sysprof(struct msm_file_private *ctx, switch (sysprof) { default: - return -EINVAL; + return UERR(EINVAL, gpu->dev, "Invalid sysprof: %d", sysprof); case 2: pm_runtime_get_sync(&gpu->pdev->dev); fallthrough; diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml index 6531749d30f4..3d2cc339b8f1 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml @@ -52,6 +52,11 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x23fd" name="GMU_DCVS_PERF_SETTING"/> <reg32 offset="0x23fe" name="GMU_DCVS_BW_SETTING"/> <reg32 offset="0x23ff" name="GMU_DCVS_RETURN"/> + <reg32 offset="0x2bf8" name="GMU_CORE_FW_VERSION"> + <bitfield name="MAJOR" low="28" high="31"/> + <bitfield name="MINOR" low="16" high="27"/> + <bitfield name="STEP" low="0" high="15"/> + </reg32> <reg32 offset="0x4c00" name="GMU_ICACHE_CONFIG"/> <reg32 offset="0x4c01" name="GMU_DCACHE_CONFIG"/> <reg32 offset="0x4c0f" name="GMU_SYS_BUS_CONFIG"/> diff --git a/drivers/gpu/drm/msm/registers/display/mdss.xml b/drivers/gpu/drm/msm/registers/display/mdss.xml index ac85caf1575c..6e9f81cd4690 100644 --- a/drivers/gpu/drm/msm/registers/display/mdss.xml +++ b/drivers/gpu/drm/msm/registers/display/mdss.xml @@ -21,7 +21,16 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x00058" name="UBWC_DEC_HW_VERSION"/> - <reg32 offset="0x00144" name="UBWC_STATIC"/> + <reg32 offset="0x00144" name="UBWC_STATIC"> + <bitfield name="UBWC_SWIZZLE" low="0" high="2"/> + <bitfield name="UBWC_BANK_SPREAD" pos="3"/> + <!-- high=5 for UBWC < 4.0 --> + <bitfield name="HIGHEST_BANK_BIT" low="4" high="6"/> + <bitfield name="UBWC_MIN_ACC_LEN" low="8" high="9"/> + <bitfield name="UBWC_AMSBC" pos="10"/> + <bitfield name="MACROTILE_MODE" pos="12"/> + </reg32> + <reg32 offset="0x00150" name="UBWC_CTRL_2"/> <reg32 offset="0x00154" name="UBWC_PREDICTION_MODE"/> </domain> diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c 
b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 8f0c69aad248..21b56cc7605c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -384,7 +384,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector) if (ret < 0) return NULL; - return kmemdup(edid, EDID_LENGTH, GFP_KERNEL); + return edid; } bool nouveau_acpi_video_backlight_use_native(void) diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 2cb2e5675807..cd659b9fd1d9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -279,7 +279,6 @@ nouveau_channel_ctor(struct nouveau_cli *cli, bool priv, u64 runm, const u64 plength = 0x10000; const u64 ioffset = plength; const u64 ilength = 0x02000; - char name[TASK_COMM_LEN]; int cid, ret; u64 size; @@ -338,8 +337,7 @@ nouveau_channel_ctor(struct nouveau_cli *cli, bool priv, u64 runm, chan->userd = &chan->user; } - get_task_comm(name, current); - snprintf(args.name, sizeof(args.name), "%s[%d]", name, task_pid_nr(current)); + snprintf(args.name, sizeof(args.name), "%s[%d]", current->comm, task_pid_nr(current)); ret = nvif_object_ctor(&device->object, "abi16ChanUser", 0, hosts[cid].oclass, &args, sizeof(args), &chan->user); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 21d2d9ca5e85..5664c4c71faf 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1175,7 +1175,7 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) { struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_cli *cli; - char name[32], tmpname[TASK_COMM_LEN]; + char name[32]; int ret; /* need to bring up power immediately if opening device */ @@ -1185,10 +1185,9 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) return ret; } - get_task_comm(tmpname, current); rcu_read_lock(); snprintf(name, sizeof(name), "%s[%d]", - tmpname, pid_nr(rcu_dereference(fpriv->pid))); + current->comm, pid_nr(rcu_dereference(fpriv->pid))); rcu_read_unlock(); if (!(cli = kzalloc(sizeof(*cli), GFP_KERNEL))) { diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 09686d038d60..7cc84472cece 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -387,11 +387,13 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, if (f) { struct nouveau_channel *prev; bool must_wait = true; + bool local; rcu_read_lock(); prev = rcu_dereference(f->channel); - if (prev && (prev == chan || - fctx->sync(f, prev, chan) == 0)) + local = prev && prev->cli->drm == chan->cli->drm; + if (local && (prev == chan || + fctx->sync(f, prev, chan) == 0)) must_wait = false; rcu_read_unlock(); if (!must_wait) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c index 841e3b69fcaf..5a0c9b8a79f3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c @@ -31,6 +31,7 @@ mcp77_sor = { .state = g94_sor_state, .power = nv50_sor_power, .clock = nv50_sor_clock, + .bl = &nv50_sor_bl, .hdmi = &g84_sor_hdmi, .dp = &g94_sor_dp, }; diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c index 8b48bba18131..3644a7544b93 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83102.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c @@ -565,6 +565,8 @@ static 
int hx83102_get_modes(struct drm_panel *panel, struct drm_display_mode *mode; mode = drm_mode_duplicate(connector->dev, m); + if (!mode) + return -ENOMEM; mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; drm_mode_set_name(mode); diff --git a/drivers/gpu/drm/panel/panel-novatek-nt35950.c b/drivers/gpu/drm/panel/panel-novatek-nt35950.c index b036208f9356..08b22b592ab0 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt35950.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt35950.c @@ -481,9 +481,9 @@ static int nt35950_probe(struct mipi_dsi_device *dsi) return dev_err_probe(dev, -EPROBE_DEFER, "Cannot get secondary DSI host\n"); nt->dsi[1] = mipi_dsi_device_register_full(dsi_r_host, info); - if (!nt->dsi[1]) { + if (IS_ERR(nt->dsi[1])) { dev_err(dev, "Cannot get secondary DSI node\n"); - return -ENODEV; + return PTR_ERR(nt->dsi[1]); } num_dsis++; } diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index dbad12a354b6..9b2f128fd309 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -4454,6 +4454,37 @@ static const struct panel_desc ti_nspire_classic_lcd_panel = { .bus_flags = DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE, }; +static const struct display_timing topland_tian_g07017_01_timing = { + .pixelclock = { 44900000, 51200000, 63000000 }, + .hactive = { 1024, 1024, 1024 }, + .hfront_porch = { 16, 160, 216 }, + .hback_porch = { 160, 160, 160 }, + .hsync_len = { 1, 1, 140 }, + .vactive = { 600, 600, 600 }, + .vfront_porch = { 1, 12, 127 }, + .vback_porch = { 23, 23, 23 }, + .vsync_len = { 1, 1, 20 }, +}; + +static const struct panel_desc topland_tian_g07017_01 = { + .timings = &topland_tian_g07017_01_timing, + .num_timings = 1, + .bpc = 8, + .size = { + .width = 154, + .height = 86, + }, + .delay = { + .prepare = 1, /* 6.5 - 150µs PLL wake-up time */ + .enable = 100, /* 6.4 - Power on: 6 VSyncs */ + .disable = 84, /* 6.4 - Power off: 5 Vsyncs */ + .unprepare = 50, /* 6.4 - Power off: 3 Vsyncs */ + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, +}; + static const struct drm_display_mode toshiba_lt089ac29000_mode = { .clock = 79500, .hdisplay = 1280, @@ -5140,6 +5171,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "toshiba,lt089ac29000", .data = &toshiba_lt089ac29000, }, { + .compatible = "topland,tian-g07017-01", + .data = &topland_tian_g07017_01, + }, { .compatible = "tpk,f07a-0102", .data = &tpk_f07a_0102, }, { diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c index eef03d04e0cd..1f72ef7ca74c 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c @@ -1177,6 +1177,7 @@ static int st7701_probe(struct device *dev, int connector_type) return dev_err_probe(dev, ret, "Failed to get orientation\n"); drm_panel_init(&st7701->panel, dev, &st7701_funcs, connector_type); + st7701->panel.prepare_prev_first = true; /** * Once sleep out has been issued, ST7701 IC required to wait 120ms diff --git a/drivers/gpu/drm/panel/panel-synaptics-r63353.c b/drivers/gpu/drm/panel/panel-synaptics-r63353.c index 169c629746c7..17349825543f 100644 --- a/drivers/gpu/drm/panel/panel-synaptics-r63353.c +++ b/drivers/gpu/drm/panel/panel-synaptics-r63353.c @@ -325,7 +325,7 @@ static void r63353_panel_shutdown(struct mipi_dsi_device *dsi) { struct r63353_panel *rpanel = mipi_dsi_get_drvdata(dsi); - 
r63353_panel_unprepare(&rpanel->base); + drm_panel_unprepare(&rpanel->base); } static const struct r63353_desc sharp_ls068b3sx02_data = { diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index b76337f690ec..b17de83b988b 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -885,7 +885,6 @@ static const struct hdmi_codec_ops audio_codec_ops = { .mute_stream = cdn_dp_audio_mute_stream, .get_eld = cdn_dp_audio_get_eld, .hook_plugged_cb = cdn_dp_audio_hook_plugged_cb, - .no_capture_mute = 1, }; static int cdn_dp_audio_codec_init(struct cdn_dp_device *dp, @@ -896,6 +895,7 @@ static int cdn_dp_audio_codec_init(struct cdn_dp_device *dp, .spdif = 1, .ops = &audio_codec_ops, .max_i2s_channels = 8, + .no_capture_mute = 1, }; dp->audio_pdev = platform_device_register_data( diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 3ca6a688f5f5..17a98845fd31 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1454,16 +1454,18 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, half_block_en); if (afbc_en) { - u32 stride; + u32 stride, block_w; + + /* the afbc superblock is 16 x 16 or 32 x 8 */ + block_w = fb->modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 ? 32 : 16; - /* the afbc superblock is 16 x 16 */ afbc_format = vop2_convert_afbc_format(fb->format->format); /* Enable color transform for YTR */ if (fb->modifier & AFBC_FORMAT_MOD_YTR) afbc_format |= (1 << 4); - afbc_tile_num = ALIGN(actual_w, 16) >> 4; + afbc_tile_num = ALIGN(actual_w, block_w) / block_w; /* * AFBC pic_vir_width is count by pixel, this is different @@ -1474,6 +1476,9 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, drm_dbg_kms(vop2->drm, "vp%d %s stride[%d] not 64 pixel aligned\n", vp->id, win->data->name, stride); + /* It's for head stride, each head size is 16 byte */ + stride = ALIGN(stride, block_w) / block_w * 16; + uv_swap = vop2_afbc_uv_swap(fb->format->format); /* * This is a workaround for crazy IC design, Cluster @@ -1504,7 +1509,11 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, else vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 1); - vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0); + if (fb->modifier & AFBC_FORMAT_MOD_SPLIT) + vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 1); + else + vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0); + transform_offset = vop2_afbc_transform_offset(pstate, half_block_en); vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst); vop2_win_write(win, VOP2_WIN_AFBC_PIC_SIZE, act_info); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 7ce25281c74c..57da84908752 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1355,7 +1355,8 @@ EXPORT_SYMBOL(drm_sched_init); * drm_sched_backend_ops.run_job(). Consequently, drm_sched_backend_ops.free_job() * will not be called for all jobs still in drm_gpu_scheduler.pending_list. * There is no solution for this currently. 
Thus, it is up to the driver to make - * sure that + * sure that: + * * a) drm_sched_fini() is only called after for all submitted jobs * drm_sched_backend_ops.free_job() has been called or that * b) the jobs for which drm_sched_backend_ops.free_job() has not been called diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index f8bbae6393ef..ca2fe17de4a5 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -1237,7 +1237,6 @@ static const struct hdmi_codec_ops audio_codec_ops = { .audio_shutdown = hdmi_audio_shutdown, .mute_stream = hdmi_audio_mute, .get_eld = hdmi_audio_get_eld, - .no_capture_mute = 1, }; static int sti_hdmi_register_audio_driver(struct device *dev, @@ -1247,6 +1246,7 @@ static int sti_hdmi_register_audio_driver(struct device *dev, .ops = &audio_codec_ops, .max_i2s_channels = 8, .i2s = 1, + .no_capture_mute = 1, }; DRM_DEBUG_DRIVER("\n"); diff --git a/drivers/gpu/drm/tests/drm_connector_test.c b/drivers/gpu/drm/tests/drm_connector_test.c index 129e813cfd1b..22e2d959eb31 100644 --- a/drivers/gpu/drm/tests/drm_connector_test.c +++ b/drivers/gpu/drm/tests/drm_connector_test.c @@ -1095,6 +1095,64 @@ static void drm_test_connector_hdmi_init_formats_no_rgb(struct kunit *test) KUNIT_EXPECT_LT(test, ret, 0); } +struct drm_connector_hdmi_init_formats_yuv420_allowed_test { + unsigned long supported_formats; + bool yuv420_allowed; + int expected_result; +}; + +#define YUV420_ALLOWED_TEST(_formats, _allowed, _result) \ + { \ + .supported_formats = BIT(HDMI_COLORSPACE_RGB) | (_formats), \ + .yuv420_allowed = _allowed, \ + .expected_result = _result, \ + } + +static const struct drm_connector_hdmi_init_formats_yuv420_allowed_test +drm_connector_hdmi_init_formats_yuv420_allowed_tests[] = { + YUV420_ALLOWED_TEST(BIT(HDMI_COLORSPACE_YUV420), true, 0), + YUV420_ALLOWED_TEST(BIT(HDMI_COLORSPACE_YUV420), false, -EINVAL), + YUV420_ALLOWED_TEST(BIT(HDMI_COLORSPACE_YUV422), true, -EINVAL), + YUV420_ALLOWED_TEST(BIT(HDMI_COLORSPACE_YUV422), false, 0), +}; + +static void +drm_connector_hdmi_init_formats_yuv420_allowed_desc(const struct drm_connector_hdmi_init_formats_yuv420_allowed_test *t, + char *desc) +{ + sprintf(desc, "supported_formats=0x%lx yuv420_allowed=%d", + t->supported_formats, t->yuv420_allowed); +} + +KUNIT_ARRAY_PARAM(drm_connector_hdmi_init_formats_yuv420_allowed, + drm_connector_hdmi_init_formats_yuv420_allowed_tests, + drm_connector_hdmi_init_formats_yuv420_allowed_desc); + +/* + * Test that the registration of an HDMI connector succeeds only when + * the presence of YUV420 in the supported formats matches the value + * of the ycbcr_420_allowed flag. + */ +static void drm_test_connector_hdmi_init_formats_yuv420_allowed(struct kunit *test) +{ + const struct drm_connector_hdmi_init_formats_yuv420_allowed_test *params; + struct drm_connector_init_priv *priv = test->priv; + int ret; + + params = test->param_value; + priv->connector.ycbcr_420_allowed = params->yuv420_allowed; + + ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", + &dummy_funcs, + &dummy_hdmi_funcs, + DRM_MODE_CONNECTOR_HDMIA, + &priv->ddc, + params->supported_formats, + 8); + KUNIT_EXPECT_EQ(test, ret, params->expected_result); +} + /* * Test that the registration of an HDMI connector with an HDMI * connector type succeeds. 
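/*
 * Editor's aside, not part of the patch: the yuv420_allowed test added above
 * follows KUnit's table-driven parameterization.  KUNIT_ARRAY_PARAM(name,
 * array, desc_fn) generates a name_gen_params() iterator, KUNIT_CASE_PARAM()
 * wires it to the test case, and each run receives one array element through
 * test->param_value.  The struct and values below are illustrative only.
 */
#include <kunit/test.h>

struct add_case { int a, b, sum; };

static const struct add_case add_cases[] = {
	{ 1, 1, 2 },
	{ 2, 3, 5 },
};

static void add_case_desc(const struct add_case *c, char *desc)
{
	snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%d+%d=%d", c->a, c->b, c->sum);
}

KUNIT_ARRAY_PARAM(add, add_cases, add_case_desc);

static void add_test(struct kunit *test)
{
	const struct add_case *c = test->param_value;

	KUNIT_EXPECT_EQ(test, c->a + c->b, c->sum);
}

static struct kunit_case add_test_cases[] = {
	KUNIT_CASE_PARAM(add_test, add_gen_params),
	{}
};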
@@ -1186,6 +1244,8 @@ static struct kunit_case drmm_connector_hdmi_init_tests[] = { KUNIT_CASE(drm_test_connector_hdmi_init_bpc_null), KUNIT_CASE(drm_test_connector_hdmi_init_formats_empty), KUNIT_CASE(drm_test_connector_hdmi_init_formats_no_rgb), + KUNIT_CASE_PARAM(drm_test_connector_hdmi_init_formats_yuv420_allowed, + drm_connector_hdmi_init_formats_yuv420_allowed_gen_params), KUNIT_CASE(drm_test_connector_hdmi_init_null_ddc), KUNIT_CASE(drm_test_connector_hdmi_init_null_product), KUNIT_CASE(drm_test_connector_hdmi_init_null_vendor), diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index c3b693bb966f..b976a5e9aef5 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -1568,6 +1568,57 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes KUNIT_EXPECT_EQ(test, conn_state->hdmi.output_format, HDMI_COLORSPACE_RGB); } +/* Test that atomic check succeeds when disabling a connector. */ +static void drm_test_check_disable_connector(struct kunit *test) +{ + struct drm_atomic_helper_connector_hdmi_priv *priv; + struct drm_modeset_acquire_ctx *ctx; + struct drm_connector_state *conn_state; + struct drm_crtc_state *crtc_state; + struct drm_atomic_state *state; + struct drm_display_mode *preferred; + struct drm_connector *conn; + struct drm_device *drm; + struct drm_crtc *crtc; + int ret; + + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); + KUNIT_ASSERT_NOT_NULL(test, priv); + + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + + conn = &priv->connector; + preferred = find_preferred_mode(conn); + KUNIT_ASSERT_NOT_NULL(test, preferred); + + drm = &priv->drm; + crtc = priv->crtc; + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); + KUNIT_ASSERT_EQ(test, ret, 0); + + state = drm_kunit_helper_atomic_state_alloc(test, drm, ctx); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); + + crtc_state = drm_atomic_get_crtc_state(state, crtc); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state); + + crtc_state->active = false; + ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL); + KUNIT_EXPECT_EQ(test, ret, 0); + + conn_state = drm_atomic_get_connector_state(state, conn); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); + + ret = drm_atomic_set_crtc_for_connector(conn_state, NULL); + KUNIT_EXPECT_EQ(test, ret, 0); + + ret = drm_atomic_check_only(state); + KUNIT_ASSERT_EQ(test, ret, 0); +} + static struct kunit_case drm_atomic_helper_connector_hdmi_check_tests[] = { KUNIT_CASE(drm_test_check_broadcast_rgb_auto_cea_mode), KUNIT_CASE(drm_test_check_broadcast_rgb_auto_cea_mode_vic_1), @@ -1582,6 +1633,7 @@ static struct kunit_case drm_atomic_helper_connector_hdmi_check_tests[] = { */ KUNIT_CASE(drm_test_check_broadcast_rgb_crtc_mode_changed), KUNIT_CASE(drm_test_check_broadcast_rgb_crtc_mode_not_changed), + KUNIT_CASE(drm_test_check_disable_connector), KUNIT_CASE(drm_test_check_hdmi_funcs_reject_rate), KUNIT_CASE(drm_test_check_max_tmds_rate_bpc_fallback), KUNIT_CASE(drm_test_check_max_tmds_rate_format_fallback), diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c index 04a6b8cc62ac..3c0b7824c0be 100644 --- a/drivers/gpu/drm/tests/drm_kunit_helpers.c +++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c @@ -320,8 +320,7 @@ static void kunit_action_drm_mode_destroy(void *ptr) } /** - * drm_kunit_display_mode_from_cea_vic() - 
return a mode for CEA VIC - for a KUnit test + * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC for a KUnit test * @test: The test context object * @dev: DRM device * @video_code: CEA VIC of the mode diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 89a699370a59..c67e1f906785 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -757,7 +757,6 @@ static void bochs_pci_remove(struct pci_dev *pdev) drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); - drm_dev_put(dev); } static void bochs_pci_shutdown(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c index 3139fd9128d8..f8f20d2f6174 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c @@ -258,13 +258,13 @@ static void ttm_bo_unreserve_basic(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL); bo->priority = bo_prio; - err = ttm_resource_alloc(bo, place, &res1); + err = ttm_resource_alloc(bo, place, &res1, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res1; /* Add a dummy resource to populate LRU */ - ttm_resource_alloc(bo, place, &res2); + ttm_resource_alloc(bo, place, &res2, NULL); dma_resv_lock(bo->base.resv, NULL); ttm_bo_unreserve(bo); @@ -300,12 +300,12 @@ static void ttm_bo_unreserve_pinned(struct kunit *test) dma_resv_lock(bo->base.resv, NULL); ttm_bo_pin(bo); - err = ttm_resource_alloc(bo, place, &res1); + err = ttm_resource_alloc(bo, place, &res1, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res1; /* Add a dummy resource to the pinned list */ - err = ttm_resource_alloc(bo, place, &res2); + err = ttm_resource_alloc(bo, place, &res2, NULL); KUNIT_ASSERT_EQ(test, err, 0); KUNIT_ASSERT_EQ(test, list_is_last(&res2->lru.link, &priv->ttm_dev->unevictable), 1); @@ -355,7 +355,7 @@ static void ttm_bo_unreserve_bulk(struct kunit *test) ttm_bo_set_bulk_move(bo1, &lru_bulk_move); dma_resv_unlock(bo1->base.resv); - err = ttm_resource_alloc(bo1, place, &res1); + err = ttm_resource_alloc(bo1, place, &res1, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo1->resource = res1; @@ -363,7 +363,7 @@ static void ttm_bo_unreserve_bulk(struct kunit *test) ttm_bo_set_bulk_move(bo2, &lru_bulk_move); dma_resv_unlock(bo2->base.resv); - err = ttm_resource_alloc(bo2, place, &res2); + err = ttm_resource_alloc(bo2, place, &res2, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo2->resource = res2; @@ -401,7 +401,7 @@ static void ttm_bo_put_basic(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL); bo->type = ttm_bo_type_device; - err = ttm_resource_alloc(bo, place, &res); + err = ttm_resource_alloc(bo, place, &res, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res; @@ -518,7 +518,7 @@ static void ttm_bo_pin_unpin_resource(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL); - err = ttm_resource_alloc(bo, place, &res); + err = ttm_resource_alloc(bo, place, &res, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res; @@ -569,7 +569,7 @@ static void ttm_bo_multiple_pin_one_unpin(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL); - err = ttm_resource_alloc(bo, place, &res); + err = ttm_resource_alloc(bo, place, &res, NULL); KUNIT_ASSERT_EQ(test, err, 0); bo->resource = res; diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c index 1adf18481ea0..3148f5d3dbd6 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c +++ 
b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c @@ -542,7 +542,7 @@ static void ttm_bo_validate_no_placement_signaled(struct kunit *test) bo->ttm = old_tt; } - err = ttm_resource_alloc(bo, place, &bo->resource); + err = ttm_resource_alloc(bo, place, &bo->resource, NULL); KUNIT_EXPECT_EQ(test, err, 0); KUNIT_ASSERT_EQ(test, man->usage, size); @@ -603,7 +603,7 @@ static void ttm_bo_validate_no_placement_not_signaled(struct kunit *test) bo = ttm_bo_kunit_init(test, test->priv, size, NULL); bo->type = params->bo_type; - err = ttm_resource_alloc(bo, place, &bo->resource); + err = ttm_resource_alloc(bo, place, &bo->resource, NULL); KUNIT_EXPECT_EQ(test, err, 0); placement = kunit_kzalloc(test, sizeof(*placement), GFP_KERNEL); diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c index a9f4b81921c3..e6ea2bd01f07 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c @@ -302,7 +302,7 @@ static void ttm_sys_man_free_basic(struct kunit *test) res = kunit_kzalloc(test, sizeof(*res), GFP_KERNEL); KUNIT_ASSERT_NOT_NULL(test, res); - ttm_resource_alloc(bo, place, &res); + ttm_resource_alloc(bo, place, &res, NULL); man = ttm_manager_type(priv->devs->ttm_dev, mem_type); man->func->free(man, res); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 48c5365efca1..ea5e49858857 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -42,6 +42,7 @@ #include <linux/file.h> #include <linux/module.h> #include <linux/atomic.h> +#include <linux/cgroup_dmem.h> #include <linux/dma-resv.h> #include "ttm_module.h" @@ -499,6 +500,13 @@ struct ttm_bo_evict_walk { struct ttm_resource **res; /** @evicted: Number of successful evictions. */ unsigned long evicted; + + /** @limit_pool: Which pool limit we should test against */ + struct dmem_cgroup_pool_state *limit_pool; + /** @try_low: Whether we should attempt to evict BO's with low watermark threshold */ + bool try_low; + /** @hit_low: If we cannot evict a bo when @try_low is false (first pass) */ + bool hit_low; }; static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) @@ -507,6 +515,10 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object * container_of(walk, typeof(*evict_walk), walk); s64 lret; + if (!dmem_cgroup_state_evict_valuable(evict_walk->limit_pool, bo->resource->css, + evict_walk->try_low, &evict_walk->hit_low)) + return 0; + if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, evict_walk->place)) return 0; @@ -524,7 +536,7 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object * evict_walk->evicted++; if (evict_walk->res) lret = ttm_resource_alloc(evict_walk->evictor, evict_walk->place, - evict_walk->res); + evict_walk->res, NULL); if (lret == 0) return 1; out: @@ -545,7 +557,8 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev, struct ttm_buffer_object *evictor, struct ttm_operation_ctx *ctx, struct ww_acquire_ctx *ticket, - struct ttm_resource **res) + struct ttm_resource **res, + struct dmem_cgroup_pool_state *limit_pool) { struct ttm_bo_evict_walk evict_walk = { .walk = { @@ -556,22 +569,39 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev, .place = place, .evictor = evictor, .res = res, + .limit_pool = limit_pool, }; s64 lret; evict_walk.walk.trylock_only = true; lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); + + /* One more attempt if we hit low limit? 
*/ + if (!lret && evict_walk.hit_low) { + evict_walk.try_low = true; + lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); + } if (lret || !ticket) goto out; + /* Reset low limit */ + evict_walk.try_low = evict_walk.hit_low = false; /* If ticket-locking, repeat while making progress. */ evict_walk.walk.trylock_only = false; + +retry: do { /* The walk may clear the evict_walk.walk.ticket field */ evict_walk.walk.ticket = ticket; evict_walk.evicted = 0; lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); } while (!lret && evict_walk.evicted); + + /* We hit the low limit? Try once more */ + if (!lret && evict_walk.hit_low && !evict_walk.try_low) { + evict_walk.try_low = true; + goto retry; + } out: if (lret < 0) return lret; @@ -689,6 +719,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, for (i = 0; i < placement->num_placement; ++i) { const struct ttm_place *place = &placement->placement[i]; + struct dmem_cgroup_pool_state *limit_pool = NULL; struct ttm_resource_manager *man; bool may_evict; @@ -701,15 +732,20 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, continue; may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM); - ret = ttm_resource_alloc(bo, place, res); + ret = ttm_resource_alloc(bo, place, res, force_space ? &limit_pool : NULL); if (ret) { - if (ret != -ENOSPC) + if (ret != -ENOSPC && ret != -EAGAIN) { + dmem_cgroup_pool_state_put(limit_pool); return ret; - if (!may_evict) + } + if (!may_evict) { + dmem_cgroup_pool_state_put(limit_pool); continue; + } ret = ttm_bo_evict_alloc(bdev, man, place, bo, ctx, - ticket, res); + ticket, res, limit_pool); + dmem_cgroup_pool_state_put(limit_pool); if (ret == -EBUSY) continue; if (ret) @@ -1056,6 +1092,8 @@ struct ttm_bo_swapout_walk { struct ttm_lru_walk walk; /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */ gfp_t gfp_flags; + + bool hit_low, evict_low; }; static s64 @@ -1106,7 +1144,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) memset(&hop, 0, sizeof(hop)); place.mem_type = TTM_PL_SYSTEM; - ret = ttm_resource_alloc(bo, &place, &evict_mem); + ret = ttm_resource_alloc(bo, &place, &evict_mem, NULL); if (ret) goto out; diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index a87665eb28a6..cc29bbf3eabb 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -26,6 +26,7 @@ #include <linux/io-mapping.h> #include <linux/iosys-map.h> #include <linux/scatterlist.h> +#include <linux/cgroup_dmem.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_placement.h> @@ -350,15 +351,28 @@ EXPORT_SYMBOL(ttm_resource_fini); int ttm_resource_alloc(struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource **res_ptr) + struct ttm_resource **res_ptr, + struct dmem_cgroup_pool_state **ret_limit_pool) { struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, place->mem_type); + struct dmem_cgroup_pool_state *pool = NULL; int ret; + if (man->cg) { + ret = dmem_cgroup_try_charge(man->cg, bo->base.size, &pool, ret_limit_pool); + if (ret) + return ret; + } + ret = man->func->alloc(man, bo, place, res_ptr); - if (ret) + if (ret) { + if (pool) + dmem_cgroup_uncharge(pool, bo->base.size); return ret; + } + + (*res_ptr)->css = pool; spin_lock(&bo->bdev->lru_lock); ttm_resource_add_bulk_move(*res_ptr, bo); @@ -370,6 +384,7 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(ttm_resource_alloc); void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) { struct 
ttm_resource_manager *man; + struct dmem_cgroup_pool_state *pool; if (!*res) return; @@ -377,9 +392,13 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) spin_lock(&bo->bdev->lru_lock); ttm_resource_del_bulk_move(*res, bo); spin_unlock(&bo->bdev->lru_lock); + + pool = (*res)->css; man = ttm_manager_type(bo->bdev, (*res)->mem_type); man->func->free(man, *res); *res = NULL; + if (man->cg) + dmem_cgroup_uncharge(pool, bo->base.size); } EXPORT_SYMBOL(ttm_resource_free); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 20bf33702c3c..72b6a119412f 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -107,7 +107,10 @@ v3d_irq(int irq, void *arg) v3d_job_update_stats(&v3d->bin_job->base, V3D_BIN); trace_v3d_bcl_irq(&v3d->drm, fence->seqno); + + v3d->bin_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -117,7 +120,10 @@ v3d_irq(int irq, void *arg) v3d_job_update_stats(&v3d->render_job->base, V3D_RENDER); trace_v3d_rcl_irq(&v3d->drm, fence->seqno); + + v3d->render_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -127,7 +133,10 @@ v3d_irq(int irq, void *arg) v3d_job_update_stats(&v3d->csd_job->base, V3D_CSD); trace_v3d_csd_irq(&v3d->drm, fence->seqno); + + v3d->csd_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -164,7 +173,10 @@ v3d_hub_irq(int irq, void *arg) v3d_job_update_stats(&v3d->tfu_job->base, V3D_TFU); trace_v3d_tfu_irq(&v3d->drm, fence->seqno); + + v3d->tfu_job = NULL; dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index c5f30b317698..6cc7b7e6294a 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -10,6 +10,7 @@ config DRM_VC4 depends on COMMON_CLK depends on PM select DRM_CLIENT_SELECTION + select DRM_DISPLAY_HDMI_AUDIO_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HDMI_STATE_HELPER select DRM_DISPLAY_HELPER diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 203293a8beca..47d9ada98430 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -31,6 +31,7 @@ * encoder block has CEC support. 
*/ +#include <drm/display/drm_hdmi_audio_helper.h> #include <drm/display/drm_hdmi_helper.h> #include <drm/display/drm_hdmi_state_helper.h> #include <drm/display/drm_scdc_helper.h> @@ -383,7 +384,6 @@ static void vc4_hdmi_handle_hotplug(struct vc4_hdmi *vc4_hdmi, enum drm_connector_status status) { struct drm_connector *connector = &vc4_hdmi->connector; - const struct drm_edid *drm_edid; int ret; /* @@ -405,17 +405,14 @@ static void vc4_hdmi_handle_hotplug(struct vc4_hdmi *vc4_hdmi, return; } - drm_edid = drm_edid_read_ddc(connector, vc4_hdmi->ddc); + drm_atomic_helper_connector_hdmi_hotplug(connector, status); - drm_edid_connector_update(connector, drm_edid); cec_s_phys_addr(vc4_hdmi->cec_adap, connector->display_info.source_physical_address, false); - if (!drm_edid) + if (status != connector_status_connected) return; - drm_edid_free(drm_edid); - for (;;) { ret = vc4_hdmi_reset_link(connector, ctx); if (ret == -EDEADLK) { @@ -470,31 +467,10 @@ static int vc4_hdmi_connector_detect_ctx(struct drm_connector *connector, static int vc4_hdmi_connector_get_modes(struct drm_connector *connector) { - struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); struct vc4_dev *vc4 = to_vc4_dev(connector->dev); - const struct drm_edid *drm_edid; int ret = 0; - /* - * NOTE: This function should really take vc4_hdmi->mutex, but doing so - * results in reentrancy issues since cec_s_phys_addr() might call - * .adap_enable, which leads to that funtion being called with our mutex - * held. - * - * Concurrency isn't an issue at the moment since we don't share - * any state with any of the other frameworks so we can ignore - * the lock for now. - */ - - drm_edid = drm_edid_read_ddc(connector, vc4_hdmi->ddc); - drm_edid_connector_update(connector, drm_edid); - cec_s_phys_addr(vc4_hdmi->cec_adap, - connector->display_info.source_physical_address, false); - if (!drm_edid) - return 0; - ret = drm_edid_connector_add_modes(connector); - drm_edid_free(drm_edid); if (!vc4->hvs->vc5_hdmi_enable_hdmi_20) { struct drm_device *drm = connector->dev; @@ -570,6 +546,7 @@ static void vc4_hdmi_connector_reset(struct drm_connector *connector) } static const struct drm_connector_funcs vc4_hdmi_connector_funcs = { + .force = drm_atomic_helper_connector_hdmi_force, .fill_modes = drm_helper_probe_single_connector_modes, .reset = vc4_hdmi_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, @@ -584,6 +561,7 @@ static const struct drm_connector_helper_funcs vc4_hdmi_connector_helper_funcs = }; static const struct drm_connector_hdmi_funcs vc4_hdmi_hdmi_connector_funcs; +static const struct drm_connector_hdmi_audio_funcs vc4_hdmi_audio_funcs; static int vc4_hdmi_connector_init(struct drm_device *dev, struct vc4_hdmi *vc4_hdmi) @@ -609,6 +587,12 @@ static int vc4_hdmi_connector_init(struct drm_device *dev, if (ret) return ret; + ret = drm_connector_hdmi_audio_init(connector, dev->dev, + &vc4_hdmi_audio_funcs, + 8, false, -1); + if (ret) + return ret; + drm_connector_helper_add(connector, &vc4_hdmi_connector_helper_funcs); /* @@ -1921,9 +1905,9 @@ static bool vc4_hdmi_audio_can_stream(struct vc4_hdmi *vc4_hdmi) return true; } -static int vc4_hdmi_audio_startup(struct device *dev, void *data) +static int vc4_hdmi_audio_startup(struct drm_connector *connector) { - struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); struct drm_device *drm = vc4_hdmi->connector.dev; unsigned long flags; int ret = 0; @@ -1985,9 +1969,9 @@ static void 
vc4_hdmi_audio_reset(struct vc4_hdmi *vc4_hdmi) spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); } -static void vc4_hdmi_audio_shutdown(struct device *dev, void *data) +static void vc4_hdmi_audio_shutdown(struct drm_connector *connector) { - struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); struct drm_device *drm = vc4_hdmi->connector.dev; unsigned long flags; int idx; @@ -2057,13 +2041,12 @@ static int sample_rate_to_mai_fmt(int samplerate) } /* HDMI audio codec callbacks */ -static int vc4_hdmi_audio_prepare(struct device *dev, void *data, +static int vc4_hdmi_audio_prepare(struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params) { - struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); struct drm_device *drm = vc4_hdmi->connector.dev; - struct drm_connector *connector = &vc4_hdmi->connector; struct vc4_dev *vc4 = to_vc4_dev(drm); unsigned int sample_rate = params->sample_rate; unsigned int channels = params->channels; @@ -2075,7 +2058,7 @@ static int vc4_hdmi_audio_prepare(struct device *dev, void *data, int ret = 0; int idx; - dev_dbg(dev, "%s: %u Hz, %d bit, %d channels\n", __func__, + dev_dbg(&vc4_hdmi->pdev->dev, "%s: %u Hz, %d bit, %d channels\n", __func__, sample_rate, params->sample_width, channels); mutex_lock(&vc4_hdmi->mutex); @@ -2214,40 +2197,12 @@ static const struct snd_dmaengine_pcm_config pcm_conf = { .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config, }; -static int vc4_hdmi_audio_get_eld(struct device *dev, void *data, - uint8_t *buf, size_t len) -{ - struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); - struct drm_connector *connector = &vc4_hdmi->connector; - - mutex_lock(&connector->eld_mutex); - memcpy(buf, connector->eld, min(sizeof(connector->eld), len)); - mutex_unlock(&connector->eld_mutex); - - return 0; -} - -static const struct hdmi_codec_ops vc4_hdmi_codec_ops = { - .get_eld = vc4_hdmi_audio_get_eld, +static const struct drm_connector_hdmi_audio_funcs vc4_hdmi_audio_funcs = { + .startup = vc4_hdmi_audio_startup, .prepare = vc4_hdmi_audio_prepare, - .audio_shutdown = vc4_hdmi_audio_shutdown, - .audio_startup = vc4_hdmi_audio_startup, -}; - -static struct hdmi_codec_pdata vc4_hdmi_codec_pdata = { - .ops = &vc4_hdmi_codec_ops, - .max_i2s_channels = 8, - .i2s = 1, + .shutdown = vc4_hdmi_audio_shutdown, }; -static void vc4_hdmi_audio_codec_release(void *ptr) -{ - struct vc4_hdmi *vc4_hdmi = ptr; - - platform_device_unregister(vc4_hdmi->audio.codec_pdev); - vc4_hdmi->audio.codec_pdev = NULL; -} - static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) { const struct vc4_hdmi_register *mai_data = @@ -2255,7 +2210,6 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) struct snd_soc_dai_link *dai_link = &vc4_hdmi->audio.link; struct snd_soc_card *card = &vc4_hdmi->audio.card; struct device *dev = &vc4_hdmi->pdev->dev; - struct platform_device *codec_pdev; const __be32 *addr; int index, len; int ret; @@ -2348,20 +2302,6 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) return ret; } - codec_pdev = platform_device_register_data(dev, HDMI_CODEC_DRV_NAME, - PLATFORM_DEVID_AUTO, - &vc4_hdmi_codec_pdata, - sizeof(vc4_hdmi_codec_pdata)); - if (IS_ERR(codec_pdev)) { - dev_err(dev, "Couldn't register the HDMI codec: %ld\n", PTR_ERR(codec_pdev)); - return PTR_ERR(codec_pdev); - } - vc4_hdmi->audio.codec_pdev = codec_pdev; - - ret = devm_add_action_or_reset(dev, 
vc4_hdmi_audio_codec_release, vc4_hdmi); - if (ret) - return ret; - dai_link->cpus = &vc4_hdmi->audio.cpu; dai_link->codecs = &vc4_hdmi->audio.codec; dai_link->platforms = &vc4_hdmi->audio.platform; @@ -2374,7 +2314,7 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) dai_link->stream_name = "MAI PCM"; dai_link->codecs->dai_name = "i2s-hifi"; dai_link->cpus->dai_name = dev_name(dev); - dai_link->codecs->name = dev_name(&codec_pdev->dev); + dai_link->codecs->name = dev_name(&vc4_hdmi->connector.hdmi_audio.codec_pdev->dev); dai_link->platforms->name = dev_name(dev); card->dai_link = dai_link; diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h index b2424a21da23..e3d989ca302b 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi.h @@ -104,8 +104,6 @@ struct vc4_hdmi_audio { struct snd_soc_dai_link_component codec; struct snd_soc_dai_link_component platform; struct snd_dmaengine_dai_dma_data dma_data; - struct hdmi_audio_infoframe infoframe; - struct platform_device *codec_pdev; bool streaming; }; diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index b3664c12843d..f92133a01195 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -189,10 +189,11 @@ static void virtgpu_dma_buf_free_obj(struct drm_gem_object *obj) struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj); struct virtio_gpu_device *vgdev = obj->dev->dev_private; struct dma_buf_attachment *attach = obj->import_attach; - struct dma_resv *resv = attach->dmabuf->resv; if (attach) { - dma_resv_lock(resv, NULL); + struct dma_buf *dmabuf = attach->dmabuf; + + dma_resv_lock(dmabuf->resv, NULL); virtio_gpu_detach_object_fenced(bo); @@ -200,10 +201,10 @@ static void virtgpu_dma_buf_free_obj(struct drm_gem_object *obj) dma_buf_unmap_attachment(attach, bo->sgt, DMA_BIDIRECTIONAL); - dma_resv_unlock(resv); + dma_resv_unlock(dmabuf->resv); - dma_buf_detach(attach->dmabuf, attach); - dma_buf_put(attach->dmabuf); + dma_buf_detach(dmabuf, attach); + dma_buf_put(dmabuf); } if (bo->created) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index a0e433fbcba6..9b5b8c1f063b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -228,7 +228,6 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv, VMW_BO_DOMAIN_VRAM, VMW_BO_DOMAIN_VRAM); buf->places[0].lpfn = PFN_UP(bo->resource->size); - buf->busy_places[0].lpfn = PFN_UP(bo->resource->size); ret = ttm_bo_validate(bo, &buf->placement, &ctx); /* For some reason we didn't end up at the start of vram */ @@ -443,7 +442,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv, if (params->pin) ttm_bo_pin(&vmw_bo->tbo); - ttm_bo_unreserve(&vmw_bo->tbo); + if (!params->keep_resv) + ttm_bo_unreserve(&vmw_bo->tbo); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 43b5439ec9f7..11e330c7c7f5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -56,8 +56,9 @@ struct vmw_bo_params { u32 domain; u32 busy_domain; enum ttm_bo_type bo_type; - size_t size; bool pin; + bool keep_resv; + size_t size; struct dma_resv *resv; struct sg_table *sg; }; @@ -83,7 +84,6 @@ struct vmw_bo { struct ttm_placement placement; struct ttm_place places[5]; - struct ttm_place busy_places[5]; /* Protected by reservation */ struct ttm_bo_kmap_obj map; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 1699236fca5a..0f32471c8533 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -403,7 +403,8 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv) .busy_domain = VMW_BO_DOMAIN_SYS, .bo_type = ttm_bo_type_kernel, .size = PAGE_SIZE, - .pin = true + .pin = true, + .keep_resv = true, }; /* @@ -415,10 +416,6 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv) if (unlikely(ret != 0)) return ret; - ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL); - BUG_ON(ret != 0); - vmw_bo_pin_reserved(vbo, true); - ret = ttm_bo_kmap(&vbo->tbo, 0, 1, &map); if (likely(ret == 0)) { result = ttm_kmap_obj_virtual(&map, &dummy); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index b9857f37ca1a..ed5015ced392 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -206,6 +206,7 @@ struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev, .bo_type = ttm_bo_type_sg, .size = attach->dmabuf->size, .pin = false, + .keep_resv = true, .resv = attach->dmabuf->resv, .sg = table, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 8db38927729b..800a79e035ed 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -750,6 +750,7 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state); struct vmw_bo *old_bo = NULL; struct vmw_bo *new_bo = NULL; + struct ww_acquire_ctx ctx; s32 hotspot_x, hotspot_y; int ret; @@ -769,9 +770,11 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, if (du->cursor_surface) du->cursor_age = du->cursor_surface->snooper.age; + ww_acquire_init(&ctx, &reservation_ww_class); + if (!vmw_user_object_is_null(&old_vps->uo)) { old_bo = vmw_user_object_buffer(&old_vps->uo); - ret = ttm_bo_reserve(&old_bo->tbo, false, false, NULL); + ret = ttm_bo_reserve(&old_bo->tbo, false, false, &ctx); if (ret != 0) return; } @@ -779,9 +782,14 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, if (!vmw_user_object_is_null(&vps->uo)) { new_bo = vmw_user_object_buffer(&vps->uo); if (old_bo != new_bo) { - ret = ttm_bo_reserve(&new_bo->tbo, false, false, NULL); - if (ret != 0) + ret = ttm_bo_reserve(&new_bo->tbo, false, false, &ctx); + if (ret != 0) { + if (old_bo) { + ttm_bo_unreserve(&old_bo->tbo); + ww_acquire_fini(&ctx); + } return; + } } else { new_bo = NULL; } @@ -803,10 +811,12 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, hotspot_x, hotspot_y); } - if (old_bo) - ttm_bo_unreserve(&old_bo->tbo); if (new_bo) ttm_bo_unreserve(&new_bo->tbo); + if (old_bo) + ttm_bo_unreserve(&old_bo->tbo); + + ww_acquire_fini(&ctx); du->cursor_x = new_state->crtc_x + du->set_gui_x; du->cursor_y = new_state->crtc_y + du->set_gui_y; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index a01ca3226d0a..7fb1c88bcc47 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -896,7 +896,8 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, .busy_domain = VMW_BO_DOMAIN_SYS, .bo_type = ttm_bo_type_device, .size = size, - .pin = true + .pin = true, + .keep_resv = true, }; if (!vmw_shader_id_ok(user_key, shader_type)) @@ -906,10 +907,6 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, if (unlikely(ret != 0)) goto out; - ret = ttm_bo_reserve(&buf->tbo, false, 
true, NULL); - if (unlikely(ret != 0)) - goto no_reserve; - /* Map and copy shader bytecode. */ ret = ttm_bo_kmap(&buf->tbo, 0, PFN_UP(size), &map); if (unlikely(ret != 0)) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 621d98b376bb..5553892d7c3e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -572,15 +572,14 @@ int vmw_bo_create_and_populate(struct vmw_private *dev_priv, .busy_domain = domain, .bo_type = ttm_bo_type_kernel, .size = bo_size, - .pin = true + .pin = true, + .keep_resv = true, }; ret = vmw_bo_create(dev_priv, &bo_params, &vbo); if (unlikely(ret != 0)) return ret; - ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL); - BUG_ON(ret != 0); ret = vmw_ttm_populate(vbo->tbo.bdev, vbo->tbo.ttm, &ctx); if (likely(ret == 0)) { struct vmw_ttm_tt *vmw_tt = diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 2de0de41b8dd..0d749ed44878 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -66,7 +66,7 @@ config DRM_XE_DEBUG_MEM bool "Enable passing SYS/VRAM addresses to user space" default n help - Pass object location trough uapi. Intended for extended + Pass object location through uapi. Intended for extended testing and development only. Recommended for driver developers only. @@ -104,5 +104,5 @@ config DRM_XE_USERPTR_INVAL_INJECT Choose this option when debugging error paths that are hit during checks for userptr invalidations. - Recomended for driver developers only. + Recommended for driver developers only. If in doubt, say "N". diff --git a/drivers/gpu/drm/xe/abi/guc_capture_abi.h b/drivers/gpu/drm/xe/abi/guc_capture_abi.h index e7898edc6236..dd4117553739 100644 --- a/drivers/gpu/drm/xe/abi/guc_capture_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_capture_abi.h @@ -25,7 +25,7 @@ enum guc_state_capture_type { #define GUC_STATE_CAPTURE_TYPE_MAX (GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1) -/* Class indecies for capture_class and capture_instance arrays */ +/* Class indices for capture_class and capture_instance arrays */ enum guc_capture_list_class_type { GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0, GUC_CAPTURE_LIST_CLASS_VIDEO = 1, diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 7dcb118e3d9f..d633f1c739e4 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -132,7 +132,7 @@ enum { * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001 * This config sets whether strict scheduling is enabled whereby any VF * that doesn’t have work to submit is still allocated a fixed execution - * time-slice to ensure active VFs execution is always consitent even + * time-slice to ensure active VFs execution is always consistent even * during other VF reprovisiong / rebooting events. Changing this KLV * impacts all VFs and takes effect on the next VF-Switch event. * @@ -207,7 +207,7 @@ enum { * of and this will never be perfectly-exact (accumulated nano-second * granularity) since the GPUs clock time runs off a different crystal * from the CPUs clock. Changing this KLV on a VF that is currently - * running a context wont take effect until a new context is scheduled in. + * running a context won't take effect until a new context is scheduled in. * That said, when the PF is changing this value from 0x0 to * a non-zero value, it might never take effect if the VF is running an * infinitely long compute or shader kernel. 
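In vmw_du_cursor_plane_atomic_update() above, the old and new cursor buffers are now reserved under a single shared ww_acquire_ctx instead of two independent ticket-less reservations, and the error path and unreserve order were fixed to match. A minimal sketch of that two-object transaction, assuming both objects use TTM's common reservation_ww_class; sketch_reserve_pair() is an illustrative name, not driver code:

        #include <linux/dma-resv.h>
        #include <drm/ttm/ttm_bo.h>

        /* Reserve two BOs as one locking transaction so the wound/wait
         * machinery (and lockdep) can resolve lock-order conflicts. */
        static int sketch_reserve_pair(struct ttm_buffer_object *a,
                                       struct ttm_buffer_object *b)
        {
                struct ww_acquire_ctx ctx;
                int ret;

                ww_acquire_init(&ctx, &reservation_ww_class);

                ret = ttm_bo_reserve(a, false, false, &ctx);
                if (ret)
                        goto out_fini;

                ret = ttm_bo_reserve(b, false, false, &ctx);
                if (ret) {
                        ttm_bo_unreserve(a);
                        goto out_fini;
                }

                /* ... update both objects ... */

                ttm_bo_unreserve(b);
                ttm_bo_unreserve(a);
        out_fini:
                ww_acquire_fini(&ctx);
                return ret;
        }

Unreserving in the reverse order of acquisition, as the hunk above also switches to, keeps the release sequence symmetric with the locking sequence.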
In such a scenario, the @@ -227,7 +227,7 @@ enum { * HW is capable and this will never be perfectly-exact (accumulated * nano-second granularity) since the GPUs clock time runs off a * different crystal from the CPUs clock. Changing this KLV on a VF - * that is currently running a context wont take effect until a new + * that is currently running a context won't take effect until a new * context is scheduled in. * That said, when the PF is changing this value from 0x0 to * a non-zero value, it might never take effect if the VF is running an diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7c78496e6213..d86219dedde2 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -83,6 +83,8 @@ #define RING_IMR(base) XE_REG((base) + 0xa8) #define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac) +#define CS_INT_VEC(base) XE_REG((base) + 0x1b8) + #define RING_EIR(base) XE_REG((base) + 0xb0) #define RING_EMR(base) XE_REG((base) + 0xb4) #define RING_ESR(base) XE_REG((base) + 0xb8) @@ -138,6 +140,7 @@ #define RING_MODE(base) XE_REG((base) + 0x29c) #define GFX_DISABLE_LEGACY_MODE REG_BIT(3) +#define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13) #define RING_TIMESTAMP(base) XE_REG((base) + 0x358) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 045dfd09db99..57944f90bbf6 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -25,6 +25,9 @@ #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) +#define CTX_CS_INT_VEC_REG 0x5a +#define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1) + #define INDIRECT_CTX_RING_HEAD (0x02 + 1) #define INDIRECT_CTX_RING_TAIL (0x04 + 1) #define INDIRECT_CTX_RING_START (0x06 + 1) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 51fd40ffafcb..0eedd6c26b1b 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -13,7 +13,7 @@ /** * struct xe_reg - Register definition * - * Register defintion to be used by the individual register. Although the same + * Register definition to be used by the individual register. Although the same * definition is used for xe_reg and xe_reg_mcr, they use different internal * APIs for accesses. */ diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 3293172b0128..6cf282618836 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -44,12 +44,16 @@ #define MTL_RP_STATE_CAP XE_REG(0x138000) +#define MTL_GT_RPA_FREQUENCY XE_REG(0x138008) #define MTL_GT_RPE_FREQUENCY XE_REG(0x13800c) #define MTL_MEDIAP_STATE_CAP XE_REG(0x138020) #define MTL_RPN_CAP_MASK REG_GENMASK(24, 16) #define MTL_RP0_CAP_MASK REG_GENMASK(8, 0) +#define MTL_MPA_FREQUENCY XE_REG(0x138028) +#define MTL_RPA_MASK REG_GENMASK(8, 0) + #define MTL_MPE_FREQUENCY XE_REG(0x13802c) #define MTL_RPE_MASK REG_GENMASK(8, 0) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index c9ec7a313c6b..6795d1d916e4 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -264,10 +264,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc * however seems quite fragile not to also restart the GT. Try * to do that here by triggering a GT reset. 
*/ - for_each_gt(__gt, xe, id) { - xe_gt_reset_async(__gt); - flush_work(&__gt->reset.worker); - } + for_each_gt(__gt, xe, id) + xe_gt_reset(__gt); + if (err) { KUNIT_FAIL(test, "restore kernel err=%pe\n", ERR_PTR(err)); @@ -606,8 +605,6 @@ static void xe_bo_shrink_kunit(struct kunit *test) static struct kunit_case xe_bo_tests[] = { KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param), KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param), - KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, - {.speed = KUNIT_SPEED_SLOW}), {} }; @@ -618,3 +615,17 @@ struct kunit_suite xe_bo_test_suite = { .init = xe_kunit_helper_xe_device_live_test_init, }; EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite); + +static struct kunit_case xe_bo_shrink_test[] = { + KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, + {.speed = KUNIT_SPEED_SLOW}), + {} +}; + +VISIBLE_IF_KUNIT +struct kunit_suite xe_bo_shrink_test_suite = { + .name = "xe_bo_shrink", + .test_cases = xe_bo_shrink_test, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_bo_shrink_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c index 0d36ab864ec0..81277c77016d 100644 --- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c @@ -6,11 +6,13 @@ #include <kunit/test.h> extern struct kunit_suite xe_bo_test_suite; +extern struct kunit_suite xe_bo_shrink_test_suite; extern struct kunit_suite xe_dma_buf_test_suite; extern struct kunit_suite xe_migrate_test_suite; extern struct kunit_suite xe_mocs_test_suite; kunit_test_suite(xe_bo_test_suite); +kunit_test_suite(xe_bo_shrink_test_suite); kunit_test_suite(xe_dma_buf_test_suite); kunit_test_suite(xe_migrate_test_suite); kunit_test_suite(xe_mocs_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 6f9b7a266b41..ef1e5256c56a 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -58,7 +58,7 @@ static void read_l3cc_table(struct xe_gt *gt, mocs_dbg(gt, "reg_val=0x%x\n", reg_val); } else { - /* Just re-use value read on previous iteration */ + /* Just reuse value read on previous iteration */ reg_val >>= 16; } @@ -162,8 +162,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe) if (flags & HAS_LNCF_MOCS) read_l3cc_table(gt, &mocs.table); - xe_gt_reset_async(gt); - flush_work(>->reset.worker); + xe_gt_reset(gt); kunit_info(test, "mocs_reset_test after reset\n"); if (flags & HAS_GLOBAL_MOCS) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index ef777dbdf4ec..9570672fce33 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -41,7 +41,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) /* * We need to allocate space for the requested number of dwords, * one additional MI_BATCH_BUFFER_END dword, and additional buffer - * space to accomodate the platform-specific hardware prefetch + * space to accommodate the platform-specific hardware prefetch * requirements. */ bb->bo = xe_sa_bo_new(!usm ? 
tile->mem.kernel_bb_pool : gt->usm.bb_pool, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 283cd0294570..3f5391d416d4 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -733,7 +733,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, new_mem->mem_type == XE_PL_SYSTEM) { long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, - true, + false, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { ret = timeout; @@ -786,7 +786,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, * / resume, some of the pinned memory is required for the * device to resume / use the GPU to move other evicted memory * (user memory) around. This likely could be optimized a bit - * futher where we find the minimum set of pinned memory + * further where we find the minimum set of pinned memory * required for resume but for simplity doing a memcpy for all * pinned memory. */ @@ -857,8 +857,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, out: if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && - ttm_bo->ttm) + ttm_bo->ttm) { + long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, + DMA_RESV_USAGE_KERNEL, + false, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + ret = timeout; + xe_tt_unmap_sg(ttm_bo->ttm); + } return ret; } @@ -867,7 +875,7 @@ out: * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory * @bo: The buffer object to move. * - * On successful completion, the object memory will be moved to sytem memory. + * On successful completion, the object memory will be moved to system memory. * * This is needed to for special handling of pinned VRAM object during * suspend-resume. @@ -1362,7 +1370,7 @@ static const struct drm_gem_object_funcs xe_gem_object_funcs = { /** * xe_bo_alloc - Allocate storage for a struct xe_bo * - * This funcition is intended to allocate storage to be used for input + * This function is intended to allocate storage to be used for input * to __xe_bo_create_locked(), in the case a pointer to the bo to be * created is needed before the call to __xe_bo_create_locked(). * If __xe_bo_create_locked ends up never to be called, then the @@ -2404,7 +2412,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) * @force_alloc: Set force_alloc in ttm_operation_ctx * * On successful completion, the object memory will be moved to evict - * placement. Ths function blocks until the object has been fully moved. + * placement. This function blocks until the object has been fully moved. * * Return: 0 on success. Negative error code on failure. */ diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h index f57d440cc95a..25a884c64bf1 100644 --- a/drivers/gpu/drm/xe/xe_bo_doc.h +++ b/drivers/gpu/drm/xe/xe_bo_doc.h @@ -41,7 +41,7 @@ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All * user BOs are evictable and user BOs are never pinned by XE. The allocation of - * the backing store can be defered from creation time until first use which is + * the backing store can be deferred from creation time until first use which is * either mmap, bind, or pagefault. 
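The xe_bo_move() hunks above make the reservation waits non-interruptible and, on the way out, wait on all kernel-usage fences before the TT's SG table is unmapped, so a still-running kernel copy cannot race the teardown. The shape of that wait, as a sketch (the helper name is illustrative):

        #include <linux/dma-resv.h>
        #include <linux/sched.h>        /* MAX_SCHEDULE_TIMEOUT */
        #include <drm/ttm/ttm_bo.h>

        static long sketch_wait_kernel_fences(struct ttm_buffer_object *bo)
        {
                /* Returns remaining jiffies (> 0) once every
                 * DMA_RESV_USAGE_KERNEL fence has signalled, or a negative
                 * error code; false = don't let signals interrupt the wait. */
                return dma_resv_wait_timeout(bo->base.resv,
                                             DMA_RESV_USAGE_KERNEL,
                                             false, MAX_SCHEDULE_TIMEOUT);
        }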
* * Private BOs diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 71636e80b71d..81dc7795c065 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -48,7 +48,7 @@ * * **Coredump release**: * After a coredump is generated, it stays in kernel memory until released by - * userpace by writing anything to it, or after an internal timer expires. The + * userspace by writing anything to it, or after an internal timer expires. The * exact timeout may vary and should not be relied upon. Example to release * a coredump: * @@ -119,7 +119,11 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, drm_puts(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(ss->guc.ct, &p); - drm_puts(&p, "\n**** Contexts ****\n"); + /* + * Don't add a new section header here because the mesa debug decoder + * tool expects the context information to be in the 'GuC CT' section. + */ + /* drm_puts(&p, "\n**** Contexts ****\n"); */ xe_guc_exec_queue_snapshot_print(ss->ge, &p); drm_puts(&p, "\n**** Job ****\n"); @@ -416,6 +420,15 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0; + /* + * Splitting blobs across multiple lines is not compatible with the mesa + * debug decoder tool. Note that even dropping the explicit '\n' below + * doesn't help because the GuC log is so big some underlying implementation + * still splits the lines at 512K characters. So just bail completely for + * the moment. + */ + return; + #define DMESG_MAX_LINE_LEN 800 #define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 56d4ffb650da..4e1839b483a0 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -325,7 +325,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.revid = pdev->revision; xe->info.force_execlist = xe_modparam.force_execlist; - spin_lock_init(&xe->irq.lock); + err = xe_irq_init(xe); + if (err) + goto err; init_waitqueue_head(&xe->ufence_wq); @@ -519,7 +521,7 @@ static int wait_for_lmem_ready(struct xe_device *xe) drm_dbg(&xe->drm, "Waiting for lmem initialization\n"); start = jiffies; - timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */ + timeout = start + secs_to_jiffies(60); /* 60 sec! */ do { if (signal_pending(current)) @@ -604,7 +606,7 @@ static int probe_has_flat_ccs(struct xe_device *xe) u32 reg; /* Always enabled/disabled, no runtime check to do */ - if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs) + if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs || IS_SRIOV_VF(xe)) return 0; gt = xe_root_mmio_gt(xe); @@ -997,7 +999,7 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg) * xe_device_declare_wedged - Declare device wedged * @xe: xe device instance * - * This is a final state that can only be cleared with a mudule + * This is a final state that can only be cleared with a module * re-probe (unbind + bind). * In this state every IOCTL will be blocked so the GT cannot be used. 
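The early return added to xe_print_blob_ascii85() above disables the output entirely because the mesa decoder cannot reassemble a blob that was split across dmesg-sized lines. For reference, the line-bounded encoding loop being bypassed looks roughly like this sketch on top of the kernel's <linux/ascii85.h> helper; the function name and the padding assumption (blob length is a whole number of dwords) are this example's, not the driver's:

        #include <linux/ascii85.h>
        #include <drm/drm_print.h>

        static void sketch_blob_ascii85(struct drm_printer *p, const u32 *blob,
                                        size_t num_dwords, size_t dwords_per_line)
        {
                char out[ASCII85_BUFSZ];        /* up to 5 chars + NUL per dword */
                size_t i;

                for (i = 0; i < num_dwords; i++) {
                        /* an all-zero dword encodes as the single char 'z' */
                        drm_printf(p, "%s", ascii85_encode(blob[i], out));
                        if ((i + 1) % dwords_per_line == 0)
                                drm_printf(p, "\n");
                }
                drm_printf(p, "\n");
        }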
* In general it will be called upon any critical error such as gt reset diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index f1fbfe916867..fc3c2af3fb7f 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -157,8 +157,7 @@ static inline bool xe_device_has_sriov(struct xe_device *xe) static inline bool xe_device_has_msix(struct xe_device *xe) { - /* TODO: change this when MSI-X support is fully integrated */ - return false; + return xe->irq.msix.nvec > 0; } static inline bool xe_device_has_memirq(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index ace22e35e769..8a7b15972413 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -348,6 +348,14 @@ struct xe_device { /** @irq.enabled: interrupts enabled on this device */ atomic_t enabled; + + /** @irq.msix: irq info for platforms that support MSI-X */ + struct { + /** @irq.msix.nvec: number of MSI-X interrupts */ + u16 nvec; + /** @irq.msix.indexes: used to allocate MSI-X indexes */ + struct xarray indexes; + } msix; } irq; /** @ttm: ttm device */ diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 298a587da7f1..63f30b6df70b 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -261,6 +261,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) if (man) { drm_print_memory_stats(p, &stats[mem_type], + DRM_GEM_OBJECT_ACTIVE | DRM_GEM_OBJECT_RESIDENT | (mem_type != XE_PL_SYSTEM ? 0 : DRM_GEM_OBJECT_PURGEABLE), @@ -384,7 +385,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) * @p: The drm_printer ptr * @file: The drm_file ptr * - * This is callabck for drm fdinfo interface. Register this callback + * This is callback for drm fdinfo interface. Register this callback * in drm driver ops for show_fdinfo. * * Return: void diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 31cca938956f..df8ce550deb4 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -33,7 +33,7 @@ * * In XE we avoid all of this complication by not allowing a BO list to be * passed into an exec, using the dma-buf implicit sync uAPI, have binds as - * seperate operations, and using the DRM scheduler to flow control the ring. + * separate operations, and using the DRM scheduler to flow control the ring. * Let's deep dive on each of these. 
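The wait_for_lmem_ready() change above replaces the open-coded msecs_to_jiffies(60 * 1000) with the clearer secs_to_jiffies(60); the deadline idiom around it is worth seeing in one piece. A self-contained sketch, with the readiness check passed in rather than tied to any hardware:

        #include <linux/jiffies.h>
        #include <linux/delay.h>
        #include <linux/errno.h>

        static int sketch_poll_until_ready(bool (*ready)(void))
        {
                unsigned long timeout = jiffies + secs_to_jiffies(60);

                do {
                        if (ready())
                                return 0;
                        msleep(20);     /* back off between polls */
                } while (!time_after(jiffies, timeout));

                return ready() ? 0 : -ETIMEDOUT;
        }

The final re-check avoids declaring a timeout when the condition became true during the last sleep.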
* * We can get away from a BO list by forcing the user to use in / out fences on diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index aab9e561153d..7e1abbbfba12 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -8,6 +8,7 @@ #include <linux/nospec.h> #include <drm/drm_device.h> +#include <drm/drm_drv.h> #include <drm/drm_file.h> #include <uapi/drm/xe_drm.h> @@ -16,6 +17,7 @@ #include "xe_hw_engine_class_sysfs.h" #include "xe_hw_engine_group.h" #include "xe_hw_fence.h" +#include "xe_irq.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_migrate.h" @@ -68,6 +70,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->gt = gt; q->class = hwe->class; q->width = width; + q->msix_vec = XE_IRQ_DEFAULT_MSIX; q->logical_mask = logical_mask; q->fence_irq = >->fence_irq[hwe->class]; q->ring_ops = gt->ring_ops[hwe->class]; @@ -117,7 +120,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) } for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); + q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); goto err_unlock; @@ -766,20 +769,21 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) */ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { - struct xe_file *xef; + struct xe_device *xe = gt_to_xe(q->gt); struct xe_lrc *lrc; u32 old_ts, new_ts; + int idx; /* - * Jobs that are run during driver load may use an exec_queue, but are - * not associated with a user xe file, so avoid accumulating busyness - * for kernel specific work. + * Jobs that are executed by kernel doesn't have a corresponding xe_file + * and thus are not accounted. */ - if (!q->vm || !q->vm->xef) + if (!q->xef) return; - xef = q->vm->xef; - + /* Synchronize with unbind while holding the xe file open */ + if (!drm_dev_enter(&xe->drm, &idx)) + return; /* * Only sample the first LRC. For parallel submission, all of them are * scheduled together and we compensate that below by multiplying by @@ -790,7 +794,9 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) */ lrc = q->lrc[0]; new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; + q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; + + drm_dev_exit(idx); } /** diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 1158b6062a6c..5af5419cec7a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -41,7 +41,7 @@ struct xe_exec_queue { /** @xef: Back pointer to xe file if this is user created exec queue */ struct xe_file *xef; - /** @gt: graphics tile this exec queue can submit to */ + /** @gt: GT structure this exec queue can submit to */ struct xe_gt *gt; /** * @hwe: A hardware of the same class. 
May (physical engine) or may not @@ -63,6 +63,8 @@ struct xe_exec_queue { char name[MAX_FENCE_NAME_LEN]; /** @width: width (number BB submitted per exec) of this exec queue */ u16 width; + /** @msix_vec: MSI-X vector (for platforms that support it) */ + u16 msix_vec; /** @fence_irq: fence IRQ used to signal job completion */ struct xe_hw_fence_irq *fence_irq; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index a8c416a48812..5ef96deaa881 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -17,6 +17,7 @@ #include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_hw_fence.h" +#include "xe_irq.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_mmio.h" @@ -47,6 +48,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, struct xe_mmio *mmio = >->mmio; struct xe_device *xe = gt_to_xe(gt); u64 lrc_desc; + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); lrc_desc = xe_lrc_descriptor(lrc); @@ -80,8 +82,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base), xe_bo_ggtt_addr(hwe->hwsp)); xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base)); - xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + + if (xe_device_has_msix(gt_to_xe(hwe->gt))) + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); + xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode); xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), lower_32_bits(lrc_desc)); @@ -265,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, port->hwe = hwe; - port->lrc = xe_lrc_create(hwe, NULL, SZ_16K); + port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX); if (IS_ERR(port->lrc)) { err = PTR_ERR(port->lrc); goto err; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 05154f9de1a6..5fcb2b4c2c13 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -362,7 +362,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) /* * So we don't need to worry about 64K GGTT layout when dealing with - * scratch entires, rather keep the scratch page in system memory on + * scratch entries, rather keep the scratch page in system memory on * platforms where 64K pages are needed for VRAM. 
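The xe_exec_queue_update_run_ticks() rework above reads the q->xef backpointer directly and brackets the LRC timestamp access in drm_dev_enter()/drm_dev_exit(), so busyness accounting quietly skips work once the device is being unbound instead of touching freed state. The pattern in isolation, as a sketch (names are illustrative):

        #include <linux/types.h>
        #include <drm/drm_drv.h>

        static void sketch_account_run_ticks(struct drm_device *drm, u64 *total,
                                             u32 new_ts, u32 old_ts, u16 width)
        {
                int idx;

                /* Fails once the device is unplugged/unbound: just skip. */
                if (!drm_dev_enter(drm, &idx))
                        return;

                /* u32 subtraction is wrap-safe; scale by queue width since
                 * only the first LRC of a parallel queue is sampled. */
                *total += (u64)(new_ts - old_ts) * width;

                drm_dev_exit(idx);
        }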
*/ flags = XE_BO_FLAG_PINNED; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 41ab7fbebc19..26e64530ada2 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -387,6 +387,10 @@ int xe_gt_init_early(struct xe_gt *gt) xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(>->global_invl_lock); + err = xe_gt_tlb_invalidation_init_early(gt); + if (err) + return err; + return 0; } @@ -588,10 +592,6 @@ int xe_gt_init(struct xe_gt *gt) xe_hw_fence_irq_init(>->fence_irq[i]); } - err = xe_gt_tlb_invalidation_init(gt); - if (err) - return err; - err = xe_gt_pagefault_init(gt); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 82b9b7f82fca..e504cc33ade4 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -37,7 +37,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt); /** * xe_gt_record_user_engines - save data related to engines available to - * usersapce + * userspace * @gt: GT structure * * Walk the available HW engines from gt->info.engine_mask and calculate data @@ -57,6 +57,31 @@ int xe_gt_sanitize_freq(struct xe_gt *gt); void xe_gt_remove(struct xe_gt *gt); /** + * xe_gt_wait_for_reset - wait for gt's async reset to finalize. + * @gt: GT structure + * Return: + * %true if it waited for the work to finish execution, + * %false if there was no scheduled reset or it was done. + */ +static inline bool xe_gt_wait_for_reset(struct xe_gt *gt) +{ + return flush_work(>->reset.worker); +} + +/** + * xe_gt_reset - perform synchronous reset + * @gt: GT structure + * Return: + * %true if it waited for the reset to finish, + * %false if there was no scheduled reset. + */ +static inline bool xe_gt_reset(struct xe_gt *gt) +{ + xe_gt_reset_async(gt); + return xe_gt_wait_for_reset(gt); +} + +/** * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the * first that matches the same reset domain as @class * @gt: GT structure diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index b6adfb9f2030..50fffc9ebf62 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -150,7 +150,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); gt->ccs_mode = num_engines; xe_gt_record_user_engines(gt); - xe_gt_reset_async(gt); + xe_gt_reset(gt); } mutex_unlock(&xe->drm.filelist_mutex); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 3e8c351a0eab..e7792858b1e4 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -132,11 +132,9 @@ static int force_reset(struct xe_gt *gt, struct drm_printer *p) static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_reset_async(gt); + xe_gt_reset(gt); xe_pm_runtime_put(gt_to_xe(gt)); - flush_work(>->reset.worker); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 6bd39b2c5003..604bdc7c8173 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -115,6 +115,20 @@ static ssize_t rpe_freq_show(struct device *dev, } static DEVICE_ATTR_RO(rpe_freq); +static ssize_t rpa_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_rpa_freq(pc); + 
xe_pm_runtime_put(dev_to_xe(dev)); + + return sysfs_emit(buf, "%d\n", freq); +} +static DEVICE_ATTR_RO(rpa_freq); + static ssize_t rpn_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -202,6 +216,7 @@ static const struct attribute *freq_attrs[] = { &dev_attr_act_freq.attr, &dev_attr_cur_freq.attr, &dev_attr_rp0_freq.attr, + &dev_attr_rpa_freq.attr, &dev_attr_rpe_freq.attr, &dev_attr_rpn_freq.attr, &dev_attr_min_freq.attr, diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index fd80afeef56a..ffd3ba7f6656 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -122,10 +122,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (!xe_gt_is_media_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; - for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { - if ((gt->info.engine_mask & BIT(i))) - gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | - VDN_MFXVDENC_POWERGATE_ENABLE(j)); + if (xe->info.platform != XE_DG1) { + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { + if ((gt->info.engine_mask & BIT(i))) + gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | + VDN_MFXVDENC_POWERGATE_ENABLE(j)); + } } fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 5013d674e17d..a1676b787fdc 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -371,7 +371,7 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, * @group: steering group ID * @instance: steering instance ID * - * Return: the coverted DSS id. + * Return: the converted DSS id. */ u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance) { @@ -550,9 +550,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) * Returns true if the caller should steer to the @group/@instance values * returned. 
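The rpa_freq attribute added above follows the stock device-attribute recipe: DEVICE_ATTR_RO(name) expects a name_show() callback and generates a dev_attr_name object for the attribute array. A minimal sketch, where sketch_read_freq() is a stand-in for the runtime-PM-wrapped GuC query:

        #include <linux/device.h>
        #include <linux/sysfs.h>

        static u32 sketch_read_freq(struct device *dev)
        {
                return 1500;    /* placeholder for the hardware read */
        }

        static ssize_t sketch_freq_show(struct device *dev,
                                        struct device_attribute *attr, char *buf)
        {
                return sysfs_emit(buf, "%u\n", sketch_read_freq(dev));
        }
        static DEVICE_ATTR_RO(sketch_freq);

dev_attr_sketch_freq.attr would then be listed in an attribute array, exactly as &dev_attr_rpa_freq.attr is added to freq_attrs above.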
Returns false if the caller need not perform any steering */ -static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, - struct xe_reg_mcr reg_mcr, - u8 *group, u8 *instance) +bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, + struct xe_reg_mcr reg_mcr, + u8 *group, u8 *instance) { const struct xe_reg reg = to_xe_reg(reg_mcr); const struct xe_mmio_range *implicit_ranges; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index c0cd36021c24..bc06520befab 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -26,6 +26,10 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, u32 value); +bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, + struct xe_reg_mcr reg_mcr, + u8 *group, u8 *instance); + void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 65082f12f1a8..878e96281c03 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -2120,7 +2120,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { - bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); + bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid); valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; @@ -2161,7 +2161,7 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid) * * This function can only be called on PF. * - * Return: mininum size of the buffer or the number of bytes saved, + * Return: minimum size of the buffer or the number of bytes saved, * or a negative error code on failure. */ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size) diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index c7364a5aef8f..7a6c1d808e41 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -12,7 +12,7 @@ /** * xe_gt_stats_incr - Increments the specified stats counter - * @gt: graphics tile + * @gt: GT structure * @id: xe_gt_stats_id type id that needs to be incremented * @incr: value to be incremented with * @@ -32,7 +32,7 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { /** * xe_gt_stats_print_info - Print the GT stats - * @gt: graphics tile + * @gt: GT structure * @p: drm_printer where it will be printed out. * * This prints out all the available GT stats. diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 665927b80e9e..0a93831c0a02 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -106,15 +106,15 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) } /** - * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state - * @gt: graphics tile + * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state + * @gt: GT structure * * Initialize GT TLB invalidation state, purely software initialization, should * be called once during driver load. 
* * Return: 0 on success, negative error code on error. */ -int xe_gt_tlb_invalidation_init(struct xe_gt *gt) +int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt) { gt->tlb_invalidation.seqno = 1; INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); @@ -128,7 +128,7 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset - * @gt: graphics tile + * @gt: GT structure * * Signal any pending invalidation fences, should be called during a GT reset */ @@ -244,7 +244,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, /** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC - * @gt: graphics tile + * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion * @@ -277,7 +277,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, /** * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT - * @gt: graphics tile + * @gt: GT structure * * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is * synchronous. @@ -326,7 +326,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an * address range * - * @gt: graphics tile + * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion * @start: start address @@ -412,7 +412,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, /** * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA - * @gt: graphics tile + * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion, can be NULL * @vma: VMA to invalidate diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index 00b1c6c01e8d..672acfcdf0d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -14,7 +14,8 @@ struct xe_gt; struct xe_guc; struct xe_vma; -int xe_gt_tlb_invalidation_init(struct xe_gt *gt); +int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); + void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 4e2868efb620..408365dfe4ee 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -147,6 +147,34 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc) return flags; } +static bool needs_wa_dual_queue(struct xe_gt *gt) +{ + /* + * The DUAL_QUEUE_WA tells the GuC to not allow concurrent submissions + * on RCS and CCSes with different address spaces, which on DG2 is + * required as a WA for an HW bug. + */ + if (XE_WA(gt, 22011391025)) + return true; + + /* + * On newer platforms, the HW has been updated to not allow parallel + * execution of different address spaces, so the RCS/CCS will stall the + * context switch if one of the other RCS/CCSes is busy with a different + * address space. While functionally correct, having a submission + * stalled on the HW limits the GuC ability to shuffle things around and + * can cause complications if the non-stalled submission runs for a long + * time, because the GuC doesn't know that the stalled submission isn't + * actually running and might declare it as hung. 
Therefore, we enable + * the DUAL_QUEUE_WA on all newer platforms on GTs that have CCS engines + * to move management back to the GuC. + */ + if (CCS_MASK(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) + return true; + + return false; +} + static u32 guc_ctl_wa_flags(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -159,7 +187,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 14014475959)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; - if (XE_WA(gt, 22011391025)) + if (needs_wa_dual_queue(gt)) flags |= GUC_WA_DUAL_QUEUE; /* diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 943146e5b460..fab259adc380 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -29,6 +29,7 @@ #include "xe_platform_types.h" #include "xe_uc_fw.h" #include "xe_wa.h" +#include "xe_gt_mcr.h" /* Slack of a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 @@ -696,6 +697,20 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, .flags = reg.masked ? GUC_REGSET_MASKED : 0, }; + if (reg.mcr) { + struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr); + u8 group, instance; + + bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg, + &group, &instance); + + if (steer) { + entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group); + entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance); + entry.flags |= GUC_REGSET_STEERING_NEEDED; + } + } + xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), &entry, sizeof(entry)); } diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 137571fae4ed..f6d523e4c5fe 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1955,7 +1955,7 @@ xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q) } /* - * xe_guc_capture_put_matched_nodes - Cleanup macthed nodes + * xe_guc_capture_put_matched_nodes - Cleanup matched nodes * @guc: The GuC object * * Free matched node and all nodes with the equal guc_id from diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h index 2057125b1bfa..ca2d390ccbee 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h @@ -22,7 +22,7 @@ enum capture_register_data_type { * struct __guc_mmio_reg_descr - GuC mmio register descriptor * * xe_guc_capture module uses these structures to define a register - * (offsets, names, flags,...) that are used at the ADS regisration + * (offsets, names, flags,...) that are used at the ADS registration * time as well as during runtime processing and reporting of error- * capture states generated by GuC just prior to engine reset events. */ @@ -48,7 +48,7 @@ struct __guc_mmio_reg_descr { * * xe_guc_capture module uses these structures to maintain static * tables (per unique platform) that consists of lists of registers - * (offsets, names, flags,...) that are used at the ADS regisration + * (offsets, names, flags,...) that are used at the ADS registration * time as well as during runtime processing and reporting of error- * capture states generated by GuC just prior to engine reset events. 
*/ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 7d33f3a11e61..8b65c5e959cc 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -710,7 +710,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, --len; ++action; - /* Write H2G ensuring visable before descriptor update */ + /* Write H2G ensuring visible before descriptor update */ xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32)); xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32)); xe_device_wmb(xe); @@ -1383,7 +1383,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) * this function and nowhere else. Hence, they cannot be different * unless two g2h_read calls are running concurrently. Which is not * possible because it is guarded by ct->fast_lock. And yet, some - * discrete platforms are reguarly hitting this error :(. + * discrete platforms are regularly hitting this error :(. * * desc_head rolling backwards shouldn't cause any noticeable * problems - just a delay in GuC being allowed to proceed past that diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index e8b9faeaef64..df7f130fb663 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -38,6 +38,7 @@ #define FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) +#define RPA_MASK REG_GENMASK(31, 16) #define GT_PERF_STATUS XE_REG(0x1381b4) #define CAGF_MASK REG_GENMASK(19, 11) @@ -328,6 +329,19 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) freq); } +static void mtl_update_rpa_value(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(>->mmio, MTL_MPA_FREQUENCY); + else + reg = xe_mmio_read32(>->mmio, MTL_GT_RPA_FREQUENCY); + + pc->rpa_freq = decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); +} + static void mtl_update_rpe_value(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); @@ -341,6 +355,25 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc) pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); } +static void tgl_update_rpa_value(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 reg; + + /* + * For PVC we still need to use fused RP1 as the approximation for RPe + * For other platforms than PVC we get the resolved RPe directly from + * PCODE at a different register + */ + if (xe->info.platform == XE_PVC) + reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); + else + reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); + + pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + static void tgl_update_rpe_value(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); @@ -365,10 +398,13 @@ static void pc_update_rp_values(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); struct xe_device *xe = gt_to_xe(gt); - if (GRAPHICS_VERx100(xe) >= 1270) + if (GRAPHICS_VERx100(xe) >= 1270) { + mtl_update_rpa_value(pc); mtl_update_rpe_value(pc); - else + } else { + tgl_update_rpa_value(pc); tgl_update_rpe_value(pc); + } /* * RPe is decided at runtime by PCODE. In the rare case where that's @@ -421,8 +457,8 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. 
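The xe_guc_pc hunks just below narrow the forcewake request from XE_FORCEWAKE_ALL to XE_FW_GT: only the GT power well has to be held for a reliable read of the frequency registers, and waking every domain is both slower and more power-hungry. The get/check/put pattern, sketched with the driver calls visible in these hunks (driver-internal headers assumed):

        #include "xe_force_wake.h"
        #include "xe_gt.h"
        #include "xe_mmio.h"

        static int sketch_read_with_gt_forcewake(struct xe_gt *gt,
                                                 struct xe_reg reg, u32 *value)
        {
                unsigned int fw_ref;

                fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
                if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
                        /* wake timed out; drop whatever was acquired */
                        xe_force_wake_put(gt_to_fw(gt), fw_ref);
                        return -ETIMEDOUT;
                }

                *value = xe_mmio_read32(&gt->mmio, reg);

                xe_force_wake_put(gt_to_fw(gt), fw_ref);
                return 0;
        }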
*/ - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { xe_force_wake_put(gt_to_fw(gt), fw_ref); return -ETIMEDOUT; } @@ -448,6 +484,19 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) } /** + * xe_guc_pc_get_rpa_freq - Get the RPa freq + * @pc: The GuC PC + * + * Returns: RPa freq. + */ +u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc) +{ + pc_update_rp_values(pc); + + return pc->rpa_freq; +} + +/** * xe_guc_pc_get_rpe_freq - Get the RPe freq * @pc: The GuC PC * @@ -481,10 +530,10 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { - struct xe_gt *gt = pc_to_gt(pc); - unsigned int fw_ref; int ret; + xe_device_assert_mem_access(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -492,24 +541,12 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) goto out; } - /* - * GuC SLPC plays with min freq request when GuCRC is enabled - * Block RC6 for a more reliable read. - */ - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - ret = -ETIMEDOUT; - goto fw; - } - ret = pc_action_query_task_state(pc); if (ret) - goto fw; + goto out; *freq = pc_get_min_freq(pc); -fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); out: mutex_unlock(&pc->freq_lock); return ret; @@ -969,8 +1006,8 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) xe_gt_assert(gt, xe_device_uc_enabled(xe)); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { xe_force_wake_put(gt_to_fw(gt), fw_ref); return -ETIMEDOUT; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index efda432fadfc..619f59cd633c 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -21,6 +21,7 @@ int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc); +u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq); diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 13810be015db..2978ac9a249b 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -17,6 +17,8 @@ struct xe_guc_pc { struct xe_bo *bo; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; + /** @rpa_freq: HW RPa frequency - The Achievable one */ + u32 rpa_freq; /** @rpe_freq: HW RPe frequency - The Efficient one */ u32 rpe_freq; /** @rpn_freq: HW RPN frequency - The Minimum one */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9c36329fe857..913c74d6e2ae 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1226,7 +1226,7 @@ sched_enable: enable_scheduling(q); rearm: /* - * XXX: Ideally want to adjust timeout based on current exection time + * XXX: Ideally want to adjust timeout based on current execution 
time * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. */ diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 2c32dc46f7d4..089834467880 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -159,7 +159,7 @@ void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) * This function allocates the storage of the userptr sg table. * It is caller's responsibility to free it calling sg_free_table. * - * returns: 0 for succuss; negative error no on failure + * returns: 0 for success; negative error no on failure */ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b19366744148..fc447751fe78 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -324,6 +324,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) { u32 ccs_mask = xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) xe_mmio_write32(&hwe->gt->mmio, RCU_MODE, @@ -332,8 +333,10 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), xe_bo_ggtt_addr(hwe->hwsp)); - xe_hw_engine_mmio_write32(hwe, RING_MODE(0), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + + if (xe_device_has_msix(gt_to_xe(hwe->gt))) + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode); xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), _MASKED_BIT_DISABLE(STOP_RING)); xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); @@ -419,7 +422,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) * Bspec: 72161 */ const u8 mocs_write_idx = gt->mocs.uc_index; - const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && + const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) && (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? gt->mocs.wb_index : gt->mocs.uc_index; u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | @@ -772,7 +775,7 @@ static void check_gsc_availability(struct xe_gt *gt) xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_MASK, ~0); - drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); + drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n"); } } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index e14bee95e364..e4191a7a2c31 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -106,7 +106,7 @@ struct xe_hw_engine_class_intf { * Contains all the hardware engine state for physical instances. */ struct xe_hw_engine { - /** @gt: graphics tile this hw engine belongs to */ + /** @gt: GT structure this hw engine belongs to */ struct xe_gt *gt; /** @name: name of this hw engine */ const char *name; diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index 364a61f4bfda..58a8d09afe5c 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -41,7 +41,7 @@ struct xe_hw_fence_irq { * to a xe_hw_fence_irq, maintains serial seqno. 
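Both RING_MODE updates above (xe_execlist.c and xe_hw_engine.c) rely on the hardware's masked-register convention: the high 16 bits of a write select which of the low 16 bits actually change, so several fields can be flipped in one write with no read-modify-write. A sketch of how the value is composed; the SKETCH_ macros mirror the driver's _MASKED_BIT_* helpers but are local to this example:

        #include <linux/bits.h>
        #include <linux/types.h>

        #define SKETCH_MASKED_BIT_ENABLE(a)     (((a) << 16) | (a))
        #define SKETCH_MASKED_BIT_DISABLE(a)    ((a) << 16)

        static u32 sketch_ring_mode_value(bool has_msix)
        {
                /* GFX_DISABLE_LEGACY_MODE is REG_BIT(3) */
                u32 val = SKETCH_MASKED_BIT_ENABLE(BIT(3));

                /* GFX_MSIX_INTERRUPT_ENABLE is REG_BIT(13) */
                if (has_msix)
                        val |= SKETCH_MASKED_BIT_ENABLE(BIT(13));

                return val;     /* one MMIO write touches only bits 3 and 13 */
        }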
*/ struct xe_hw_fence_ctx { - /** @gt: graphics tile of hardware fence context */ + /** @gt: GT structure of hardware fence context */ struct xe_gt *gt; /** @irq: fence irq handler */ struct xe_hw_fence_irq *irq; diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 1c509e66694d..32f5a67a917b 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -10,6 +10,7 @@ #include <drm/drm_managed.h> #include "display/xe_display.h" +#include "regs/xe_guc_regs.h" #include "regs/xe_irq_regs.h" #include "xe_device.h" #include "xe_drv.h" @@ -29,6 +30,11 @@ #define IIR(offset) XE_REG(offset + 0x8) #define IER(offset) XE_REG(offset + 0xc) +static int xe_irq_msix_init(struct xe_device *xe); +static void xe_irq_msix_free(struct xe_device *xe); +static int xe_irq_msix_request_irqs(struct xe_device *xe); +static void xe_irq_msix_synchronize_irq(struct xe_device *xe); + static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg) { u32 val = xe_mmio_read32(mmio, reg); @@ -572,6 +578,11 @@ static void xe_irq_reset(struct xe_device *xe) if (IS_SRIOV_VF(xe)) return vf_irq_reset(xe); + if (xe_device_uses_memirq(xe)) { + for_each_tile(tile, xe, id) + xe_memirq_reset(&tile->memirq); + } + for_each_tile(tile, xe, id) { if (GRAPHICS_VERx100(xe) >= 1210) dg1_irq_reset(tile); @@ -614,6 +625,14 @@ static void xe_irq_postinstall(struct xe_device *xe) if (IS_SRIOV_VF(xe)) return vf_irq_postinstall(xe); + if (xe_device_uses_memirq(xe)) { + struct xe_tile *tile; + unsigned int id; + + for_each_tile(tile, xe, id) + xe_memirq_postinstall(&tile->memirq); + } + xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); /* @@ -656,60 +675,83 @@ static irq_handler_t xe_irq_handler(struct xe_device *xe) return xelp_irq_handler; } -static void irq_uninstall(void *arg) +static int xe_irq_msi_request_irqs(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + irq_handler_t irq_handler; + int irq, err; + + irq_handler = xe_irq_handler(xe); + if (!irq_handler) { + drm_err(&xe->drm, "No supported interrupt handler"); + return -EINVAL; + } + + irq = pci_irq_vector(pdev, 0); + err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); + if (err < 0) { + drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err); + return err; + } + + return 0; +} + +static void xe_irq_msi_free(struct xe_device *xe) { - struct xe_device *xe = arg; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int irq; + irq = pci_irq_vector(pdev, 0); + free_irq(irq, xe); +} + +static void irq_uninstall(void *arg) +{ + struct xe_device *xe = arg; + if (!atomic_xchg(&xe->irq.enabled, 0)) return; xe_irq_reset(xe); - irq = pci_irq_vector(pdev, 0); - free_irq(irq, xe); + if (xe_device_has_msix(xe)) + xe_irq_msix_free(xe); + else + xe_irq_msi_free(xe); +} + +int xe_irq_init(struct xe_device *xe) +{ + spin_lock_init(&xe->irq.lock); + + return xe_irq_msix_init(xe); } int xe_irq_install(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - unsigned int irq_flags = PCI_IRQ_MSIX; - irq_handler_t irq_handler; - int err, irq, nvec; - - irq_handler = xe_irq_handler(xe); - if (!irq_handler) { - drm_err(&xe->drm, "No supported interrupt handler"); - return -EINVAL; - } + unsigned int irq_flags = PCI_IRQ_MSI; + int nvec = 1; + int err; xe_irq_reset(xe); - nvec = pci_msix_vec_count(pdev); - if (nvec <= 0) { - if (nvec == -EINVAL) { - /* MSIX capability is not supported in the device, using MSI */ - irq_flags = PCI_IRQ_MSI; - nvec = 1; - } else { - drm_err(&xe->drm, "MSIX: Failed getting 
count\n"); - return nvec; - } + if (xe_device_has_msix(xe)) { + nvec = xe->irq.msix.nvec; + irq_flags = PCI_IRQ_MSIX; } err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags); if (err < 0) { - drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err); + drm_err(&xe->drm, "Failed to allocate IRQ vectors: %d\n", err); return err; } - irq = pci_irq_vector(pdev, 0); - err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); - if (err < 0) { - drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err); + err = xe_device_has_msix(xe) ? xe_irq_msix_request_irqs(xe) : + xe_irq_msi_request_irqs(xe); + if (err) return err; - } atomic_set(&xe->irq.enabled, 1); @@ -722,18 +764,28 @@ int xe_irq_install(struct xe_device *xe) return 0; free_irq_handler: - free_irq(irq, xe); + if (xe_device_has_msix(xe)) + xe_irq_msix_free(xe); + else + xe_irq_msi_free(xe); return err; } -void xe_irq_suspend(struct xe_device *xe) +static void xe_irq_msi_synchronize_irq(struct xe_device *xe) { - int irq = to_pci_dev(xe->drm.dev)->irq; + synchronize_irq(to_pci_dev(xe->drm.dev)->irq); +} +void xe_irq_suspend(struct xe_device *xe) +{ atomic_set(&xe->irq.enabled, 0); /* no new irqs */ - synchronize_irq(irq); /* flush irqs */ + /* flush irqs */ + if (xe_device_has_msix(xe)) + xe_irq_msix_synchronize_irq(xe); + else + xe_irq_msi_synchronize_irq(xe); xe_irq_reset(xe); /* turn irqs off */ } @@ -754,3 +806,198 @@ void xe_irq_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_irq_enable_hwe(gt); } + +/* MSI-X related definitions and functions below. */ + +enum xe_irq_msix_static { + GUC2HOST_MSIX = 0, + DEFAULT_MSIX = XE_IRQ_DEFAULT_MSIX, + /* Must be last */ + NUM_OF_STATIC_MSIX, +}; + +static int xe_irq_msix_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int nvec = pci_msix_vec_count(pdev); + + if (nvec == -EINVAL) + return 0; /* MSI */ + + if (nvec < 0) { + drm_err(&xe->drm, "Failed getting MSI-X vectors count: %d\n", nvec); + return nvec; + } + + xe->irq.msix.nvec = nvec; + xa_init_flags(&xe->irq.msix.indexes, XA_FLAGS_ALLOC); + return 0; +} + +static irqreturn_t guc2host_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_tile *tile; + u8 id; + + if (!atomic_read(&xe->irq.enabled)) + return IRQ_NONE; + + for_each_tile(tile, xe, id) + xe_guc_irq_handler(&tile->primary_gt->uc.guc, + GUC_INTR_GUC2HOST); + + return IRQ_HANDLED; +} + +static irqreturn_t xe_irq_msix_default_hwe_handler(int irq, void *arg) +{ + unsigned int tile_id, gt_id; + struct xe_device *xe = arg; + struct xe_memirq *memirq; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_tile *tile; + struct xe_gt *gt; + + if (!atomic_read(&xe->irq.enabled)) + return IRQ_NONE; + + for_each_tile(tile, xe, tile_id) { + memirq = &tile->memirq; + if (!memirq->bo) + continue; + + for_each_gt(gt, xe, gt_id) { + if (gt->tile != tile) + continue; + + for_each_hw_engine(hwe, gt, id) + xe_memirq_hwe_handler(memirq, hwe); + } + } + + return IRQ_HANDLED; +} + +static int xe_irq_msix_alloc_vector(struct xe_device *xe, void *irq_buf, + bool dynamic_msix, u16 *msix) +{ + struct xa_limit limit; + int ret; + u32 id; + + limit = (dynamic_msix) ? 
XA_LIMIT(NUM_OF_STATIC_MSIX, xe->irq.msix.nvec - 1) : + XA_LIMIT(*msix, *msix); + ret = xa_alloc(&xe->irq.msix.indexes, &id, irq_buf, limit, GFP_KERNEL); + if (ret) + return ret; + + if (dynamic_msix) + *msix = id; + + return 0; +} + +static void xe_irq_msix_release_vector(struct xe_device *xe, u16 msix) +{ + xa_erase(&xe->irq.msix.indexes, msix); +} + +static int xe_irq_msix_request_irq_internal(struct xe_device *xe, irq_handler_t handler, + void *irq_buf, const char *name, u16 msix) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int ret, irq; + + irq = pci_irq_vector(pdev, msix); + if (irq < 0) + return irq; + + ret = request_irq(irq, handler, IRQF_SHARED, name, irq_buf); + if (ret < 0) + return ret; + + return 0; +} + +int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf, + const char *name, bool dynamic_msix, u16 *msix) +{ + int ret; + + ret = xe_irq_msix_alloc_vector(xe, irq_buf, dynamic_msix, msix); + if (ret) + return ret; + + ret = xe_irq_msix_request_irq_internal(xe, handler, irq_buf, name, *msix); + if (ret) { + drm_err(&xe->drm, "Failed to request IRQ for MSI-X %u\n", *msix); + xe_irq_msix_release_vector(xe, *msix); + return ret; + } + + return 0; +} + +void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int irq; + void *irq_buf; + + irq_buf = xa_load(&xe->irq.msix.indexes, msix); + if (!irq_buf) + return; + + irq = pci_irq_vector(pdev, msix); + if (irq < 0) { + drm_err(&xe->drm, "MSI-X %u can't be released, there is no matching IRQ\n", msix); + return; + } + + free_irq(irq, irq_buf); + xe_irq_msix_release_vector(xe, msix); +} + +int xe_irq_msix_request_irqs(struct xe_device *xe) +{ + int err; + u16 msix; + + msix = GUC2HOST_MSIX; + err = xe_irq_msix_request_irq(xe, guc2host_irq_handler, xe, + DRIVER_NAME "-guc2host", false, &msix); + if (err) + return err; + + msix = DEFAULT_MSIX; + err = xe_irq_msix_request_irq(xe, xe_irq_msix_default_hwe_handler, xe, + DRIVER_NAME "-default-msix", false, &msix); + if (err) { + xe_irq_msix_free_irq(xe, GUC2HOST_MSIX); + return err; + } + + return 0; +} + +void xe_irq_msix_free(struct xe_device *xe) +{ + unsigned long msix; + u32 *dummy; + + xa_for_each(&xe->irq.msix.indexes, msix, dummy) + xe_irq_msix_free_irq(xe, msix); + xa_destroy(&xe->irq.msix.indexes); +} + +void xe_irq_msix_synchronize_irq(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + unsigned long msix; + u32 *dummy; + + xa_for_each(&xe->irq.msix.indexes, msix, dummy) + synchronize_irq(pci_irq_vector(pdev, msix)); +} diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h index 067514e13675..a28bd577ba52 100644 --- a/drivers/gpu/drm/xe/xe_irq.h +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -6,13 +6,21 @@ #ifndef _XE_IRQ_H_ #define _XE_IRQ_H_ +#include <linux/interrupt.h> + +#define XE_IRQ_DEFAULT_MSIX 1 + struct xe_device; struct xe_tile; struct xe_gt; +int xe_irq_init(struct xe_device *xe); int xe_irq_install(struct xe_device *xe); void xe_irq_suspend(struct xe_device *xe); void xe_irq_resume(struct xe_device *xe); void xe_irq_enable_hwe(struct xe_gt *gt); +int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf, + const char *name, bool dynamic_msix, u16 *msix); +void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix); #endif diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 22e58c6e2a35..bbb9ffbf6367 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -584,6 
+584,7 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) { struct xe_memirq *memirq = >_to_tile(hwe->gt)->memirq; struct xe_device *xe = gt_to_xe(hwe->gt); + u8 num_regs; if (!xe_device_uses_memirq(xe)) return; @@ -593,12 +594,18 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); - regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + num_regs = xe_device_has_msix(xe) ? 3 : 2; + regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) | MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe); regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe); + + if (xe_device_has_msix(xe)) { + regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr; + /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */ + } } static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) @@ -876,7 +883,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size) + struct xe_vm *vm, u32 ring_size, u16 msix_vec) { struct xe_gt *gt = hwe->gt; struct xe_tile *tile = gt_to_tile(gt); @@ -945,6 +952,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_drm_client_add_bo(vm->xef->client, lrc->bo); } + if (xe_device_has_msix(xe)) { + xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR, + xe_memirq_status_ptr(&tile->memirq, hwe)); + xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR, + xe_memirq_source_ptr(&tile->memirq, hwe)); + xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec); + } + if (xe_gt_has_indirect_ring_state(gt)) { xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, __xe_lrc_indirect_ring_ggtt_addr(lrc)); @@ -1005,6 +1020,7 @@ err_lrc_finish: * @hwe: Hardware Engine * @vm: The VM (address space) * @ring_size: LRC ring size + * @msix_vec: MSI-X interrupt vector (for platforms that support it) * * Allocate and initialize the Logical Ring Context (LRC). * @@ -1012,7 +1028,7 @@ err_lrc_finish: * upon failure. 
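
The xe_lrc_init() hunk above seeds the context image with the MSI-X vector via CTX_CS_INT_VEC_DATA, writing the same vector number into both 16-bit halves of the dword. A small sketch of that packing, with the helper name being hypothetical:

	/* Duplicate the MSI-X vector into both halves of the
	 * CS_INT_VEC context dword, as xe_lrc_init() does above.
	 */
	static inline u32 example_cs_int_vec_data(u16 msix_vec)
	{
		return ((u32)msix_vec << 16) | msix_vec;
	}

Together with the extra MI_LRI register slot added in set_memory_based_intr() (num_regs goes from 2 to 3 when MSI-X is available), this is what routes a given engine's context interrupts to the vector requested through xe_lrc_create().
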
*/ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size) + u32 ring_size, u16 msix_vec) { struct xe_lrc *lrc; int err; @@ -1021,7 +1037,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, if (!lrc) return ERR_PTR(-ENOMEM); - err = xe_lrc_init(lrc, hwe, vm, ring_size); + err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec); if (err) { kfree(lrc); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index b459dcab8787..4206e6a8b50a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -42,7 +42,7 @@ struct xe_lrc_snapshot { #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size); + u32 ring_size, u16 msix_vec); void xe_lrc_destroy(struct kref *ref); /** diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 1b97d90aadda..278bc96cf593 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1506,7 +1506,7 @@ err_bb: * using the default engine for the updates, they will be performed in the * order they grab the job_mutex. If different engines are used, external * synchronization is needed for overlapping updates to maintain page-table - * consistency. Note that the meaing of "overlapping" is that the updates + * consistency. Note that the meaning of "overlapping" is that the updates * touch the same page-table, which might be a higher-level page-directory. * If no pipelining is needed, then updates may be performed by the cpu. * diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index ec88b18e9baa..eeb96b5f49e2 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -16,7 +16,6 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" -#include "regs/xe_lrc_layout.h" #include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_bb.h" @@ -28,7 +27,6 @@ #include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_guc_pc.h" -#include "xe_lrc.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" @@ -74,12 +72,6 @@ struct xe_oa_config { struct rcu_head rcu; }; -struct flex { - struct xe_reg reg; - u32 offset; - u32 value; -}; - struct xe_oa_open_param { struct xe_file *xef; u32 oa_unit_id; @@ -97,6 +89,7 @@ struct xe_oa_open_param { int num_syncs; struct xe_sync_entry *syncs; size_t oa_buffer_size; + int wait_num_reports; }; struct xe_oa_config_bo { @@ -241,11 +234,10 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 tail, hw_tail, partial_report_size, available; int report_size = stream->oa_buffer.format->size; - u32 tail, hw_tail; unsigned long flags; bool pollin; - u32 partial_report_size; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); @@ -289,8 +281,8 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) stream->oa_buffer.tail = tail; - pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, - stream->oa_buffer.head) >= report_size; + available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head); + pollin = available >= stream->wait_num_reports * report_size; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -605,19 +597,38 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) return ret; } +static void 
xe_oa_lock_vma(struct xe_exec_queue *q) +{ + if (q->vm) { + down_read(&q->vm->lock); + xe_vm_lock(q->vm, false); + } +} + +static void xe_oa_unlock_vma(struct xe_exec_queue *q) +{ + if (q->vm) { + xe_vm_unlock(q->vm); + up_read(&q->vm->lock); + } +} + static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, struct xe_bb *bb) { + struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q; struct xe_sched_job *job; struct dma_fence *fence; int err = 0; - /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ - job = xe_bb_create_job(stream->k_exec_q, bb); + xe_oa_lock_vma(q); + + job = xe_bb_create_job(q, bb); if (IS_ERR(job)) { err = PTR_ERR(job); goto exit; } + job->ggtt = true; if (deps == XE_OA_SUBMIT_ADD_DEPS) { for (int i = 0; i < stream->num_syncs && !err; i++) @@ -632,10 +643,13 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); + xe_oa_unlock_vma(q); + return fence; err_put_job: xe_sched_job_put(job); exit: + xe_oa_unlock_vma(q); return ERR_PTR(err); } @@ -684,63 +698,19 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream) dma_fence_put(stream->last_fence); } -static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, - struct xe_bb *bb, const struct flex *flex, u32 count) -{ - u32 offset = xe_bo_ggtt_addr(lrc->bo); - - do { - bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); - bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = flex->value; - - } while (flex++, --count); -} - -static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, - const struct flex *flex, u32 count) +static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count) { struct dma_fence *fence; struct xe_bb *bb; int err; - bb = xe_bb_new(stream->gt, 4 * count, false); + bb = xe_bb_new(stream->gt, 2 * count + 1, false); if (IS_ERR(bb)) { err = PTR_ERR(bb); goto exit; } - xe_oa_store_flex(stream, lrc, bb, flex, count); - - fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_bb; - } - xe_bb_free(bb, fence); - dma_fence_put(fence); - - return 0; -free_bb: - xe_bb_free(bb, NULL); -exit: - return err; -} - -static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) -{ - struct dma_fence *fence; - struct xe_bb *bb; - int err; - - bb = xe_bb_new(stream->gt, 3, false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; - } - - write_cs_mi_lri(bb, reg_lri, 1); + write_cs_mi_lri(bb, reg_lri, count); fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); if (IS_ERR(fence)) { @@ -760,71 +730,55 @@ exit: static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? 
OA_COUNTER_RESUME : 0, }, { + OAR_OACONTROL, + oacontrol, + }, + { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) }, }; - struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; - int err; - - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, ®_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); } static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? OA_COUNTER_RESUME : 0, }, { + OAC_OACONTROL, + oacontrol + }, + { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), }, }; - struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; - int err; /* Set ccs select to enable programming of OAC_OACONTROL */ xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, ®_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); } static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) @@ -1285,6 +1239,17 @@ static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value, return 0; } +static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (!value) { + drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value); + return -EINVAL; + } + param->wait_num_reports = value; + return 0; +} + static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { @@ -1306,6 +1271,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = { [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports, }; static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { @@ -1321,6 +1287,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval, }; static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum 
xe_oa_user_extn_from from, @@ -1704,81 +1671,6 @@ static const struct file_operations xe_oa_fops = { .mmap = xe_oa_mmap, }; -static bool engine_supports_mi_query(struct xe_hw_engine *hwe) -{ - return hwe->class == XE_ENGINE_CLASS_RENDER || - hwe->class == XE_ENGINE_CLASS_COMPUTE; -} - -static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) -{ - u32 idx = *offset; - u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); - bool found = false; - - idx++; - for (; idx < len; idx += 2) { - if (state[idx] == reg) { - found = true; - break; - } - } - - *offset = idx; - return found; -} - -#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \ - REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM)) - -static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg) -{ - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) + - lrc->ring.size) / sizeof(u32); - u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 *state = (u32 *)lrc->bo->vmap.vaddr; - - if (drm_WARN_ON(&stream->oa->xe->drm, !state)) - return U32_MAX; - - for (; offset < len; ) { - if (IS_MI_LRI_CMD(state[offset])) { - /* - * We expect reg-value pairs in MI_LRI command, so - * MI_LRI_LEN() should be even - */ - drm_WARN_ON(&stream->oa->xe->drm, - MI_LRI_LEN(state[offset]) & 0x1); - - if (xe_oa_find_reg_in_lri(state, reg, &offset, len)) - break; - } else { - offset++; - } - } - - return offset < len ? offset : U32_MAX; -} - -static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream) -{ - struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base); - u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class]; - - /* Do this only once. Failure is stored as offset of U32_MAX */ - if (offset) - goto exit; - - offset = xe_oa_context_image_offset(stream, reg.addr); - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset; - - drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n", - stream->hwe->name, offset); -exit: - return offset && offset != U32_MAX ? 
0 : -ENODEV; -} - static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { @@ -1797,6 +1689,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; + stream->wait_num_reports = param->wait_num_reports; stream->xef = xe_file_get(param->xef); stream->num_syncs = param->num_syncs; @@ -1815,17 +1708,6 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, else stream->oa_buffer.circ_size = param->oa_buffer_size; - if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { - /* If we don't find the context offset, just return error */ - ret = xe_oa_set_ctx_ctrl_offset(stream); - if (ret) { - drm_err(&stream->oa->xe->drm, - "xe_oa_set_ctx_ctrl_offset failed for %s\n", - stream->hwe->name); - goto exit; - } - } - stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); if (!stream->oa_config) { drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set); @@ -2094,8 +1976,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) return -ENOENT; - if (param.exec_q->width > 1) - drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) + return -EOPNOTSUPP; } /* @@ -2156,6 +2038,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (!param.oa_buffer_size) param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; + if (!param.wait_num_reports) + param.wait_num_reports = 1; + if (param.wait_num_reports > param.oa_buffer_size / f->size) { + drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports); + ret = -EINVAL; + goto err_exec_q; + } + ret = xe_oa_parse_syncs(oa, ¶m); if (ret) goto err_exec_q; @@ -2273,6 +2163,7 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index df7793915628..52e33c37d5ee 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -138,9 +138,6 @@ struct xe_oa { /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ struct idr metrics_idr; - /** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */ - u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX]; - /** @oa_formats: tracks all OA formats across platforms */ const struct xe_oa_format *oa_formats; @@ -218,6 +215,9 @@ struct xe_oa_stream { /** @pollin: Whether there is data available to read */ bool pollin; + /** @wait_num_reports: Number of reports to wait for before signalling pollin */ + int wait_num_reports; + /** @periodic: Whether periodic sampling is currently enabled */ bool periodic; diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index 8ec1b84cbb9e..57cf01efc07f 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -56,7 +56,7 @@ 
int xe_observation_ioctl(struct drm_device *dev, void *data, struct drm_file *fi } } -static struct ctl_table observation_ctl_table[] = { +static const struct ctl_table observation_ctl_table[] = { { .procname = "observation_paranoid", .data = &xe_observation_paranoid, diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6b7f77425c7f..39be74848e44 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -490,7 +490,7 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, * least basic xe_gt and xe_guc initialization. * * Since to obtain the value of GMDID_MEDIA we need to use the - * media GuC, temporarly tweak the gt type. + * media GuC, temporarily tweak the gt type. */ xe_gt_assert(gt, gt->info.type == XE_GT_TYPE_UNINITIALIZED); @@ -781,7 +781,7 @@ static void xe_pci_remove(struct pci_dev *pdev) * error injectable functions is proper handling of the error code by the * caller for recovery, which is always the case here. The second * requirement is that no state is changed before the first error return. - * It is not strictly fullfilled for all initialization functions using the + * It is not strictly fulfilled for all initialization functions using the * ALLOW_ERROR_INJECTION() macro but this is acceptable because for those * error cases at probe time, the error code is simply propagated up by the * caller. Therefore there is no consequence on those specific callers when diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index d95d9835de42..9333ce776a6e 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -217,7 +217,7 @@ out: * * It returns 0 on success, and -ERROR number on failure, -EINVAL if max * frequency is higher then the minimal, and other errors directly translated - * from the PCODE Error returs: + * from the PCODE Error returns: * - -ENXIO: "Illegal Command" * - -ETIMEDOUT: "Timed out" * - -EINVAL: "Illegal Data" diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index a6761cb769b2..c9cc0c091dfd 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -7,6 +7,7 @@ #include <linux/fault-inject.h> #include <linux/pm_runtime.h> +#include <linux/suspend.h> #include <drm/drm_managed.h> #include <drm/ttm/ttm_placement.h> @@ -390,7 +391,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify - * also checks and delets bo entry from user fault list. + * also checks and deletes bo entry from user fault list. */ mutex_lock(&xe->mem_access.vram_userfault.lock); list_for_each_entry_safe(bo, on, @@ -607,7 +608,8 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe) struct device *dev = xe->drm.dev; return dev->power.runtime_status == RPM_SUSPENDING || - dev->power.runtime_status == RPM_RESUMING; + dev->power.runtime_status == RPM_RESUMING || + pm_suspend_target_state != PM_SUSPEND_ON; #else return false; #endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 65c3c1688710..1ddcc7e79a93 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -276,7 +276,7 @@ struct xe_pt_stage_bind_walk { /* Also input, but is updated during the walk*/ /** @curs: The DMA address cursor. 
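
The xe_pm.c hunk above broadens the "suspending or resuming" test beyond runtime PM: pm_suspend_target_state, declared in linux/suspend.h, stays PM_SUSPEND_ON except while the system is entering or leaving a sleep state, so comparing it against PM_SUSPEND_ON also catches system-wide suspend. A condensed sketch of the resulting predicate (the function name is hypothetical, CONFIG_PM assumed enabled; the real function guards the same fields shown in the hunk):

	#include <linux/pm_runtime.h>
	#include <linux/suspend.h>

	static bool example_in_pm_transition(struct device *dev)
	{
		return dev->power.runtime_status == RPM_SUSPENDING ||	/* runtime suspend in flight */
		       dev->power.runtime_status == RPM_RESUMING ||	/* runtime resume in flight */
		       pm_suspend_target_state != PM_SUSPEND_ON;	/* system-wide suspend targeted */
	}
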
*/ struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ + /** @va_curs_start: The Virtual address corresponding to @curs->start */ u64 va_curs_start; /* Output */ diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index d2a816f71bf2..c059639613f7 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -672,7 +672,8 @@ static int query_oa_units(struct xe_device *xe, du->oa_unit_type = u->type; du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | - DRM_XE_OA_CAPS_OA_BUFFER_SIZE; + DRM_XE_OA_CAPS_OA_BUFFER_SIZE | + DRM_XE_OA_CAPS_WAIT_NUM_REPORTS; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 0be4f489d3e1..9f327f27c072 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -221,7 +221,10 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, static u32 get_ppgtt_flag(struct xe_sched_job *job) { - return job->q->vm ? BIT(8) : 0; + if (job->q->vm && !job->ggtt) + return BIT(8); + + return 0; } static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index b13d4d62f0b1..7a1c78fdfc92 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -340,3 +340,8 @@ bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, return dss >= dss_per_gslice; } +bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return !IS_SRIOV_VF(gt_to_xe(gt)); +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 827d932b6908..38b9f13bba5e 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -131,7 +131,7 @@ struct xe_reg_sr; * @ver_end__: Last graphics IP version to match * * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. - * inclusive on boths sides + * inclusive on both sides * * Refer to XE_RTP_RULES() for expected usage. */ @@ -169,7 +169,7 @@ struct xe_reg_sr; * @ver_end__: Last media IP version to match * * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. - * inclusive on boths sides + * inclusive on both sides * * Refer to XE_RTP_RULES() for expected usage. */ @@ -476,4 +476,15 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, const struct xe_hw_engine *hwe); +/* + * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device + * + * @gt: GT structure + * @hwe: Engine instance + * + * Returns: true if device is not VF, false otherwise. + */ +bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index f13f333f00be..d942b20a9f29 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -56,6 +56,8 @@ struct xe_sched_job { u32 migrate_flush_flags; /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ bool ring_ops_flush_tlb; + /** @ggtt: mapped in ggtt. */ + bool ggtt; /** @ptrs: per instance pointers. 
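
The new @ggtt flag is the consumer side of the OA change earlier in this series: xe_oa_submit_bb() marks its jobs with job->ggtt = true because OA batch buffers are mapped in the global GTT, and get_ppgtt_flag() in the xe_ring_ops.c hunk above then suppresses the PPGTT address-space bit (BIT(8)) for them. The two sides meet roughly like this (a sketch stitched together from the hunks, not new driver code):

	/* Producer (xe_oa.c): OA batches live in the GGTT. */
	job->ggtt = true;

	/* Consumer (xe_ring_ops.c): select PPGTT only for jobs that
	 * have a VM and are not flagged as GGTT-mapped.
	 */
	u32 ppgtt_flag = (job->q->vm && !job->ggtt) ? BIT(8) : 0;
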
*/ struct xe_job_ptrs ptrs[]; }; diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index 1762dd30ba6d..ea50fee50c7d 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -60,8 +60,8 @@ TRACE_EVENT(xe_bo_move, TP_STRUCT__entry( __field(struct xe_bo *, bo) __field(size_t, size) - __field(u32, new_placement) - __field(u32, old_placement) + __string(new_placement_name, xe_mem_type_to_name[new_placement]) + __string(old_placement_name, xe_mem_type_to_name[old_placement]) __string(device_id, __dev_name_bo(bo)) __field(bool, move_lacks_source) ), @@ -69,15 +69,15 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; __entry->size = bo->size; - __entry->new_placement = new_placement; - __entry->old_placement = old_placement; + __assign_str(new_placement_name); + __assign_str(old_placement_name); __assign_str(device_id); __entry->move_lacks_source = move_lacks_source; ), TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, - xe_mem_type_to_name[__entry->old_placement], - xe_mem_type_to_name[__entry->new_placement], __get_str(device_id)) + __get_str(old_placement_name), + __get_str(new_placement_name), __get_str(device_id)) ); DECLARE_EVENT_CLASS(xe_vma, diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index c95728c45ea4..f4a16e5fa770 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -5,6 +5,7 @@ */ #include <drm/drm_managed.h> +#include <drm/drm_drv.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_range_manager.h> @@ -311,6 +312,13 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, struct ttm_resource_manager *man = &mgr->manager; int err; + if (mem_type != XE_PL_STOLEN) { + const char *name = mem_type == XE_PL_VRAM0 ? 
"vram0" : "vram1"; + man->cg = drmm_cgroup_register_region(&xe->drm, name, size); + if (IS_ERR(man->cg)) + return PTR_ERR(man->cg); + } + man->func = &xe_ttm_vram_mgr_func; mgr->mem_type = mem_type; mutex_init(&mgr->lock); diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 0d8caa0e7354..ad3b35a0e6eb 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -92,7 +92,7 @@ struct xe_uc_fw { const enum xe_uc_fw_status status; /** * @__status: private firmware load status - only to be used - * by firmware laoding code + * by firmware loading code */ enum xe_uc_fw_status __status; }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b4a44e1ea167..690330352d4c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1024,7 +1024,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) /* * Since userptr pages are not pinned, we can't remove - * the notifer until we're sure the GPU is not accessing + * the notifier until we're sure the GPU is not accessing * them anymore */ mmu_interval_notifier_remove(&userptr->notifier); @@ -2107,7 +2107,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) } } - /* Adjust for partial unbind after removin VMA from VM */ + /* Adjust for partial unbind after removing VMA from VM */ if (!err) { op->base.remap.unmap->va->va.addr = op->remap.start; op->base.remap.unmap->va->va.range = op->remap.range; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 3ed12a85cc60..40438c3d9b72 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -34,7 +34,7 @@ GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) - MEDIA_VERSION(3000), MEDIA_STEP(A0, B0) + MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) 22019338487_display PLATFORM(LUNARLAKE) 16023588340 GRAPHICS_VERSION(2001) 14019789679 GRAPHICS_VERSION(1255) diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 0b63fd48ea92..979f6d3239ba 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -1564,7 +1564,7 @@ static void zynqmp_dp_bridge_atomic_enable(struct drm_bridge *bridge, pm_runtime_get_sync(dp->dev); - mutex_lock(&dp->lock); + guard(mutex)(&dp->lock); zynqmp_dp_disp_enable(dp, old_bridge_state); /* @@ -1624,7 +1624,6 @@ static void zynqmp_dp_bridge_atomic_enable(struct drm_bridge *bridge, zynqmp_dp_write(dp, ZYNQMP_DP_SOFTWARE_RESET, ZYNQMP_DP_SOFTWARE_RESET_ALL); zynqmp_dp_write(dp, ZYNQMP_DP_MAIN_STREAM_ENABLE, 1); - mutex_unlock(&dp->lock); } static void zynqmp_dp_bridge_atomic_disable(struct drm_bridge *bridge, diff --git a/drivers/gpu/drm/xlnx/zynqmp_dpsub.h b/drivers/gpu/drm/xlnx/zynqmp_dpsub.h index 49875529c2a4..d771b8b199e0 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dpsub.h +++ b/drivers/gpu/drm/xlnx/zynqmp_dpsub.h @@ -60,6 +60,7 @@ struct zynqmp_dpsub_audio; * @layers: Video and graphics layers * @dp: The DisplayPort controller * @dma_align: DMA alignment constraint (must be a power of 2) + * @audio: DP audio data */ struct zynqmp_dpsub { struct device *dev; |