diff options
author | Jani Nikula <jani.nikula@intel.com> | 2021-10-29 13:38:34 +0300 |
---|---|---|
committer | Jani Nikula <jani.nikula@intel.com> | 2021-10-29 13:40:45 +0300 |
commit | c1bb3a463dac815598362fb642a2746ff0e8f2f8 (patch) | |
tree | 090d50f01c5c4e1e29b1d4398251745715e5a2a9 /drivers/gpu | |
parent | ead3ea12e133416fbd800eedb2fb5d0faf2df431 (diff) | |
parent | 31fa8cbce4664946a1688898410fee41ad05364d (diff) |
Merge drm/drm-next into drm-intel-next
Backmerge to get the DP 2.0 MST changes merged to drm-next. This also
syncs us up to v5.15-rc7.
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu')
271 files changed, 7084 insertions, 3781 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 8d0748184a14..653726588956 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -73,10 +73,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ - vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ - arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o vangogh_reg_init.o \ - nbio_v7_2.o dimgrey_cavefish_reg_init.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o \ - beige_goby_reg_init.o yellow_carp_reg_init.o cyan_skillfish_reg_init.o + vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ + nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o # add DF block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 148f6c3343ab..bcfdb63b1d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -307,6 +307,8 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.late_initialized = true; } + amdgpu_ras_set_error_query_ready(adev, true); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 054c1a224def..cdf46bd0d8d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1503,7 +1503,7 @@ allocate_init_user_pages_failed: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: - amdgpu_bo_unref(&bo); + drm_gem_object_put(gobj); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 239e71174855..c218d53a031d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3839,10 +3839,10 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_fbdev_fini(adev); - amdgpu_irq_fini_hw(adev); - amdgpu_device_ip_fini_early(adev); + amdgpu_irq_fini_hw(adev); + ttm_device_clear_dma_mappings(&adev->mman.bdev); amdgpu_gart_dummy_page_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 2bebd2ce6474..208a784475bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -108,6 +108,8 @@ static const char *hw_id_names[HW_ID_MAX] = { [HDP_HWID] = "HDP", [SDMA0_HWID] = "SDMA0", [SDMA1_HWID] = "SDMA1", + [SDMA2_HWID] = "SDMA2", + [SDMA3_HWID] = "SDMA3", [ISP_HWID] = "ISP", [DBGU_IO_HWID] = "DBGU_IO", [DF_HWID] = "DF", @@ -505,6 +507,10 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) break; } } + /* some IP discovery tables on Navy Flounder don't have this set correctly */ + if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) && + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2))) + adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; if (vcn_harvest_count == adev->vcn.num_vcn_inst) { adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; @@ -736,6 +742,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(1, 0, 1): case IP_VERSION(2, 0, 2): case IP_VERSION(2, 0, 0): + case IP_VERSION(2, 0, 3): case IP_VERSION(2, 1, 0): case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 2): @@ -745,8 +752,6 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 1, 3): amdgpu_device_ip_block_add(adev, &dm_ip_block); break; - case IP_VERSION(2, 0, 3): - break; default: return -EINVAL; } @@ -1120,10 +1125,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): - case IP_VERSION(7, 4, 4): adev->nbio.funcs = &nbio_v7_4_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; + case IP_VERSION(7, 4, 4): + adev->nbio.funcs = &nbio_v7_4_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald; + break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): case IP_VERSION(7, 5, 0): @@ -1134,12 +1142,15 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): + adev->nbio.funcs = &nbio_v2_3_funcs; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): case IP_VERSION(3, 3, 3): adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 6b39e6c02dd8..fd04e83031d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -65,7 +65,6 @@ static int psp_securedisplay_terminate(struct psp_context *psp); * * This new sequence is required for * - Arcturus and onwards - * - Navi12 and onwards */ static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp) { @@ -77,7 +76,9 @@ static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp } switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 0, 5): case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 9): case IP_VERSION(11, 0, 11): @@ -1291,6 +1292,29 @@ static int psp_ras_unload(struct psp_context *psp) return psp_ta_unload(psp, &psp->ras_context.context); } +static void psp_ras_ta_check_status(struct psp_context *psp) +{ + struct ta_ras_shared_memory *ras_cmd = + (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + + switch (ras_cmd->ras_status) { + case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP: + dev_warn(psp->adev->dev, + "RAS WARNING: cmd failed due to unsupported ip\n"); + break; + case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ: + dev_warn(psp->adev->dev, + "RAS WARNING: cmd failed due to unsupported error injection\n"); + break; + case TA_RAS_STATUS__SUCCESS: + break; + default: + dev_warn(psp->adev->dev, + "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); + break; + } +} + int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { struct ta_ras_shared_memory *ras_cmd; @@ -1325,10 +1349,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) dev_warn(psp->adev->dev, "RAS internal register access blocked\n"); - if (ras_cmd->ras_status == TA_RAS_STATUS__ERROR_UNSUPPORTED_IP) - dev_warn(psp->adev->dev, "RAS WARNING: cmd failed due to unsupported ip\n"); - else if (ras_cmd->ras_status) - dev_warn(psp->adev->dev, "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); + psp_ras_ta_check_status(psp); } return ret; @@ -2622,6 +2643,12 @@ static int psp_resume(void *handle) goto failed; } + ret = psp_rl_load(adev); + if (ret) { + dev_err(adev->dev, "PSP load RL failed!\n"); + goto failed; + } + if (adev->gmc.xgmi.num_physical_nodes > 1) { ret = psp_xgmi_initialize(psp, false, true); /* Warning the XGMI seesion initialize failure diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e8875351967e..08133de21fdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -112,7 +112,12 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr); #ifdef CONFIG_X86_MCE_AMD -static void amdgpu_register_bad_pages_mca_notifier(void); +static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev); +struct mce_notifier_adev_list { + struct amdgpu_device *devs[MAX_GPU_INSTANCE]; + int num_gpu; +}; +static struct mce_notifier_adev_list mce_adev_list; #endif void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready) @@ -2108,7 +2113,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && (adev->gmc.xgmi.connected_to_cpu)) - amdgpu_register_bad_pages_mca_notifier(); + amdgpu_register_bad_pages_mca_notifier(adev); #endif return 0; @@ -2605,24 +2610,18 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev) #ifdef CONFIG_X86_MCE_AMD static struct amdgpu_device *find_adev(uint32_t node_id) { - struct amdgpu_gpu_instance *gpu_instance; int i; struct amdgpu_device *adev = NULL; - mutex_lock(&mgpu_info.mutex); - - for (i = 0; i < mgpu_info.num_gpu; i++) { - gpu_instance = &(mgpu_info.gpu_ins[i]); - adev = gpu_instance->adev; + for (i = 0; i < mce_adev_list.num_gpu; i++) { + adev = mce_adev_list.devs[i]; - if (adev->gmc.xgmi.connected_to_cpu && + if (adev && adev->gmc.xgmi.connected_to_cpu && adev->gmc.xgmi.physical_node_id == node_id) break; adev = NULL; } - mutex_unlock(&mgpu_info.mutex); - return adev; } @@ -2718,9 +2717,19 @@ static struct notifier_block amdgpu_bad_page_nb = { .priority = MCE_PRIO_UC, }; -static void amdgpu_register_bad_pages_mca_notifier(void) +static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev) { /* + * Add the adev to the mce_adev_list. + * During mode2 reset, amdgpu device is temporarily + * removed from the mgpu_info list which can cause + * page retirement to fail. + * Use this list instead of mgpu_info to find the amdgpu + * device on which the UMC error was reported. + */ + mce_adev_list.devs[mce_adev_list.num_gpu++] = adev; + + /* * Register the x86 notifier only once * with MCE subsystem. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 98732518543e..f4c05ff4b26c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1101,7 +1101,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, *exceed_err_limit = true; dev_err(adev->dev, "RAS records:%d exceed threshold:%d, " - "maybe retire this GPU?", + "GPU will not be initialized. Replace this GPU or increase the threshold", control->ras_num_recs, ras->bad_page_cnt_threshold); } } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 19d4d8b54490..590537b62a0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1235,7 +1235,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) * */ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, - unsigned long end) + unsigned long end, unsigned long *userptr) { struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned long size; @@ -1250,6 +1250,8 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt->userptr > end || gtt->userptr + size <= start) return false; + if (userptr) + *userptr = gtt->userptr; return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 639c7b41e30b..7346ecff4438 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -182,7 +182,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm); bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, - unsigned long end); + unsigned long end, unsigned long *userptr); bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 0c3127f37686..920c4f6ac914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -509,7 +509,7 @@ static ssize_t show_##name(struct device *dev, \ struct drm_device *ddev = dev_get_drvdata(dev); \ struct amdgpu_device *adev = drm_to_adev(ddev); \ \ - return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \ + return sysfs_emit(buf, "0x%08x\n", adev->field); \ } \ static DEVICE_ATTR(name, mode, show_##name, NULL) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index c7d316850570..2658414c503d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -949,3 +949,30 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring) return AMDGPU_RING_PRIO_0; } } + +void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) +{ + int i; + unsigned int idx; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* currently only support 2 FW instances */ + if (i >= 2) { + dev_info(adev->dev, "More then 2 VCN FW instances!\n"); + break; + } + idx = AMDGPU_UCODE_ID_VCN + i; + adev->firmware.ucode[idx].ucode_id = idx; + adev->firmware.ucode[idx].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + } + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 795cbaa02ff8..bfa27ea94804 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -310,4 +310,6 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring); +void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 88c4177b708a..99c149397aae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -584,6 +584,7 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->encode_usage = 0; vf2pf_info->decode_usage = 0; + vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr; vf2pf_info->checksum = amd_sriov_msg_checksum( vf2pf_info, vf2pf_info->header.size, 0, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 995899191288..7326b6c1b71c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -261,9 +261,10 @@ struct amd_sriov_msg_vf2pf_info { uint8_t id; uint32_t version; } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE]; + uint64_t dummy_page_addr; /* reserved */ - uint32_t reserved[256-68]; + uint32_t reserved[256-70]; }; /* mailbox message send from guest to host */ diff --git a/drivers/gpu/drm/amd/amdgpu/beige_goby_reg_init.c b/drivers/gpu/drm/amd/amdgpu/beige_goby_reg_init.c deleted file mode 100644 index 608a113ce354..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/beige_goby_reg_init.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include "amdgpu.h" -#include "nv.h" - -#include "soc15_common.h" -#include "soc15_hw_ip.h" -#include "beige_goby_ip_offset.h" - -int beige_goby_reg_base_init(struct amdgpu_device *adev) -{ - /* HW has more IP blocks, only initialize the block needed by driver */ - uint32_t i; - for (i = 0 ; i < MAX_INSTANCE ; ++i) { - adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); - adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); - adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); - adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); - adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); - adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); - adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN0_BASE.instance[i])); - adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); - adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); - adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); - adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); - adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); - } - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c deleted file mode 100644 index 58808814d8fb..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2018 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include "amdgpu.h" -#include "nv.h" - -#include "soc15_common.h" -#include "soc15_hw_ip.h" -#include "cyan_skillfish_ip_offset.h" - -int cyan_skillfish_reg_base_init(struct amdgpu_device *adev) -{ - /* HW has more IP blocks, only initialized the blocke needed by driver */ - uint32_t i; - for (i = 0 ; i < MAX_INSTANCE ; ++i) { - adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); - adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); - adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); - adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); - adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); - adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); - adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); - adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); - adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); - adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); - adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); - } - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 71bb3c0dc1da..90a834dc4008 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -270,25 +270,6 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin"); -static const struct soc15_reg_golden golden_settings_gc_10_0[] = -{ - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), - /* TA_GRAD_ADJ_UCONFIG -> TA_GRAD_ADJ */ - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382), - /* VGT_TF_RING_SIZE_UMD -> VGT_TF_RING_SIZE */ - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2262c24e), - /* VGT_HS_OFFCHIP_PARAM_UMD -> VGT_HS_OFFCHIP_PARAM */ - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x226cc24f), - /* VGT_TF_MEMORY_BASE_UMD -> VGT_TF_MEMORY_BASE */ - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x226ec250), - /* VGT_TF_MEMORY_BASE_HI_UMD -> VGT_TF_MEMORY_BASE_HI */ - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2278c261), - /* VGT_ESGS_RING_SIZE_UMD -> VGT_ESGS_RING_SIZE */ - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2232c240), - /* VGT_GSVS_RING_SIZE_UMD -> VGT_GSVS_RING_SIZE */ - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2233c241), -}; - static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -3810,9 +3791,6 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) break; case IP_VERSION(10, 1, 3): soc15_program_register_sequence(adev, - golden_settings_gc_10_0, - (const u32)ARRAY_SIZE(golden_settings_gc_10_0)); - soc15_program_register_sequence(adev, golden_settings_gc_10_0_cyan_skillfish, (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish)); break; @@ -8238,8 +8216,9 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, /* === CGCG + CGLS === */ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); - if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 1, 10)) && - (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(10, 1, 2))) + if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2))) gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c deleted file mode 100644 index 88efaecf9f70..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2018 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include "amdgpu.h" -#include "nv.h" - -#include "soc15_common.h" -#include "navi10_ip_offset.h" - -int navi10_reg_base_init(struct amdgpu_device *adev) -{ - int i; - - for (i = 0 ; i < MAX_INSTANCE ; ++i) { - adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); - adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); - adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); - adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); - adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); - adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); - adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); - adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); - adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); - adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); - adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); - adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); - adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); - } - - return 0; -} - - diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c deleted file mode 100644 index a786d159e5e9..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2018 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include "amdgpu.h" -#include "nv.h" - -#include "soc15_common.h" -#include "navi12_ip_offset.h" - -int navi12_reg_base_init(struct amdgpu_device *adev) -{ - /* HW has more IP blocks, only initialized the blocks needed by driver */ - uint32_t i; - for (i = 0 ; i < MAX_INSTANCE ; ++i) { - adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); - adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); - adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); - adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); - adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); - adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); - adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); - adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); - adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); - adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); - adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); - adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); - adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); - } - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c deleted file mode 100644 index 4ea1e8fbb601..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2018 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include "amdgpu.h" -#include "nv.h" - -#include "soc15_common.h" -#include "navi14_ip_offset.h" - -int navi14_reg_base_init(struct amdgpu_device *adev) -{ - int i; - - for (i = 0 ; i < MAX_INSTANCE ; ++i) { - adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); - adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); - adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); - adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); - adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); - adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); - adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); - adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); - adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); - adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); - adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); - adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); - adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); - } - - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index b184b656b9b6..4ecd2b5808ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -53,6 +53,16 @@ #define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 +#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L /* Don't use. Firmware uses this bit internally */ +#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK 0x00040000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK 0x00080000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK 0x00100000L + static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -318,6 +328,27 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; +const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = { + .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, + .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, + .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, + .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, + .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, + .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, + .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, + .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, +}; + static void nbio_v2_3_init_registers(struct amdgpu_device *adev) { uint32_t def, data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index a43b60acf7f6..6074dd3a1ed8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -27,6 +27,7 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; +extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 91b3afa946f5..b8bd03d16dba 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -56,12 +56,15 @@ * These are nbio v7_4_1 registers mask. Temporarily define these here since * nbio v7_4_1 header is incomplete. */ -#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L /* Don't use. Firmware uses this bit internally */ #define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L #define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L #define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L #define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L #define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK 0x00040000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK 0x00080000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK 0x00100000L #define mmBIF_MMSCH1_DOORBELL_RANGE 0x01dc #define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2 @@ -334,12 +337,27 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK, .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, +}; + +const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { + .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, + .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, + .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, + .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, + .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, + .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, + .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, + .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, }; static void nbio_v7_4_init_registers(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index b8216581ec8d..cc5692db6f98 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -27,6 +27,7 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; +extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; extern const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 898e688be63c..febc903adf58 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -607,304 +607,11 @@ const struct amdgpu_ip_block_version nv_common_ip_block = .funcs = &nv_common_ip_funcs, }; -static int nv_reg_base_init(struct amdgpu_device *adev) -{ - int r; - - if (amdgpu_discovery) { - r = amdgpu_discovery_reg_base_init(adev); - if (r) { - DRM_WARN("failed to init reg base from ip discovery table, " - "fallback to legacy init method\n"); - goto legacy_init; - } - - amdgpu_discovery_harvest_ip(adev); - - return 0; - } - -legacy_init: - switch (adev->asic_type) { - case CHIP_NAVI10: - navi10_reg_base_init(adev); - break; - case CHIP_NAVI14: - navi14_reg_base_init(adev); - break; - case CHIP_NAVI12: - navi12_reg_base_init(adev); - break; - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - sienna_cichlid_reg_base_init(adev); - break; - case CHIP_VANGOGH: - vangogh_reg_base_init(adev); - break; - case CHIP_DIMGREY_CAVEFISH: - dimgrey_cavefish_reg_base_init(adev); - break; - case CHIP_BEIGE_GOBY: - beige_goby_reg_base_init(adev); - break; - case CHIP_YELLOW_CARP: - yellow_carp_reg_base_init(adev); - break; - case CHIP_CYAN_SKILLFISH: - cyan_skillfish_reg_base_init(adev); - break; - default: - return -EINVAL; - } - - return 0; -} - void nv_set_virt_ops(struct amdgpu_device *adev) { adev->virt.ops = &xgpu_nv_virt_ops; } -int nv_set_ip_blocks(struct amdgpu_device *adev) -{ - int r; - - if (adev->asic_type == CHIP_CYAN_SKILLFISH) { - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - } else if (adev->flags & AMD_IS_APU) { - adev->nbio.funcs = &nbio_v7_2_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg; - } else { - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - } - adev->hdp.funcs = &hdp_v5_0_funcs; - - if (adev->asic_type >= CHIP_SIENNA_CICHLID) - adev->smuio.funcs = &smuio_v11_0_6_funcs; - else - adev->smuio.funcs = &smuio_v11_0_funcs; - - if (adev->asic_type == CHIP_SIENNA_CICHLID) - adev->gmc.xgmi.supported = true; - - /* Set IP register base before any HW register access */ - r = nv_reg_base_init(adev); - if (r) - return r; - - switch (adev->asic_type) { - case CHIP_NAVI10: - case CHIP_NAVI14: - amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - !amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - !amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); - if (adev->enable_mes) - amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); - break; - case CHIP_NAVI12: - amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - if (!amdgpu_sriov_vf(adev)) { - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - } else { - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - !amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); - break; - case CHIP_SIENNA_CICHLID: - amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - if (!amdgpu_sriov_vf(adev)) { - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - } else { - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); - if (adev->enable_mes) - amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); - break; - case CHIP_NAVY_FLOUNDER: - amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - break; - case CHIP_VANGOGH: - amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); - break; - case CHIP_DIMGREY_CAVEFISH: - amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); - break; - case CHIP_BEIGE_GOBY: - amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); - break; - case CHIP_YELLOW_CARP: - amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); - amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); - break; - case CHIP_CYAN_SKILLFISH: - amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_8_ip_block); - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - } - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); - break; - default: - return -EINVAL; - } - - return 0; -} - static uint32_t nv_get_rev_id(struct amdgpu_device *adev) { return adev->nbio.funcs->get_rev_id(adev); @@ -1248,7 +955,7 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; if (adev->pdev->device == 0x1681) - adev->external_rev_id = adev->rev_id + 0x19; + adev->external_rev_id = 0x20; else adev->external_rev_id = adev->rev_id + 0x01; break; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 7df2f85bbcd0..83e9782aef39 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -31,15 +31,5 @@ extern const struct amdgpu_ip_block_version nv_common_ip_block; void nv_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); void nv_set_virt_ops(struct amdgpu_device *adev); -int nv_set_ip_blocks(struct amdgpu_device *adev); -int navi10_reg_base_init(struct amdgpu_device *adev); -int navi14_reg_base_init(struct amdgpu_device *adev); -int navi12_reg_base_init(struct amdgpu_device *adev); -int sienna_cichlid_reg_base_init(struct amdgpu_device *adev); -void vangogh_reg_base_init(struct amdgpu_device *adev); -int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev); -int beige_goby_reg_base_init(struct amdgpu_device *adev); -int yellow_carp_reg_base_init(struct amdgpu_device *adev); -int cyan_skillfish_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c deleted file mode 100644 index 5ee69f70c49b..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include "amdgpu.h" -#include "nv.h" - -#include "soc15_common.h" -#include "soc15_hw_ip.h" -#include "sienna_cichlid_ip_offset.h" - -int sienna_cichlid_reg_base_init(struct amdgpu_device *adev) -{ - /* HW has more IP blocks, only initialized the blocke needed by driver */ - uint32_t i; - for (i = 0 ; i < MAX_INSTANCE ; ++i) { - adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); - adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); - adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); - adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); - adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); - adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); - adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); - adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); - adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); - adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); - adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); - adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); - } - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 0640e143e7a5..0c316a2d42ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -780,185 +780,6 @@ void soc15_set_virt_ops(struct amdgpu_device *adev) soc15_reg_base_init(adev); } -int soc15_set_ip_blocks(struct amdgpu_device *adev) -{ - /* for bare metal case */ - if (!amdgpu_sriov_vf(adev)) - soc15_reg_base_init(adev); - - if (adev->flags & AMD_IS_APU) { - adev->nbio.funcs = &nbio_v7_0_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg; - } else if (adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_ALDEBARAN) { - adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; - } else { - adev->nbio.funcs = &nbio_v6_1_funcs; - adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg; - } - adev->hdp.funcs = &hdp_v4_0_funcs; - - if (adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_ALDEBARAN) - adev->df.funcs = &df_v3_6_funcs; - else - adev->df.funcs = &df_v1_7_funcs; - - if (adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS) - adev->smuio.funcs = &smuio_v11_0_funcs; - else if (adev->asic_type == CHIP_ALDEBARAN) - adev->smuio.funcs = &smuio_v13_0_funcs; - else - adev->smuio.funcs = &smuio_v9_0_funcs; - - adev->rev_id = soc15_get_rev_id(adev); - - switch (adev->asic_type) { - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - - /* For Vega10 SR-IOV, PSP need to be initialized before IH */ - if (amdgpu_sriov_vf(adev)) { - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { - if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - else - amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); - } - if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); - else - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - } else { - if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); - else - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { - if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - else - amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); - } - } - amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - if (is_support_sw_smu(adev)) { - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - } else { - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); - } - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev))) { - amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block); - amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block); - } - break; - case CHIP_RAVEN: - amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); - break; - case CHIP_ARCTURUS: - amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - - if (amdgpu_sriov_vf(adev)) { - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); - } else { - amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - } - - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - - if (amdgpu_sriov_vf(adev)) { - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); - } else { - amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); - } - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block); - break; - case CHIP_RENOIR: - amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); - amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); -#if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); -#endif - amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); - break; - case CHIP_ALDEBARAN: - amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - - if (amdgpu_sriov_vf(adev)) { - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); - } else { - amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); - } - - amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - - amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v2_6_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v2_6_ip_block); - break; - default: - return -EINVAL; - } - - return 0; -} - static bool soc15_need_full_reset(struct amdgpu_device *adev) { /* change this when we implement soft reset */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index f9359003385d..efc2a253e8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -102,7 +102,6 @@ struct soc15_ras_field_entry { void soc15_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); void soc15_set_virt_ops(struct amdgpu_device *adev); -int soc15_set_ip_blocks(struct amdgpu_device *adev); void soc15_program_register_sequence(struct amdgpu_device *adev, const struct soc15_reg_golden *registers, diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index de24a0a97d5e..5093826a43d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -59,7 +59,12 @@ enum ta_ras_status { TA_RAS_STATUS__ERROR_SYS_DRV_REG_ACCESS = 0xA011, TA_RAS_STATUS__ERROR_RAS_READ_WRITE = 0xA012, TA_RAS_STATUS__ERROR_NULL_PTR = 0xA013, - TA_RAS_STATUS__ERROR_UNSUPPORTED_IP = 0xA014 + TA_RAS_STATUS__ERROR_UNSUPPORTED_IP = 0xA014, + TA_RAS_STATUS__ERROR_PCS_STATE_QUIET = 0xA015, + TA_RAS_STATUS__ERROR_PCS_STATE_ERROR = 0xA016, + TA_RAS_STATUS__ERROR_PCS_STATE_HANG = 0xA017, + TA_RAS_STATUS__ERROR_PCS_STATE_UNKNOWN = 0xA018, + TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ = 0xA019 }; enum ta_ras_block { diff --git a/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c deleted file mode 100644 index d64d681a05dc..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include "amdgpu.h" -#include "nv.h" - -#include "soc15_common.h" -#include "soc15_hw_ip.h" -#include "vangogh_ip_offset.h" - -void vangogh_reg_base_init(struct amdgpu_device *adev) -{ - /* HW has more IP blocks, only initialized the blocke needed by driver */ - uint32_t i; - for (i = 0 ; i < MAX_INSTANCE ; ++i) { - adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); - adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); - adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); - adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); - adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); - adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); - adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); - adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); - adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); - adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); - adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); - adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); - } -} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index ad0d2564087c..d54d720b3cf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -111,15 +111,7 @@ static int vcn_v1_0_sw_init(void *handle) /* Override the work func */ adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); - } + amdgpu_vcn_setup_ucode(adev); r = amdgpu_vcn_resume(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 091d8c0f6801..313fc1b53999 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -115,15 +115,7 @@ static int vcn_v2_0_sw_init(void *handle) if (r) return r; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); - } + amdgpu_vcn_setup_ucode(adev); r = amdgpu_vcn_resume(adev); if (r) @@ -1884,15 +1876,14 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - tmp = AMDGPU_UCODE_ID_VCN; MMSCH_V2_0_INSERT_DIRECT_WT( SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - adev->firmware.ucode[tmp].tmr_mc_addr_lo); + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo); MMSCH_V2_0_INSERT_DIRECT_WT( SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - adev->firmware.ucode[tmp].tmr_mc_addr_hi); + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi); offset = 0; } else { MMSCH_V2_0_INSERT_DIRECT_WT( diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 59f469bab005..44fc4c218433 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -139,22 +139,7 @@ static int vcn_v2_5_sw_init(void *handle) if (r) return r; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - - if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) { - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - } - dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); - } + amdgpu_vcn_setup_ucode(adev); r = amdgpu_vcn_resume(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index dbfd92984655..da11ceba0698 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,11 +60,6 @@ static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN1 }; -static int amdgpu_ucode_id_vcns[] = { - AMDGPU_UCODE_ID_VCN, - AMDGPU_UCODE_ID_VCN1 -}; - static int vcn_v3_0_start_sriov(struct amdgpu_device *adev); static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -130,22 +125,7 @@ static int vcn_v3_0_sw_init(void *handle) if (r) return r; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - - if (adev->vcn.num_vcn_inst == VCN_INSTANCES_SIENNA_CICHLID) { - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - } - dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); - } + amdgpu_vcn_setup_ucode(adev); r = amdgpu_vcn_resume(adev); if (r) @@ -1293,7 +1273,6 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) uint32_t param, resp, expected; uint32_t offset, cache_size; uint32_t tmp, timeout; - uint32_t id; struct amdgpu_mm_table *table = &adev->virt.mm_table; uint32_t *table_loc; @@ -1337,13 +1316,12 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - id = amdgpu_ucode_id_vcns[i]; MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - adev->firmware.ucode[id].tmr_mc_addr_lo); + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - adev->firmware.ucode[id].tmr_mc_addr_hi); + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); offset = 0; MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), diff --git a/drivers/gpu/drm/amd/amdgpu/yellow_carp_reg_init.c b/drivers/gpu/drm/amd/amdgpu/yellow_carp_reg_init.c deleted file mode 100644 index 3d89421275ed..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/yellow_carp_reg_init.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include "amdgpu.h" -#include "nv.h" - -#include "soc15_common.h" -#include "soc15_hw_ip.h" -#include "yellow_carp_offset.h" - -int yellow_carp_reg_base_init(struct amdgpu_device *adev) -{ - /* HW has more IP blocks, only initialized the block needed by driver */ - uint32_t i; - for (i = 0 ; i < MAX_INSTANCE ; ++i) { - adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); - adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); - adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); - adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); - adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); - adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); - adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); - adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); - adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); - adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); - adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); - adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); - adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); - adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); - } - return 0; -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 86afd37b098d..f1e7edeb4e6b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1259,6 +1259,23 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, if (args->size == 0) return -EINVAL; +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + /* Flush pending deferred work to avoid racing with deferred actions + * from previous memory map changes (e.g. munmap). + */ + svm_range_list_lock_and_flush_work(&p->svms, current->mm); + mutex_lock(&p->svms.lock); + mmap_write_unlock(current->mm); + if (interval_tree_iter_first(&p->svms.objects, + args->va_addr >> PAGE_SHIFT, + (args->va_addr + args->size - 1) >> PAGE_SHIFT)) { + pr_err("Address: 0x%llx already allocated by SVM\n", + args->va_addr); + mutex_unlock(&p->svms.lock); + return -EADDRINUSE; + } + mutex_unlock(&p->svms.lock); +#endif dev = kfd_device_by_id(args->gpu_id); if (!dev) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 064d42acd54e..0fffaf859c59 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -93,7 +93,6 @@ static const struct kfd_device_info carrizo_device_info = { .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; -#endif static const struct kfd_device_info raven_device_info = { .asic_family = CHIP_RAVEN, @@ -113,7 +112,9 @@ static const struct kfd_device_info raven_device_info = { .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK static const struct kfd_device_info hawaii_device_info = { .asic_family = CHIP_HAWAII, .asic_name = "hawaii", @@ -133,6 +134,7 @@ static const struct kfd_device_info hawaii_device_info = { .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; +#endif static const struct kfd_device_info tonga_device_info = { .asic_family = CHIP_TONGA, @@ -1021,6 +1023,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_double_confirm_iommu_support(kfd); if (kfd_iommu_device_init(kfd)) { + kfd->use_iommu_v2 = false; dev_err(kfd_device, "Error initializing iommuv2\n"); goto device_iommu_error; } @@ -1029,6 +1032,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, svm_migrate_init((struct amdgpu_device *)kfd->kgd); + if(kgd2kfd_resume_iommu(kfd)) + goto device_iommu_error; + if (kfd_resume(kfd)) goto kfd_resume_error; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index f53e17a94ad8..6d8634e40b3b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -20,7 +20,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - #include <linux/types.h> #include <linux/hmm.h> #include <linux/dma-direction.h> @@ -34,6 +33,11 @@ #include "kfd_svm.h" #include "kfd_migrate.h" +#ifdef dev_fmt +#undef dev_fmt +#endif +#define dev_fmt(fmt) "kfd_migrate: %s: " fmt, __func__ + static uint64_t svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr) { @@ -151,14 +155,14 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, gart_d = svm_migrate_direct_mapping_addr(adev, *vram); } if (r) { - pr_debug("failed %d to create gart mapping\n", r); + dev_err(adev->dev, "fail %d create gart mapping\n", r); goto out_unlock; } r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE, NULL, &next, false, true, false); if (r) { - pr_debug("failed %d to copy memory\n", r); + dev_err(adev->dev, "fail %d to copy memory\n", r); goto out_unlock; } @@ -264,6 +268,19 @@ static void svm_migrate_put_sys_page(unsigned long addr) put_page(page); } +static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate) +{ + unsigned long cpages = 0; + unsigned long i; + + for (i = 0; i < migrate->npages; i++) { + if (migrate->src[i] & MIGRATE_PFN_VALID && + migrate->src[i] & MIGRATE_PFN_MIGRATE) + cpages++; + } + return cpages; +} + static int svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, @@ -285,7 +302,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, r = svm_range_vram_node_new(adev, prange, true); if (r) { - pr_debug("failed %d get 0x%llx pages from vram\n", r, npages); + dev_err(adev->dev, "fail %d to alloc vram\n", r); goto out; } @@ -305,7 +322,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, DMA_TO_DEVICE); r = dma_mapping_error(dev, src[i]); if (r) { - pr_debug("failed %d dma_map_page\n", r); + dev_err(adev->dev, "fail %d dma_map_page\n", r); goto out_free_vram_pages; } } else { @@ -325,8 +342,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, continue; } - pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n", - src[i] >> PAGE_SHIFT, page_to_pfn(spage)); + pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n", + src[i] >> PAGE_SHIFT, page_to_pfn(spage)); if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) { r = svm_migrate_copy_memory_gart(adev, src + i - j, @@ -372,7 +389,7 @@ out: return r; } -static int +static long svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, uint64_t end) @@ -381,6 +398,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; struct migrate_vma migrate; + unsigned long cpages = 0; dma_addr_t *scratch; size_t size; void *buf; @@ -405,23 +423,31 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, r = migrate_vma_setup(&migrate); if (r) { - pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", - r, prange->svms, prange->start, prange->last); + dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r, + prange->start, prange->last); goto out_free; } - if (migrate.cpages != npages) { - pr_debug("Partial migration. 0x%lx/0x%llx pages can be migrated\n", - migrate.cpages, - npages); - } - if (migrate.cpages) { - r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, - scratch); - migrate_vma_pages(&migrate); - svm_migrate_copy_done(adev, mfence); - migrate_vma_finalize(&migrate); + cpages = migrate.cpages; + if (!cpages) { + pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n", + prange->start, prange->last); + goto out_free; } + if (cpages != npages) + pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + cpages, npages); + else + pr_debug("0x%lx pages migrated\n", cpages); + + r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch); + migrate_vma_pages(&migrate); + + pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", + svm_migrate_successful_pages(&migrate), cpages, migrate.npages); + + svm_migrate_copy_done(adev, mfence); + migrate_vma_finalize(&migrate); svm_range_dma_unmap(adev->dev, scratch, 0, npages); svm_range_free_dma_mappings(prange); @@ -429,12 +455,13 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, out_free: kvfree(buf); out: - if (!r) { + if (!r && cpages) { pdd = svm_range_get_pdd_by_adev(prange, adev); if (pdd) - WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages); - } + WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); + return cpages; + } return r; } @@ -456,7 +483,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long addr, start, end; struct vm_area_struct *vma; struct amdgpu_device *adev; - int r = 0; + unsigned long cpages = 0; + long r = 0; if (prange->actual_loc == best_loc) { pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", @@ -488,17 +516,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, next = min(vma->vm_end, end); r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next); - if (r) { - pr_debug("failed to migrate\n"); + if (r < 0) { + pr_debug("failed %ld to migrate\n", r); break; + } else { + cpages += r; } addr = next; } - if (!r) + if (cpages) prange->actual_loc = best_loc; - return r; + return r < 0 ? r : 0; } static void svm_migrate_page_free(struct page *page) @@ -506,7 +536,7 @@ static void svm_migrate_page_free(struct page *page) struct svm_range_bo *svm_bo = page->zone_device_data; if (svm_bo) { - pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref)); + pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref)); svm_range_bo_unref(svm_bo); } } @@ -572,12 +602,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); r = dma_mapping_error(dev, dst[i]); if (r) { - pr_debug("failed %d dma_map_page\n", r); + dev_err(adev->dev, "fail %d dma_map_page\n", r); goto out_oom; } - pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n", - dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); + pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n", + dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); migrate->dst[i] |= MIGRATE_PFN_LOCKED; @@ -599,7 +629,7 @@ out_oom: return r; } -static int +static long svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, uint64_t end) { @@ -607,6 +637,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; struct migrate_vma migrate; + unsigned long cpages = 0; dma_addr_t *scratch; size_t size; void *buf; @@ -631,34 +662,43 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, r = migrate_vma_setup(&migrate); if (r) { - pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", - r, prange->svms, prange->start, prange->last); + dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r, + prange->start, prange->last); goto out_free; } - pr_debug("cpages %ld\n", migrate.cpages); - - if (migrate.cpages) { - r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, - scratch, npages); - migrate_vma_pages(&migrate); - svm_migrate_copy_done(adev, mfence); - migrate_vma_finalize(&migrate); - } else { + cpages = migrate.cpages; + if (!cpages) { pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n", prange->start, prange->last); + goto out_free; } + if (cpages != npages) + pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + cpages, npages); + else + pr_debug("0x%lx pages migrated\n", cpages); + r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, + scratch, npages); + migrate_vma_pages(&migrate); + + pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", + svm_migrate_successful_pages(&migrate), cpages, migrate.npages); + + svm_migrate_copy_done(adev, mfence); + migrate_vma_finalize(&migrate); svm_range_dma_unmap(adev->dev, scratch, 0, npages); out_free: kvfree(buf); out: - if (!r) { + if (!r && cpages) { pdd = svm_range_get_pdd_by_adev(prange, adev); if (pdd) - WRITE_ONCE(pdd->page_out, - pdd->page_out + migrate.cpages); + WRITE_ONCE(pdd->page_out, pdd->page_out + cpages); + + return cpages; } return r; } @@ -680,7 +720,8 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) unsigned long addr; unsigned long start; unsigned long end; - int r = 0; + unsigned long cpages = 0; + long r = 0; if (!prange->actual_loc) { pr_debug("[0x%lx 0x%lx] already migrated to ram\n", @@ -711,18 +752,21 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) next = min(vma->vm_end, end); r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next); - if (r) { - pr_debug("failed %d to migrate\n", r); + if (r < 0) { + pr_debug("failed %ld to migrate\n", r); break; + } else { + cpages += r; } addr = next; } - if (!r) { + if (cpages) { svm_range_vram_node_free(prange); prange->actual_loc = 0; } - return r; + + return r < 0 ? r : 0; } /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 179080329af8..b691c8495d66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -33,6 +33,11 @@ #include "kfd_svm.h" #include "kfd_migrate.h" +#ifdef dev_fmt +#undef dev_fmt +#endif +#define dev_fmt(fmt) "kfd_svm: %s: " fmt, __func__ + #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 /* Long enough to ensure no retry fault comes after svm range is restored and @@ -45,7 +50,9 @@ static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq); - +static int +svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last, + uint64_t *bo_s, uint64_t *bo_l); static const struct mmu_interval_notifier_ops svm_range_mn_ops = { .invalidate = svm_range_cpu_invalidate_pagetables, }; @@ -158,17 +165,17 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, bo_adev->vm_manager.vram_base_offset - bo_adev->kfd.dev->pgmap.range.start; addr[i] |= SVM_RANGE_VRAM_DOMAIN; - pr_debug("vram address detected: 0x%llx\n", addr[i]); + pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]); continue; } addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); r = dma_mapping_error(dev, addr[i]); if (r) { - pr_debug("failed %d dma_map_page\n", r); + dev_err(dev, "failed %d dma_map_page\n", r); return r; } - pr_debug("dma mapping 0x%llx for page addr 0x%lx\n", - addr[i] >> PAGE_SHIFT, page_to_pfn(page)); + pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n", + addr[i] >> PAGE_SHIFT, page_to_pfn(page)); } return 0; } @@ -217,7 +224,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, for (i = offset; i < offset + npages; i++) { if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i])) continue; - pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT); + pr_debug_ratelimited("unmap 0x%llx\n", dma_addr[i] >> PAGE_SHIFT); dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); dma_addr[i] = 0; } @@ -1454,7 +1461,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, /* This should never happen. actual_loc gets set by * svm_migrate_ram_to_vram after allocating a BO. */ - WARN(1, "VRAM BO missing during validation\n"); + WARN_ONCE(1, "VRAM BO missing during validation\n"); return -EINVAL; } @@ -1547,7 +1554,7 @@ unreserve_out: * Context: Returns with mmap write lock held, pending deferred work flushed * */ -static void +void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm) { @@ -2303,6 +2310,7 @@ svm_range_best_restore_location(struct svm_range *prange, return -1; } + static int svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, unsigned long *start, unsigned long *last) @@ -2350,8 +2358,59 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, vma->vm_end >> PAGE_SHIFT, *last); return 0; +} + +static int +svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last, + uint64_t *bo_s, uint64_t *bo_l) +{ + struct amdgpu_bo_va_mapping *mapping; + struct interval_tree_node *node; + struct amdgpu_bo *bo = NULL; + unsigned long userptr; + uint32_t i; + int r; + + for (i = 0; i < p->n_pdds; i++) { + struct amdgpu_vm *vm; + + if (!p->pdds[i]->drm_priv) + continue; + + vm = drm_priv_to_vm(p->pdds[i]->drm_priv); + r = amdgpu_bo_reserve(vm->root.bo, false); + if (r) + return r; + /* Check userptr by searching entire vm->va interval tree */ + node = interval_tree_iter_first(&vm->va, 0, ~0ULL); + while (node) { + mapping = container_of((struct rb_node *)node, + struct amdgpu_bo_va_mapping, rb); + bo = mapping->bo_va->base.bo; + + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, + start << PAGE_SHIFT, + last << PAGE_SHIFT, + &userptr)) { + node = interval_tree_iter_next(node, 0, ~0ULL); + continue; + } + + pr_debug("[0x%llx 0x%llx] already userptr mapped\n", + start, last); + if (bo_s && bo_l) { + *bo_s = userptr >> PAGE_SHIFT; + *bo_l = *bo_s + bo->tbo.ttm->num_pages - 1; + } + amdgpu_bo_unreserve(vm->root.bo); + return -EADDRINUSE; + } + amdgpu_bo_unreserve(vm->root.bo); + } + return 0; } + static struct svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, struct kfd_process *p, @@ -2361,10 +2420,26 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, struct svm_range *prange = NULL; unsigned long start, last; uint32_t gpuid, gpuidx; + uint64_t bo_s = 0; + uint64_t bo_l = 0; + int r; if (svm_range_get_range_boundaries(p, addr, &start, &last)) return NULL; + r = svm_range_check_vm(p, start, last, &bo_s, &bo_l); + if (r != -EADDRINUSE) + r = svm_range_check_vm_userptr(p, start, last, &bo_s, &bo_l); + + if (r == -EADDRINUSE) { + if (addr >= bo_s && addr <= bo_l) + return NULL; + + /* Create one page svm range if 2MB range overlapping */ + start = addr; + last = addr; + } + prange = svm_range_new(&p->svms, start, last); if (!prange) { pr_debug("Failed to create prange in address [0x%llx]\n", addr); @@ -2663,8 +2738,67 @@ int svm_range_list_init(struct kfd_process *p) } /** + * svm_range_check_vm - check if virtual address range mapped already + * @p: current kfd_process + * @start: range start address, in pages + * @last: range last address, in pages + * @bo_s: mapping start address in pages if address range already mapped + * @bo_l: mapping last address in pages if address range already mapped + * + * The purpose is to avoid virtual address ranges already allocated by + * kfd_ioctl_alloc_memory_of_gpu ioctl. + * It looks for each pdd in the kfd_process. + * + * Context: Process context + * + * Return 0 - OK, if the range is not mapped. + * Otherwise error code: + * -EADDRINUSE - if address is mapped already by kfd_ioctl_alloc_memory_of_gpu + * -ERESTARTSYS - A wait for the buffer to become unreserved was interrupted by + * a signal. Release all buffer reservations and return to user-space. + */ +static int +svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last, + uint64_t *bo_s, uint64_t *bo_l) +{ + struct amdgpu_bo_va_mapping *mapping; + struct interval_tree_node *node; + uint32_t i; + int r; + + for (i = 0; i < p->n_pdds; i++) { + struct amdgpu_vm *vm; + + if (!p->pdds[i]->drm_priv) + continue; + + vm = drm_priv_to_vm(p->pdds[i]->drm_priv); + r = amdgpu_bo_reserve(vm->root.bo, false); + if (r) + return r; + + node = interval_tree_iter_first(&vm->va, start, last); + if (node) { + pr_debug("range [0x%llx 0x%llx] already TTM mapped\n", + start, last); + mapping = container_of((struct rb_node *)node, + struct amdgpu_bo_va_mapping, rb); + if (bo_s && bo_l) { + *bo_s = mapping->start; + *bo_l = mapping->last; + } + amdgpu_bo_unreserve(vm->root.bo); + return -EADDRINUSE; + } + amdgpu_bo_unreserve(vm->root.bo); + } + + return 0; +} + +/** * svm_range_is_valid - check if virtual address range is valid - * @mm: current process mm_struct + * @p: current kfd_process * @start: range start address, in pages * @size: range size, in pages * @@ -2673,28 +2807,28 @@ int svm_range_list_init(struct kfd_process *p) * Context: Process context * * Return: - * true - valid svm range - * false - invalid svm range + * 0 - OK, otherwise error code */ -static bool -svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size) +static int +svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size) { const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP; struct vm_area_struct *vma; unsigned long end; + unsigned long start_unchg = start; start <<= PAGE_SHIFT; end = start + (size << PAGE_SHIFT); - do { - vma = find_vma(mm, start); + vma = find_vma(p->mm, start); if (!vma || start < vma->vm_start || (vma->vm_flags & device_vma)) - return false; + return -EFAULT; start = min(end, vma->vm_end); } while (start < end); - return true; + return svm_range_check_vm(p, start_unchg, (end - 1) >> PAGE_SHIFT, NULL, + NULL); } /** @@ -2997,9 +3131,9 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, svm_range_list_lock_and_flush_work(svms, mm); - if (!svm_range_is_valid(mm, start, size)) { - pr_debug("invalid range\n"); - r = -EFAULT; + r = svm_range_is_valid(p, start, size); + if (r) { + pr_debug("invalid range r=%d\n", r); mmap_write_unlock(mm); goto out; } @@ -3101,6 +3235,7 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t flags_or = 0; int gpuidx; uint32_t i; + int r = 0; pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start, start + size - 1, nattr); @@ -3114,12 +3249,12 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, flush_work(&p->svms.deferred_list_work); mmap_read_lock(mm); - if (!svm_range_is_valid(mm, start, size)) { - pr_debug("invalid range\n"); - mmap_read_unlock(mm); - return -EINVAL; - } + r = svm_range_is_valid(p, start, size); mmap_read_unlock(mm); + if (r) { + pr_debug("invalid range r=%d\n", r); + return r; + } for (i = 0; i < nattr; i++) { switch (attrs[i].type) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index c6ec55354c7b..6dc91c33e80f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -188,6 +188,7 @@ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm, void *owner); struct kfd_process_device * svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev); +void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm); /* SVM API and HMM page migration work together, device memory type * is initialized to not 0 when page migration register device memory. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 98cca5f2b27f..dd593ad0614a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1296,6 +1296,24 @@ int kfd_topology_add_device(struct kfd_dev *gpu) proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); + adev = (struct amdgpu_device *)(gpu->kgd); + + /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */ + if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) { + struct kfd_topology_device *top_dev; + + down_read(&topology_lock); + + list_for_each_entry(top_dev, &topology_device_list, list) { + if (top_dev->gpu) + break; + + top_dev->node_props.hive_id = gpu->hive_id; + } + + up_read(&topology_lock); + } + /* Check to see if this gpu device exists in the topology_device_list. * If so, assign the gpu to that device, * else create a Virtual CRAT for this gpu device and then parse that @@ -1457,7 +1475,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.max_waves_per_simd = 10; } - adev = (struct amdgpu_device *)(dev->gpu->kgd); /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */ dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f35561b5a465..ad571f71ee43 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1356,8 +1356,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) switch (adev->ip_versions[DCE_HWIP][0]) { case IP_VERSION(2, 1, 0): init_data.flags.gpu_vm_support = true; - if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id)) - init_data.flags.disable_dmcu = true; + init_data.flags.disable_dmcu = true; break; case IP_VERSION(1, 0, 0): case IP_VERSION(1, 0, 1): @@ -4031,6 +4030,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) int32_t primary_planes; enum dc_connection_type new_connection_type = dc_connection_none; const struct dc_plane_cap *plane; + bool psr_feature_enabled = false; dm->display_indexes_num = dm->dc->caps.max_streams; /* Update the actual used number of crtc */ @@ -4113,6 +4113,19 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) DRM_DEBUG_KMS("Unsupported DCN IP version for outbox: 0x%X\n", adev->ip_versions[DCE_HWIP][0]); } + + /* Determine whether to enable PSR support by default. */ + if (!(amdgpu_dc_debug_mask & DC_DISABLE_PSR)) { + switch (adev->ip_versions[DCE_HWIP][0]) { + case IP_VERSION(3, 1, 2): + case IP_VERSION(3, 1, 3): + psr_feature_enabled = true; + break; + default: + psr_feature_enabled = amdgpu_dc_feature_mask & DC_PSR_MASK; + break; + } + } #endif /* loops over all connectors on the board */ @@ -4156,7 +4169,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } else if (dc_link_detect(link, DETECT_REASON_BOOT)) { amdgpu_dm_update_connector_after_detect(aconnector); register_backlight_device(dm, link); - if (amdgpu_dc_feature_mask & DC_PSR_MASK) + + if (psr_feature_enabled) amdgpu_dm_set_psr_caps(link); } @@ -10535,18 +10549,18 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, struct drm_crtc *crtc, struct drm_crtc_state *new_crtc_state) { - struct drm_plane_state *new_cursor_state, *new_primary_state; - int cursor_scale_w, cursor_scale_h, primary_scale_w, primary_scale_h; + struct drm_plane *cursor = crtc->cursor, *underlying; + struct drm_plane_state *new_cursor_state, *new_underlying_state; + int i; + int cursor_scale_w, cursor_scale_h, underlying_scale_w, underlying_scale_h; /* On DCE and DCN there is no dedicated hardware cursor plane. We get a * cursor per pipe but it's going to inherit the scaling and * positioning from the underlying pipe. Check the cursor plane's - * blending properties match the primary plane's. */ + * blending properties match the underlying planes'. */ - new_cursor_state = drm_atomic_get_new_plane_state(state, crtc->cursor); - new_primary_state = drm_atomic_get_new_plane_state(state, crtc->primary); - if (!new_cursor_state || !new_primary_state || - !new_cursor_state->fb || !new_primary_state->fb) { + new_cursor_state = drm_atomic_get_new_plane_state(state, cursor); + if (!new_cursor_state || !new_cursor_state->fb) { return 0; } @@ -10555,15 +10569,34 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, cursor_scale_h = new_cursor_state->crtc_h * 1000 / (new_cursor_state->src_h >> 16); - primary_scale_w = new_primary_state->crtc_w * 1000 / - (new_primary_state->src_w >> 16); - primary_scale_h = new_primary_state->crtc_h * 1000 / - (new_primary_state->src_h >> 16); + for_each_new_plane_in_state_reverse(state, underlying, new_underlying_state, i) { + /* Narrow down to non-cursor planes on the same CRTC as the cursor */ + if (new_underlying_state->crtc != crtc || underlying == crtc->cursor) + continue; - if (cursor_scale_w != primary_scale_w || - cursor_scale_h != primary_scale_h) { - drm_dbg_atomic(crtc->dev, "Cursor plane scaling doesn't match primary plane\n"); - return -EINVAL; + /* Ignore disabled planes */ + if (!new_underlying_state->fb) + continue; + + underlying_scale_w = new_underlying_state->crtc_w * 1000 / + (new_underlying_state->src_w >> 16); + underlying_scale_h = new_underlying_state->crtc_h * 1000 / + (new_underlying_state->src_h >> 16); + + if (cursor_scale_w != underlying_scale_w || + cursor_scale_h != underlying_scale_h) { + drm_dbg_atomic(crtc->dev, + "Cursor [PLANE:%d:%s] scaling doesn't match underlying [PLANE:%d:%s]\n", + cursor->base.id, cursor->name, underlying->base.id, underlying->name); + return -EINVAL; + } + + /* If this plane covers the whole CRTC, no need to check planes underneath */ + if (new_underlying_state->crtc_x <= 0 && + new_underlying_state->crtc_y <= 0 && + new_underlying_state->crtc_x + new_underlying_state->crtc_w >= new_crtc_state->mode.hdisplay && + new_underlying_state->crtc_y + new_underlying_state->crtc_h >= new_crtc_state->mode.vdisplay) + break; } return 0; @@ -10594,53 +10627,6 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm } #endif -static int validate_overlay(struct drm_atomic_state *state) -{ - int i; - struct drm_plane *plane; - struct drm_plane_state *new_plane_state; - struct drm_plane_state *primary_state, *overlay_state = NULL; - - /* Check if primary plane is contained inside overlay */ - for_each_new_plane_in_state_reverse(state, plane, new_plane_state, i) { - if (plane->type == DRM_PLANE_TYPE_OVERLAY) { - if (drm_atomic_plane_disabling(plane->state, new_plane_state)) - return 0; - - overlay_state = new_plane_state; - continue; - } - } - - /* check if we're making changes to the overlay plane */ - if (!overlay_state) - return 0; - - /* check if overlay plane is enabled */ - if (!overlay_state->crtc) - return 0; - - /* find the primary plane for the CRTC that the overlay is enabled on */ - primary_state = drm_atomic_get_plane_state(state, overlay_state->crtc->primary); - if (IS_ERR(primary_state)) - return PTR_ERR(primary_state); - - /* check if primary plane is enabled */ - if (!primary_state->crtc) - return 0; - - /* Perform the bounds check to ensure the overlay plane covers the primary */ - if (primary_state->crtc_x < overlay_state->crtc_x || - primary_state->crtc_y < overlay_state->crtc_y || - primary_state->crtc_x + primary_state->crtc_w > overlay_state->crtc_x + overlay_state->crtc_w || - primary_state->crtc_y + primary_state->crtc_h > overlay_state->crtc_y + overlay_state->crtc_h) { - DRM_DEBUG_ATOMIC("Overlay plane is enabled with hardware cursor but does not fully cover primary plane\n"); - return -EINVAL; - } - - return 0; -} - /** * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM. * @dev: The DRM device @@ -10683,6 +10669,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, struct dm_crtc_state *dm_old_crtc_state; #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_mst_fairness_vars vars[MAX_PIPES]; + struct drm_dp_mst_topology_state *mst_state; + struct drm_dp_mst_topology_mgr *mgr; #endif trace_amdgpu_dm_atomic_check_begin(state); @@ -10822,10 +10810,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; } - ret = validate_overlay(state); - if (ret) - goto fail; - /* Add new/modified planes */ for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { ret = dm_update_plane_state(dc, state, plane, @@ -10891,6 +10875,33 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, lock_and_validation_needed = true; } +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* set the slot info for each mst_state based on the link encoding format */ + for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) { + struct amdgpu_dm_connector *aconnector; + struct drm_connector *connector; + struct drm_connector_list_iter iter; + u8 link_coding_cap; + + if (!mgr->mst_state ) + continue; + + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { + int id = connector->index; + + if (id == mst_state->mgr->conn_base_id) { + aconnector = to_amdgpu_dm_connector(connector); + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link); + drm_dp_mst_update_slots(mst_state, link_coding_cap); + + break; + } + } + drm_connector_list_iter_end(&iter); + + } +#endif /** * Streams and planes are reset when there are changes that affect * bandwidth. Anything that affects bandwidth needs to go through diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 814f67d86a3c..1a68a674913c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -264,7 +264,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, if (!wr_buf) return -ENOSPC; - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -294,6 +294,9 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, case LINK_RATE_RBR2: case LINK_RATE_HIGH2: case LINK_RATE_HIGH3: +#if defined(CONFIG_DRM_AMD_DC_DCN) + case LINK_RATE_UHBR10: +#endif break; default: valid_input = false; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index ff0f91c93ba4..8cbeeb7c986d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -219,6 +219,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; bool ret; + u8 link_coding_cap = DP_8b_10b_ENCODING; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; /* Accessing the connector state is required for vcpi_slots allocation @@ -238,6 +239,10 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( mst_port = aconnector->port; +#if defined(CONFIG_DRM_AMD_DC_DCN) + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link); +#endif + if (enable) { ret = drm_dp_mst_allocate_vcpi(mst_mgr, mst_port, @@ -251,7 +256,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( } /* It's OK for this to fail */ - drm_dp_update_payload_part1(mst_mgr); + drm_dp_update_payload_part1(mst_mgr, (link_coding_cap == DP_CAP_ANSI_128B132B) ? 0:1); /* mst_mgr->->payloads are VC payload notify MST branch using DPCD or * AUX message. The sequence is slot 1-63 allocated sequence for each diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index cdb5c027411a..c17732fba039 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -99,6 +99,10 @@ static enum bp_result get_firmware_info_v3_2( struct bios_parser *bp, struct dc_firmware_info *info); +static enum bp_result get_firmware_info_v3_4( + struct bios_parser *bp, + struct dc_firmware_info *info); + static struct atom_hpd_int_record *get_hpd_record(struct bios_parser *bp, struct atom_display_object_path_v2 *object); @@ -1426,8 +1430,10 @@ static enum bp_result bios_parser_get_firmware_info( break; case 2: case 3: - case 4: result = get_firmware_info_v3_2(bp, info); + break; + case 4: + result = get_firmware_info_v3_4(bp, info); break; default: break; @@ -1575,6 +1581,88 @@ static enum bp_result get_firmware_info_v3_2( return BP_RESULT_OK; } +static enum bp_result get_firmware_info_v3_4( + struct bios_parser *bp, + struct dc_firmware_info *info) +{ + struct atom_firmware_info_v3_4 *firmware_info; + struct atom_common_table_header *header; + struct atom_data_revision revision; + struct atom_display_controller_info_v4_1 *dce_info_v4_1 = NULL; + struct atom_display_controller_info_v4_4 *dce_info_v4_4 = NULL; + if (!info) + return BP_RESULT_BADINPUT; + + firmware_info = GET_IMAGE(struct atom_firmware_info_v3_4, + DATA_TABLES(firmwareinfo)); + + if (!firmware_info) + return BP_RESULT_BADBIOSTABLE; + + memset(info, 0, sizeof(*info)); + + header = GET_IMAGE(struct atom_common_table_header, + DATA_TABLES(dce_info)); + + get_atom_data_table_revision(header, &revision); + + switch (revision.major) { + case 4: + switch (revision.minor) { + case 4: + dce_info_v4_4 = GET_IMAGE(struct atom_display_controller_info_v4_4, + DATA_TABLES(dce_info)); + + if (!dce_info_v4_4) + return BP_RESULT_BADBIOSTABLE; + + /* 100MHz expected */ + info->pll_info.crystal_frequency = dce_info_v4_4->dce_refclk_10khz * 10; + info->dp_phy_ref_clk = dce_info_v4_4->dpphy_refclk_10khz * 10; + /* 50MHz expected */ + info->i2c_engine_ref_clk = dce_info_v4_4->i2c_engine_refclk_10khz * 10; + + /* Get SMU Display PLL VCO Frequency in KHz*/ + info->smu_gpu_pll_output_freq = dce_info_v4_4->dispclk_pll_vco_freq * 10; + break; + + default: + /* should not come here, keep as backup, as was before */ + dce_info_v4_1 = GET_IMAGE(struct atom_display_controller_info_v4_1, + DATA_TABLES(dce_info)); + + if (!dce_info_v4_1) + return BP_RESULT_BADBIOSTABLE; + + info->pll_info.crystal_frequency = dce_info_v4_1->dce_refclk_10khz * 10; + info->dp_phy_ref_clk = dce_info_v4_1->dpphy_refclk_10khz * 10; + info->i2c_engine_ref_clk = dce_info_v4_1->i2c_engine_refclk_10khz * 10; + break; + } + break; + + default: + ASSERT(0); + break; + } + + header = GET_IMAGE(struct atom_common_table_header, + DATA_TABLES(smu_info)); + get_atom_data_table_revision(header, &revision); + + // We need to convert from 10KHz units into KHz units. + info->default_memory_clk = firmware_info->bootup_mclk_in10khz * 10; + + if (firmware_info->board_i2c_feature_id == 0x2) { + info->oem_i2c_present = true; + info->oem_i2c_obj_id = firmware_info->board_i2c_feature_gpio_id; + } else { + info->oem_i2c_present = false; + } + + return BP_RESULT_OK; +} + static enum bp_result bios_parser_get_encoder_cap_info( struct dc_bios *dcb, struct graphics_object_id object_id, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index d7bf9283dc90..0088dff441da 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -219,14 +219,17 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, update_dispclk = true; } - /* TODO: add back DTO programming when DPPCLK restore is fixed in FSDL*/ if (dpp_clock_lowered) { // increase per DPP DTO before lowering global dppclk + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); dcn31_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); } else { // increase global DPPCLK before lowering per DPP DTO if (update_dppclk || update_dispclk) dcn31_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); + // always update dtos unless clock is lowered and not safe to lower + if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } // notify DMCUB of latest clocks @@ -368,32 +371,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 5.32, - .sr_enter_plus_exit_time_us = 6.38, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index da942e9f5142..935a50d6e933 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -71,8 +71,6 @@ #include "dmub/dmub_srv.h" -#include "dcn30/dcn30_vpg.h" - #include "i2caux_interface.h" #include "dce/dmub_hw_lock_mgr.h" @@ -2356,6 +2354,11 @@ static enum surface_update_type check_update_surfaces_for_stream( if (stream_update->dsc_config) su_flags->bits.dsc_changed = 1; +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (stream_update->mst_bw_update) + su_flags->bits.mst_bw = 1; +#endif + if (su_flags->raw != 0) overall_type = UPDATE_TYPE_FULL; @@ -2674,9 +2677,6 @@ static void commit_planes_do_stream_update(struct dc *dc, enum surface_update_type update_type, struct dc_state *context) { -#if defined(CONFIG_DRM_AMD_DC_DCN) - struct vpg *vpg; -#endif int j; // Stream updates @@ -2697,11 +2697,6 @@ static void commit_planes_do_stream_update(struct dc *dc, stream_update->vrr_infopacket || stream_update->vsc_infopacket || stream_update->vsp_infopacket) { -#if defined(CONFIG_DRM_AMD_DC_DCN) - vpg = pipe_ctx->stream_res.stream_enc->vpg; - if (vpg && vpg->funcs->vpg_poweron) - vpg->funcs->vpg_poweron(vpg); -#endif resource_build_info_frame(pipe_ctx); dc->hwss.update_info_frame(pipe_ctx); } @@ -2741,6 +2736,15 @@ static void commit_planes_do_stream_update(struct dc *dc, if (stream_update->dsc_config) dp_update_dsc_config(pipe_ctx); +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (stream_update->mst_bw_update) { + if (stream_update->mst_bw_update->is_increase) + dc_link_increase_mst_payload(pipe_ctx, stream_update->mst_bw_update->mst_stream_bw); + else + dc_link_reduce_mst_payload(pipe_ctx, stream_update->mst_bw_update->mst_stream_bw); + } +#endif + if (stream_update->pending_test_pattern) { dc_link_dp_set_test_pattern(stream->link, stream->test_pattern.type, @@ -3118,8 +3122,13 @@ void dc_commit_updates_for_stream(struct dc *dc, if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) new_pipe->plane_state->force_full_update = true; } - } else if (update_type == UPDATE_TYPE_FAST) { - /* Previous frame finished and HW is ready for optimization. */ + } else if (update_type == UPDATE_TYPE_FAST && dc_ctx->dce_version >= DCE_VERSION_MAX) { + /* + * Previous frame finished and HW is ready for optimization. + * + * Only relevant for DCN behavior where we can guarantee the optimization + * is safe to apply - retain the legacy behavior for DCE. + */ dc_post_update_surfaces_to_stream(dc); } @@ -3178,6 +3187,12 @@ void dc_commit_updates_for_stream(struct dc *dc, } } + /* Legacy optimization path for DCE. */ + if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < DCE_VERSION_MAX) { + dc_post_update_surfaces_to_stream(dc); + TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce); + } + return; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index ca5dc3c168ec..ec5f107bc85a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -674,13 +674,13 @@ static void query_hdcp_capability(enum signal_type signal, struct dc_link *link) static void read_current_link_settings_on_detect(struct dc_link *link) { - union lane_count_set lane_count_set = { {0} }; + union lane_count_set lane_count_set = {0}; uint8_t link_bw_set; uint8_t link_rate_set; uint32_t read_dpcd_retry_cnt = 10; enum dc_status status = DC_ERROR_UNEXPECTED; int i; - union max_down_spread max_down_spread = { {0} }; + union max_down_spread max_down_spread = {0}; // Read DPCD 00101h to find out the number of lanes currently set for (i = 0; i < read_dpcd_retry_cnt; i++) { @@ -1869,8 +1869,13 @@ static enum dc_status enable_link_dp(struct dc_state *state, do_fallback = true; #if defined(CONFIG_DRM_AMD_DC_DCN) + /* + * Temporary w/a to get DP2.0 link rates to work with SST. + * TODO DP2.0 - Workaround: Remove w/a if and when the issue is resolved. + */ if (dp_get_link_encoding_format(&link_settings) == DP_128b_132b_ENCODING && - pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT) { + pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + link->dc->debug.set_mst_en_for_sst) { dp_enable_mst_on_sink(link, true); } #endif @@ -1983,51 +1988,6 @@ static enum dc_status enable_link_dp_mst( return enable_link_dp(state, pipe_ctx); } -void blank_all_dp_displays(struct dc *dc, bool hw_init) -{ - unsigned int i, j, fe; - uint8_t dpcd_power_state = '\0'; - enum dc_status status = DC_ERROR_UNEXPECTED; - - for (i = 0; i < dc->link_count; i++) { - enum signal_type signal = dc->links[i]->connector_signal; - - if ((signal == SIGNAL_TYPE_EDP) || - (signal == SIGNAL_TYPE_DISPLAY_PORT)) { - if (hw_init && signal != SIGNAL_TYPE_EDP && dc->links[i]->priv != NULL) { - /* DP 2.0 spec requires that we read LTTPR caps first */ - dp_retrieve_lttpr_cap(dc->links[i]); - /* if any of the displays are lit up turn them off */ - status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, - &dpcd_power_state, sizeof(dpcd_power_state)); - } - - if ((signal != SIGNAL_TYPE_EDP && status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) || - (!hw_init && dc->links[i]->link_enc && - dc->links[i]->link_enc->funcs->is_dig_enabled(dc->links[i]->link_enc))) { - if (dc->links[i]->link_enc->funcs->get_dig_frontend) { - fe = dc->links[i]->link_enc->funcs->get_dig_frontend(dc->links[i]->link_enc); - if (fe == ENGINE_ID_UNKNOWN) - continue; - - for (j = 0; j < dc->res_pool->stream_enc_count; j++) { - if (fe == dc->res_pool->stream_enc[j]->id) { - dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], - dc->res_pool->stream_enc[j]); - break; - } - } - } - - if (!dc->links[i]->wa_flags.dp_keep_receiver_powered || - (hw_init && signal != SIGNAL_TYPE_EDP && dc->links[i]->priv != NULL)) - dp_receiver_power_ctrl(dc->links[i], false); - } - } - } - -} - static bool get_ext_hdmi_settings(struct pipe_ctx *pipe_ctx, enum engine_id eng_id, struct ext_hdmi_settings *settings) @@ -3272,10 +3232,12 @@ static struct fixed31_32 get_pbn_from_timing(struct pipe_ctx *pipe_ctx) static void update_mst_stream_alloc_table( struct dc_link *link, struct stream_encoder *stream_enc, +#if defined(CONFIG_DRM_AMD_DC_DCN) + struct hpo_dp_stream_encoder *hpo_dp_stream_enc, // TODO: Rename stream_enc to dio_stream_enc? +#endif const struct dp_mst_stream_allocation_table *proposed_table) { - struct link_mst_stream_allocation work_table[MAX_CONTROLLER_NUM] = { - { 0 } }; + struct link_mst_stream_allocation work_table[MAX_CONTROLLER_NUM] = { 0 }; struct link_mst_stream_allocation *dc_alloc; int i; @@ -3308,6 +3270,9 @@ static void update_mst_stream_alloc_table( work_table[i].slot_count = proposed_table->stream_allocations[i].slot_count; work_table[i].stream_enc = stream_enc; +#if defined(CONFIG_DRM_AMD_DC_DCN) + work_table[i].hpo_dp_stream_enc = hpo_dp_stream_enc; +#endif } } @@ -3430,11 +3395,15 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) struct dc_link *link = stream->link; struct link_encoder *link_encoder = NULL; struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; +#if defined(CONFIG_DRM_AMD_DC_DCN) + struct hpo_dp_link_encoder *hpo_dp_link_encoder = link->hpo_dp_link_enc; + struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc; +#endif struct dp_mst_stream_allocation_table proposed_table = {0}; struct fixed31_32 avg_time_slots_per_mtp; struct fixed31_32 pbn; struct fixed31_32 pbn_per_slot; - uint8_t i; + int i; enum act_return_status ret; DC_LOGGER_INIT(link->ctx->logger); @@ -3457,7 +3426,14 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) &proposed_table, true)) { update_mst_stream_alloc_table( +#if defined(CONFIG_DRM_AMD_DC_DCN) + link, + pipe_ctx->stream_res.stream_enc, + pipe_ctx->stream_res.hpo_dp_stream_enc, + &proposed_table); +#else link, pipe_ctx->stream_res.stream_enc, &proposed_table); +#endif } else DC_LOG_WARNING("Failed to update" @@ -3471,23 +3447,56 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) link->mst_stream_alloc_table.stream_count); for (i = 0; i < MAX_CONTROLLER_NUM; i++) { +#if defined(CONFIG_DRM_AMD_DC_DCN) DC_LOG_MST("stream_enc[%d]: %p " + "stream[%d].hpo_dp_stream_enc: %p " "stream[%d].vcp_id: %d " "stream[%d].slot_count: %d\n", i, (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, i, + (void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc, + i, link->mst_stream_alloc_table.stream_allocations[i].vcp_id, i, link->mst_stream_alloc_table.stream_allocations[i].slot_count); +#else + DC_LOG_MST("stream_enc[%d]: %p " + "stream[%d].vcp_id: %d " + "stream[%d].slot_count: %d\n", + i, + (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, + i, + link->mst_stream_alloc_table.stream_allocations[i].vcp_id, + i, + link->mst_stream_alloc_table.stream_allocations[i].slot_count); +#endif } ASSERT(proposed_table.stream_count > 0); /* program DP source TX for payload */ +#if defined(CONFIG_DRM_AMD_DC_DCN) + switch (dp_get_link_encoding_format(&link->cur_link_settings)) { + case DP_8b_10b_ENCODING: + link_encoder->funcs->update_mst_stream_allocation_table( + link_encoder, + &link->mst_stream_alloc_table); + break; + case DP_128b_132b_ENCODING: + hpo_dp_link_encoder->funcs->update_stream_allocation_table( + hpo_dp_link_encoder, + &link->mst_stream_alloc_table); + break; + case DP_UNKNOWN_ENCODING: + DC_LOG_ERROR("Failure: unknown encoding format\n"); + return DC_ERROR_UNEXPECTED; + } +#else link_encoder->funcs->update_mst_stream_allocation_table( link_encoder, &link->mst_stream_alloc_table); +#endif /* send down message */ ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger( @@ -3510,23 +3519,205 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) pbn = get_pbn_from_timing(pipe_ctx); avg_time_slots_per_mtp = dc_fixpt_div(pbn, pbn_per_slot); +#if defined(CONFIG_DRM_AMD_DC_DCN) + switch (dp_get_link_encoding_format(&link->cur_link_settings)) { + case DP_8b_10b_ENCODING: + stream_encoder->funcs->set_throttled_vcp_size( + stream_encoder, + avg_time_slots_per_mtp); + break; + case DP_128b_132b_ENCODING: + hpo_dp_link_encoder->funcs->set_throttled_vcp_size( + hpo_dp_link_encoder, + hpo_dp_stream_encoder->inst, + avg_time_slots_per_mtp); + break; + case DP_UNKNOWN_ENCODING: + DC_LOG_ERROR("Failure: unknown encoding format\n"); + return DC_ERROR_UNEXPECTED; + } +#else stream_encoder->funcs->set_throttled_vcp_size( stream_encoder, avg_time_slots_per_mtp); +#endif return DC_OK; } +#if defined(CONFIG_DRM_AMD_DC_DCN) +enum dc_status dc_link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in_kbps) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->link; + struct fixed31_32 avg_time_slots_per_mtp; + struct fixed31_32 pbn; + struct fixed31_32 pbn_per_slot; + struct link_encoder *link_encoder = link->link_enc; + struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; + struct dp_mst_stream_allocation_table proposed_table = {0}; + uint8_t i; + enum act_return_status ret; + DC_LOGGER_INIT(link->ctx->logger); + + /* decrease throttled vcp size */ + pbn_per_slot = get_pbn_per_slot(stream); + pbn = get_pbn_from_bw_in_kbps(bw_in_kbps); + avg_time_slots_per_mtp = dc_fixpt_div(pbn, pbn_per_slot); + + stream_encoder->funcs->set_throttled_vcp_size( + stream_encoder, + avg_time_slots_per_mtp); + + /* send ALLOCATE_PAYLOAD sideband message with updated pbn */ + dm_helpers_dp_mst_send_payload_allocation( + stream->ctx, + stream, + true); + + /* notify immediate branch device table update */ + if (dm_helpers_dp_mst_write_payload_allocation_table( + stream->ctx, + stream, + &proposed_table, + true)) { + /* update mst stream allocation table software state */ + update_mst_stream_alloc_table( + link, + pipe_ctx->stream_res.stream_enc, + pipe_ctx->stream_res.hpo_dp_stream_enc, + &proposed_table); + } else { + DC_LOG_WARNING("Failed to update" + "MST allocation table for" + "pipe idx:%d\n", + pipe_ctx->pipe_idx); + } + + DC_LOG_MST("%s " + "stream_count: %d: \n ", + __func__, + link->mst_stream_alloc_table.stream_count); + + for (i = 0; i < MAX_CONTROLLER_NUM; i++) { + DC_LOG_MST("stream_enc[%d]: %p " + "stream[%d].vcp_id: %d " + "stream[%d].slot_count: %d\n", + i, + (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, + i, + link->mst_stream_alloc_table.stream_allocations[i].vcp_id, + i, + link->mst_stream_alloc_table.stream_allocations[i].slot_count); + } + + ASSERT(proposed_table.stream_count > 0); + + /* update mst stream allocation table hardware state */ + link_encoder->funcs->update_mst_stream_allocation_table( + link_encoder, + &link->mst_stream_alloc_table); + + /* poll for immediate branch device ACT handled */ + ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger( + stream->ctx, + stream); + + return DC_OK; +} + +enum dc_status dc_link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in_kbps) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->link; + struct fixed31_32 avg_time_slots_per_mtp; + struct fixed31_32 pbn; + struct fixed31_32 pbn_per_slot; + struct link_encoder *link_encoder = link->link_enc; + struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; + struct dp_mst_stream_allocation_table proposed_table = {0}; + uint8_t i; + enum act_return_status ret; + DC_LOGGER_INIT(link->ctx->logger); + + /* notify immediate branch device table update */ + if (dm_helpers_dp_mst_write_payload_allocation_table( + stream->ctx, + stream, + &proposed_table, + true)) { + /* update mst stream allocation table software state */ + update_mst_stream_alloc_table( + link, + pipe_ctx->stream_res.stream_enc, + pipe_ctx->stream_res.hpo_dp_stream_enc, + &proposed_table); + } + + DC_LOG_MST("%s " + "stream_count: %d: \n ", + __func__, + link->mst_stream_alloc_table.stream_count); + + for (i = 0; i < MAX_CONTROLLER_NUM; i++) { + DC_LOG_MST("stream_enc[%d]: %p " + "stream[%d].vcp_id: %d " + "stream[%d].slot_count: %d\n", + i, + (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, + i, + link->mst_stream_alloc_table.stream_allocations[i].vcp_id, + i, + link->mst_stream_alloc_table.stream_allocations[i].slot_count); + } + + ASSERT(proposed_table.stream_count > 0); + + /* update mst stream allocation table hardware state */ + link_encoder->funcs->update_mst_stream_allocation_table( + link_encoder, + &link->mst_stream_alloc_table); + + /* poll for immediate branch device ACT handled */ + ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger( + stream->ctx, + stream); + + if (ret != ACT_LINK_LOST) { + /* send ALLOCATE_PAYLOAD sideband message with updated pbn */ + dm_helpers_dp_mst_send_payload_allocation( + stream->ctx, + stream, + true); + } + + /* increase throttled vcp size */ + pbn = get_pbn_from_bw_in_kbps(bw_in_kbps); + pbn_per_slot = get_pbn_per_slot(stream); + avg_time_slots_per_mtp = dc_fixpt_div(pbn, pbn_per_slot); + + stream_encoder->funcs->set_throttled_vcp_size( + stream_encoder, + avg_time_slots_per_mtp); + + return DC_OK; +} +#endif + static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; struct link_encoder *link_encoder = NULL; struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; +#if defined(CONFIG_DRM_AMD_DC_DCN) + struct hpo_dp_link_encoder *hpo_dp_link_encoder = link->hpo_dp_link_enc; + struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc; +#endif struct dp_mst_stream_allocation_table proposed_table = {0}; struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0); - uint8_t i; + int i; bool mst_mode = (link->type == dc_connection_mst_branch); DC_LOGGER_INIT(link->ctx->logger); @@ -3545,9 +3736,28 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) */ /* slot X.Y */ +#if defined(CONFIG_DRM_AMD_DC_DCN) + switch (dp_get_link_encoding_format(&link->cur_link_settings)) { + case DP_8b_10b_ENCODING: + stream_encoder->funcs->set_throttled_vcp_size( + stream_encoder, + avg_time_slots_per_mtp); + break; + case DP_128b_132b_ENCODING: + hpo_dp_link_encoder->funcs->set_throttled_vcp_size( + hpo_dp_link_encoder, + hpo_dp_stream_encoder->inst, + avg_time_slots_per_mtp); + break; + case DP_UNKNOWN_ENCODING: + DC_LOG_ERROR("Failure: unknown encoding format\n"); + return DC_ERROR_UNEXPECTED; + } +#else stream_encoder->funcs->set_throttled_vcp_size( stream_encoder, avg_time_slots_per_mtp); +#endif /* TODO: which component is responsible for remove payload table? */ if (mst_mode) { @@ -3557,8 +3767,16 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) &proposed_table, false)) { +#if defined(CONFIG_DRM_AMD_DC_DCN) + update_mst_stream_alloc_table( + link, + pipe_ctx->stream_res.stream_enc, + pipe_ctx->stream_res.hpo_dp_stream_enc, + &proposed_table); +#else update_mst_stream_alloc_table( link, pipe_ctx->stream_res.stream_enc, &proposed_table); +#endif } else { DC_LOG_WARNING("Failed to update" @@ -3574,20 +3792,53 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) link->mst_stream_alloc_table.stream_count); for (i = 0; i < MAX_CONTROLLER_NUM; i++) { +#if defined(CONFIG_DRM_AMD_DC_DCN) DC_LOG_MST("stream_enc[%d]: %p " + "stream[%d].hpo_dp_stream_enc: %p " "stream[%d].vcp_id: %d " "stream[%d].slot_count: %d\n", i, (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, i, + (void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc, + i, link->mst_stream_alloc_table.stream_allocations[i].vcp_id, i, link->mst_stream_alloc_table.stream_allocations[i].slot_count); +#else + DC_LOG_MST("stream_enc[%d]: %p " + "stream[%d].vcp_id: %d " + "stream[%d].slot_count: %d\n", + i, + (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, + i, + link->mst_stream_alloc_table.stream_allocations[i].vcp_id, + i, + link->mst_stream_alloc_table.stream_allocations[i].slot_count); +#endif } +#if defined(CONFIG_DRM_AMD_DC_DCN) + switch (dp_get_link_encoding_format(&link->cur_link_settings)) { + case DP_8b_10b_ENCODING: + link_encoder->funcs->update_mst_stream_allocation_table( + link_encoder, + &link->mst_stream_alloc_table); + break; + case DP_128b_132b_ENCODING: + hpo_dp_link_encoder->funcs->update_stream_allocation_table( + hpo_dp_link_encoder, + &link->mst_stream_alloc_table); + break; + case DP_UNKNOWN_ENCODING: + DC_LOG_ERROR("Failure: unknown encoding format\n"); + return DC_ERROR_UNEXPECTED; + } +#else link_encoder->funcs->update_mst_stream_allocation_table( link_encoder, &link->mst_stream_alloc_table); +#endif if (mst_mode) { dm_helpers_dp_mst_poll_for_allocation_change_trigger( diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index b0f1cd7268c8..471a67a64299 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -763,7 +763,7 @@ void dal_ddc_service_read_scdc_data(struct ddc_service *ddc_service) dal_ddc_service_query_ddc_data(ddc_service, slave_address, &offset, sizeof(offset), &tmds_config, sizeof(tmds_config)); if (tmds_config & 0x1) { - union hdmi_scdc_status_flags_data status_data = { {0} }; + union hdmi_scdc_status_flags_data status_data = {0}; uint8_t scramble_status = 0; offset = HDMI_SCDC_SCRAMBLER_STATUS; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 54662d74c65a..bb96e4e9ccfc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -259,7 +259,7 @@ static void dpcd_set_training_pattern( struct dc_link *link, enum dc_dp_training_pattern training_pattern) { - union dpcd_training_pattern dpcd_pattern = { {0} }; + union dpcd_training_pattern dpcd_pattern = {0}; dpcd_pattern.v1_4.TRAINING_PATTERN_SET = dc_dp_training_pattern_to_dpcd_training_pattern( @@ -401,8 +401,8 @@ enum dc_status dpcd_set_link_settings( uint8_t rate; enum dc_status status; - union down_spread_ctrl downspread = { {0} }; - union lane_count_set lane_count_set = { {0} }; + union down_spread_ctrl downspread = {0}; + union lane_count_set lane_count_set = {0}; downspread.raw = (uint8_t) (lt_settings->link_settings.link_spread); @@ -520,7 +520,7 @@ static void dpcd_set_lt_pattern_and_lane_settings( uint32_t dpcd_base_lt_offset; uint8_t dpcd_lt_buffer[5] = {0}; - union dpcd_training_pattern dpcd_pattern = { {0} }; + union dpcd_training_pattern dpcd_pattern = { 0 }; uint32_t size_in_bytes; bool edp_workaround = false; /* TODO link_prop.INTERNAL */ dpcd_base_lt_offset = DP_TRAINING_PATTERN_SET; @@ -1266,8 +1266,8 @@ static enum link_training_result perform_channel_equalization_sequence( uint32_t retries_ch_eq; uint32_t wait_time_microsec; enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; - union lane_align_status_updated dpcd_lane_status_updated = { {0} }; - union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = { { {0} } }; + union lane_align_status_updated dpcd_lane_status_updated = {0}; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; /* Note: also check that TPS4 is a supported feature*/ @@ -1487,7 +1487,7 @@ static inline enum link_training_result dp_transition_to_video_idle( struct link_training_settings *lt_settings, enum link_training_result status) { - union lane_count_set lane_count_set = { {0} }; + union lane_count_set lane_count_set = {0}; /* 4. mainlink output idle pattern*/ dp_set_hw_test_pattern(link, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); @@ -1800,7 +1800,7 @@ static enum dc_status configure_lttpr_mode_non_transparent( static void repeater_training_done(struct dc_link *link, uint32_t offset) { - union dpcd_training_pattern dpcd_pattern = { {0} }; + union dpcd_training_pattern dpcd_pattern = {0}; const uint32_t dpcd_base_lt_offset = DP_TRAINING_PATTERN_SET_PHY_REPEATER1 + @@ -2078,8 +2078,8 @@ static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence( uint32_t aux_rd_interval = 0; uint32_t wait_time = 0; struct link_training_settings req_settings; - union lane_align_status_updated dpcd_lane_status_updated = { {0} }; - union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = { { {0} } }; + union lane_align_status_updated dpcd_lane_status_updated = {0}; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; enum link_training_result status = LINK_TRAINING_SUCCESS; /* Transmit 128b/132b_TPS1 over Main-Link and Set TRAINING_PATTERN_SET to 01h */ @@ -2149,8 +2149,8 @@ static enum link_training_result dp_perform_128b_132b_cds_done_sequence( /* Assumption: assume hardware has transmitted eq pattern */ enum link_training_result status = LINK_TRAINING_SUCCESS; struct link_training_settings req_settings; - union lane_align_status_updated dpcd_lane_status_updated = { {0} }; - union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = { { {0} } }; + union lane_align_status_updated dpcd_lane_status_updated = {0}; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; uint32_t wait_time = 0; /* initiate CDS done sequence */ @@ -2863,7 +2863,7 @@ bool dp_verify_link_cap( link->verified_link_cap = *known_limit_link_setting; return true; } else if (link->link_enc && link->dc->res_pool->funcs->link_encs_assign && - !link_enc_cfg_is_link_enc_avail(link->ctx->dc, link->link_enc->preferred_engine)) { + !link_enc_cfg_is_link_enc_avail(link->ctx->dc, link->link_enc->preferred_engine, link)) { link->verified_link_cap = initial_link_settings; return true; } @@ -4065,8 +4065,8 @@ void dc_link_dp_handle_link_loss(struct dc_link *link) bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss, bool defer_handling, bool *has_left_work) { - union hpd_irq_data hpd_irq_dpcd_data = { { { {0} } } }; - union device_service_irq device_service_clear = { { 0 } }; + union hpd_irq_data hpd_irq_dpcd_data = {0}; + union device_service_irq device_service_clear = {0}; enum dc_status result; bool status = false; @@ -5939,7 +5939,7 @@ bool is_edp_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timin uint8_t link_bw_set; uint8_t link_rate_set; uint32_t req_bw; - union lane_count_set lane_count_set = { {0} }; + union lane_count_set lane_count_set = {0}; ASSERT(link || crtc_timing); // invalid input @@ -5993,6 +5993,25 @@ enum dp_link_encoding dp_get_link_encoding_format(const struct dc_link_settings } #if defined(CONFIG_DRM_AMD_DC_DCN) +enum dp_link_encoding dc_link_dp_mst_decide_link_encoding_format(const struct dc_link *link) +{ + struct dc_link_settings link_settings = {0}; + + if (!dc_is_dp_signal(link->connector_signal)) + return DP_UNKNOWN_ENCODING; + + if (link->preferred_link_setting.lane_count != + LANE_COUNT_UNKNOWN && + link->preferred_link_setting.link_rate != + LINK_RATE_UNKNOWN) { + link_settings = link->preferred_link_setting; + } else { + decide_mst_link_settings(link, &link_settings); + } + + return dp_get_link_encoding_format(&link_settings); +} + // TODO - DP2.0 Link: Fix get_lane_status to handle LTTPR offset (SST and MST) static void get_lane_status( struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 1cab4bf06abe..72b0f8594b4a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -488,16 +488,19 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_stream( return link_enc; } -bool link_enc_cfg_is_link_enc_avail(struct dc *dc, enum engine_id eng_id) +bool link_enc_cfg_is_link_enc_avail(struct dc *dc, enum engine_id eng_id, struct dc_link *link) { bool is_avail = true; int i; - /* Add assigned encoders to list. */ + /* An encoder is not available if it has already been assigned to a different endpoint. */ for (i = 0; i < MAX_PIPES; i++) { struct link_enc_assignment assignment = get_assignment(dc, i); + struct display_endpoint_id ep_id = (struct display_endpoint_id) { + .link_id = link->link_id, + .ep_type = link->ep_type}; - if (assignment.valid && assignment.eng_id == eng_id) { + if (assignment.valid && assignment.eng_id == eng_id && !are_ep_ids_equal(&ep_id, &assignment.ep_id)) { is_avail = false; break; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 561c10a92bb5..9e83fd54e2ca 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3009,6 +3009,11 @@ enum dc_status dc_validate_plane(struct dc *dc, const struct dc_plane_state *pla { enum dc_status res = DC_OK; + /* check if surface has invalid dimensions */ + if (plane_state->src_rect.width == 0 || plane_state->src_rect.height == 0 || + plane_state->dst_rect.width == 0 || plane_state->dst_rect.height == 0) + return DC_FAIL_SURFACE_VALIDATE; + /* TODO For now validates pixel format only */ if (dc->res_pool->funcs->validate_plane) return dc->res_pool->funcs->validate_plane(plane_state, &dc->caps); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index f0f54f4d3d9b..57cf4cb82370 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -202,6 +202,10 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream) new_stream->stream_id = new_stream->ctx->dc_stream_id_count; new_stream->ctx->dc_stream_id_count++; + /* If using dynamic encoder assignment, wait till stream committed to assign encoder. */ + if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign) + new_stream->link_enc = NULL; + kref_init(&new_stream->refcount); return new_stream; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index dd995905b0cb..fc3f0fd1f068 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.156" +#define DC_VER "3.2.157" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -664,6 +664,7 @@ struct dc_debug_options { #if defined(CONFIG_DRM_AMD_DC_DCN) /* TODO - remove once tested */ bool legacy_dp2_lt; + bool set_mst_en_for_sst; #endif union mem_low_power_enable_options enable_mem_low_power; union root_clock_optimization_options root_clock_optimization; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 69b008bafbbc..08815310d85b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -277,7 +277,6 @@ bool dc_link_setup_psr(struct dc_link *dc_link, struct psr_context *psr_context); void dc_link_get_psr_residency(const struct dc_link *link, uint32_t *residency); -void blank_all_dp_displays(struct dc *dc, bool hw_init); /* Request DC to detect if there is a Panel connected. * boot - If this call is during initial boot. @@ -296,6 +295,10 @@ enum dc_detect_reason { bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); bool dc_link_get_hpd_state(struct dc_link *dc_link); enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx); +#if defined(CONFIG_DRM_AMD_DC_DCN) +enum dc_status dc_link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t req_pbn); +enum dc_status dc_link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t req_pbn); +#endif /* Notify DC about DP RX Interrupt (aka Short Pulse Interrupt). * Return: @@ -425,4 +428,7 @@ uint32_t dc_bandwidth_in_kbps_from_timing( bool dc_link_is_fec_supported(const struct dc_link *link); bool dc_link_should_enable_fec(const struct dc_link *link); +#if defined(CONFIG_DRM_AMD_DC_DCN) +enum dp_link_encoding dc_link_dp_mst_decide_link_encoding_format(const struct dc_link *link); +#endif #endif /* DC_LINK_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index b8ebc1f09538..e37c4a10bfd5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -115,6 +115,13 @@ struct periodic_interrupt_config { int lines_offset; }; +#if defined(CONFIG_DRM_AMD_DC_DCN) +struct dc_mst_stream_bw_update { + bool is_increase; // is bandwidth reduced or increased + uint32_t mst_stream_bw; // new mst bandwidth in kbps +}; +#endif + union stream_update_flags { struct { uint32_t scaling:1; @@ -125,6 +132,9 @@ union stream_update_flags { uint32_t gamut_remap:1; uint32_t wb_update:1; uint32_t dsc_changed : 1; +#if defined(CONFIG_DRM_AMD_DC_DCN) + uint32_t mst_bw : 1; +#endif } bits; uint32_t raw; @@ -278,6 +288,9 @@ struct dc_stream_update { struct dc_writeback_update *wb_update; struct dc_dsc_config *dsc_config; +#if defined(CONFIG_DRM_AMD_DC_DCN) + struct dc_mst_stream_bw_update *mst_bw_update; +#endif struct dc_transfer_func *func_shaper; struct dc_3dlut *lut3d_func; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 15c353c389d8..388457ffc0a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -653,6 +653,7 @@ enum dc_psr_state { PSR_STATE1a, PSR_STATE2, PSR_STATE2a, + PSR_STATE2b, PSR_STATE3, PSR_STATE3Init, PSR_STATE4, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index aa8403bc4c83..05d96ca80512 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -50,6 +50,8 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state) state = PSR_STATE2; else if (raw_state == 0x21) state = PSR_STATE2a; + else if (raw_state == 0x22) + state = PSR_STATE2b; else if (raw_state == 0x30) state = PSR_STATE3; else if (raw_state == 0x31) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 8108f9ae2638..af3e68d3e747 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1649,13 +1649,31 @@ static enum dc_status apply_single_controller_ctx_to_hw( static void power_down_encoders(struct dc *dc) { - int i; - - blank_all_dp_displays(dc, false); + int i, j; for (i = 0; i < dc->link_count; i++) { enum signal_type signal = dc->links[i]->connector_signal; + if ((signal == SIGNAL_TYPE_EDP) || + (signal == SIGNAL_TYPE_DISPLAY_PORT)) { + if (dc->links[i]->link_enc->funcs->get_dig_frontend && + dc->links[i]->link_enc->funcs->is_dig_enabled(dc->links[i]->link_enc)) { + unsigned int fe = dc->links[i]->link_enc->funcs->get_dig_frontend( + dc->links[i]->link_enc); + + for (j = 0; j < dc->res_pool->stream_enc_count; j++) { + if (fe == dc->res_pool->stream_enc[j]->id) { + dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], + dc->res_pool->stream_enc[j]); + break; + } + } + } + + if (!dc->links[i]->wa_flags.dp_keep_receiver_powered) + dp_receiver_power_ctrl(dc->links[i], false); + } + if (signal != SIGNAL_TYPE_EDP) signal = SIGNAL_TYPE_NONE; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index bc3ec05bf34b..44d27579d898 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1366,7 +1366,7 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) void dcn10_init_hw(struct dc *dc) { - int i; + int i, j; struct abm *abm = dc->res_pool->abm; struct dmcu *dmcu = dc->res_pool->dmcu; struct dce_hwseq *hws = dc->hwseq; @@ -1462,8 +1462,43 @@ void dcn10_init_hw(struct dc *dc) dmub_enable_outbox_notification(dc); /* we want to turn off all dp displays before doing detection */ - if (dc->config.power_down_display_on_boot) - blank_all_dp_displays(dc, true); + if (dc->config.power_down_display_on_boot) { + uint8_t dpcd_power_state = '\0'; + enum dc_status status = DC_ERROR_UNEXPECTED; + + for (i = 0; i < dc->link_count; i++) { + if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) + continue; + + /* DP 2.0 requires that LTTPR Caps be read first */ + dp_retrieve_lttpr_cap(dc->links[i]); + + /* + * If any of the displays are lit up turn them off. + * The reason is that some MST hubs cannot be turned off + * completely until we tell them to do so. + * If not turned off, then displays connected to MST hub + * won't light up. + */ + status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, + &dpcd_power_state, sizeof(dpcd_power_state)); + if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) { + /* blank dp stream before power off receiver*/ + if (dc->links[i]->link_enc->funcs->get_dig_frontend) { + unsigned int fe = dc->links[i]->link_enc->funcs->get_dig_frontend(dc->links[i]->link_enc); + + for (j = 0; j < dc->res_pool->stream_enc_count; j++) { + if (fe == dc->res_pool->stream_enc[j]->id) { + dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], + dc->res_pool->stream_enc[j]); + break; + } + } + } + dp_receiver_power_ctrl(dc->links[i], false); + } + } + } /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -2304,8 +2339,8 @@ static void mmhub_read_vm_context0_settings(struct dcn10_hubp *hubp1, void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); - struct vm_system_aperture_param apt = { {{ 0 } } }; - struct vm_context0_param vm0 = { { { 0 } } }; + struct vm_system_aperture_param apt = {0}; + struct vm_context0_param vm0 = {0}; mmhub_read_vm_system_aperture_settings(hubp1, &apt, hws); mmhub_read_vm_context0_settings(hubp1, &vm0, hws); @@ -2478,7 +2513,7 @@ void dcn10_update_visual_confirm_color(struct dc *dc, struct pipe_ctx *pipe_ctx, void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct hubp *hubp = pipe_ctx->plane_res.hubp; - struct mpcc_blnd_cfg blnd_cfg = {{0}}; + struct mpcc_blnd_cfg blnd_cfg = {0}; bool per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha && pipe_ctx->bottom_pipe; int mpcc_id; struct mpcc *new_mpcc; @@ -3635,7 +3670,7 @@ void dcn10_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx) void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings) { - struct encoder_unblank_param params = { { 0 } }; + struct encoder_unblank_param params = {0}; struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; struct dce_hwseq *hws = link->dc->hwseq; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h index ede65100a050..f98aba308028 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h @@ -169,7 +169,29 @@ type DTBCLK_DTO_DIV[MAX_PIPES];\ type DCCG_AUDIO_DTO_SEL;\ type DCCG_AUDIO_DTO0_SOURCE_SEL;\ - type DENTIST_DISPCLK_CHG_MODE; + type DENTIST_DISPCLK_CHG_MODE;\ + type DSCCLK0_DTO_PHASE;\ + type DSCCLK0_DTO_MODULO;\ + type DSCCLK1_DTO_PHASE;\ + type DSCCLK1_DTO_MODULO;\ + type DSCCLK2_DTO_PHASE;\ + type DSCCLK2_DTO_MODULO;\ + type DSCCLK0_DTO_ENABLE;\ + type DSCCLK1_DTO_ENABLE;\ + type DSCCLK2_DTO_ENABLE;\ + type SYMCLK32_ROOT_SE0_GATE_DISABLE;\ + type SYMCLK32_ROOT_SE1_GATE_DISABLE;\ + type SYMCLK32_ROOT_SE2_GATE_DISABLE;\ + type SYMCLK32_ROOT_SE3_GATE_DISABLE;\ + type SYMCLK32_ROOT_LE0_GATE_DISABLE;\ + type SYMCLK32_ROOT_LE1_GATE_DISABLE;\ + type DPSTREAMCLK_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK_GATE_DISABLE;\ + type HDMISTREAMCLK0_DTO_PHASE;\ + type HDMISTREAMCLK0_DTO_MODULO;\ + type HDMICHARCLK0_GATE_DISABLE;\ + type HDMICHARCLK0_ROOT_GATE_DISABLE; + struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) @@ -205,6 +227,16 @@ struct dccg_registers { uint32_t SYMCLK32_SE_CNTL; uint32_t SYMCLK32_LE_CNTL; uint32_t DENTIST_DISPCLK_CNTL; + uint32_t DSCCLK_DTO_CTRL; + uint32_t DSCCLK0_DTO_PARAM; + uint32_t DSCCLK1_DTO_PARAM; + uint32_t DSCCLK2_DTO_PARAM; + uint32_t DPSTREAMCLK_ROOT_GATE_DISABLE; + uint32_t DPSTREAMCLK_GATE_DISABLE; + uint32_t DCCG_GATE_DISABLE_CNTL3; + uint32_t HDMISTREAMCLK0_DTO_PARAM; + uint32_t DCCG_GATE_DISABLE_CNTL4; + }; struct dcn_dccg { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index fc83744149d9..cfee456c6c9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2123,7 +2123,7 @@ void dcn20_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx) void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings) { - struct encoder_unblank_param params = { { 0 } }; + struct encoder_unblank_param params = {0}; struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; struct dce_hwseq *hws = link->dc->hwseq; @@ -2298,7 +2298,7 @@ void dcn20_update_visual_confirm_color(struct dc *dc, struct pipe_ctx *pipe_ctx, void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct hubp *hubp = pipe_ctx->plane_res.hubp; - struct mpcc_blnd_cfg blnd_cfg = { {0} }; + struct mpcc_blnd_cfg blnd_cfg = {0}; bool per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha; int mpcc_id; struct mpcc *new_mpcc; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 01a90badd173..df2717116604 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -437,7 +437,7 @@ void dcn30_init_hw(struct dc *dc) struct dce_hwseq *hws = dc->hwseq; struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; - int i; + int i, j; int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; @@ -534,8 +534,41 @@ void dcn30_init_hw(struct dc *dc) hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); /* we want to turn off all dp displays before doing detection */ - if (dc->config.power_down_display_on_boot) - blank_all_dp_displays(dc, true); + if (dc->config.power_down_display_on_boot) { + uint8_t dpcd_power_state = '\0'; + enum dc_status status = DC_ERROR_UNEXPECTED; + + for (i = 0; i < dc->link_count; i++) { + if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) + continue; + /* DP 2.0 states that LTTPR regs must be read first */ + dp_retrieve_lttpr_cap(dc->links[i]); + + /* if any of the displays are lit up turn them off */ + status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, + &dpcd_power_state, sizeof(dpcd_power_state)); + if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) { + /* blank dp stream before power off receiver*/ + if (dc->links[i]->link_enc->funcs->get_dig_frontend) { + unsigned int fe; + + fe = dc->links[i]->link_enc->funcs->get_dig_frontend( + dc->links[i]->link_enc); + if (fe == ENGINE_ID_UNKNOWN) + continue; + + for (j = 0; j < dc->res_pool->stream_enc_count; j++) { + if (fe == dc->res_pool->stream_enc[j]->id) { + dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], + dc->res_pool->stream_enc[j]); + break; + } + } + } + dp_receiver_power_ctrl(dc->links[i], false); + } + } + } /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -969,7 +1002,8 @@ void dcn30_set_disp_pattern_generator(const struct dc *dc, /* turning off DPG */ pipe_ctx->plane_res.hubp->funcs->set_blank(pipe_ctx->plane_res.hubp, false); for (mpcc_pipe = pipe_ctx->bottom_pipe; mpcc_pipe; mpcc_pipe = mpcc_pipe->bottom_pipe) - mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, false); + if (mpcc_pipe->plane_res.hubp) + mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, false); stream_res->opp->funcs->opp_set_disp_pattern_generator(stream_res->opp, test_pattern, color_space, color_depth, solid_color, width, height, offset); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c index 6bd7a0626665..de5e18c2a3ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c @@ -129,7 +129,7 @@ static void apg31_se_audio_setup( /* When running in "pair mode", pairs of audio channels have their own enable * this is for really old audio drivers */ - REG_UPDATE(APG_DBG_GEN_CONTROL, APG_DBG_AUDIO_CHANNEL_ENABLE, 0xF); + REG_UPDATE(APG_DBG_GEN_CONTROL, APG_DBG_AUDIO_CHANNEL_ENABLE, 0xFF); // REG_UPDATE(APG_DBG_GEN_CONTROL, APG_DBG_AUDIO_CHANNEL_ENABLE, channels); /* Disable forced mem power off */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index 9896adf67425..815481a3ef54 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -26,6 +26,7 @@ #include "reg_helper.h" #include "core_types.h" #include "dcn31_dccg.h" +#include "dal_asic_id.h" #define TO_DCN_DCCG(dccg)\ container_of(dccg, struct dcn_dccg, base) @@ -42,10 +43,58 @@ #define DC_LOGGER \ dccg->ctx->logger -void dccg31_set_dpstreamclk( - struct dccg *dccg, - enum hdmistreamclk_source src, - int otg_inst) +static void dccg31_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (dccg->ref_dppclk && req_dppclk) { + int ref_dppclk = dccg->ref_dppclk; + int modulo, phase; + + // phase / modulo = dpp pipe clk / dpp global clk + modulo = 0xff; // use FF at the end + phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk; + + if (phase > 0xff) { + ASSERT(false); + phase = 0xff; + } + + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, phase, + DPPCLK0_DTO_MODULO, modulo); + REG_UPDATE(DPPCLK_DTO_CTRL, + DPPCLK_DTO_ENABLE[dpp_inst], 1); + } else { + //DTO must be enabled to generate a 0Hz clock output + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) { + REG_UPDATE(DPPCLK_DTO_CTRL, + DPPCLK_DTO_ENABLE[dpp_inst], 1); + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, 0, + DPPCLK0_DTO_MODULO, 1); + } else { + REG_UPDATE(DPPCLK_DTO_CTRL, + DPPCLK_DTO_ENABLE[dpp_inst], 0); + } + } + dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; +} + +static enum phyd32clk_clock_source get_phy_mux_symclk( + struct dcn_dccg *dccg_dcn, + enum phyd32clk_clock_source src) +{ + if (dccg_dcn->base.ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { + if (src == PHYD32CLKC) + src = PHYD32CLKF; + if (src == PHYD32CLKD) + src = PHYD32CLKG; + } + return src; +} + +static void dccg31_enable_dpstreamclk(struct dccg *dccg, int otg_inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -53,19 +102,53 @@ void dccg31_set_dpstreamclk( switch (otg_inst) { case 0: REG_UPDATE(DPSTREAMCLK_CNTL, - DPSTREAMCLK_PIPE0_EN, (src == REFCLK) ? 0 : 1); + DPSTREAMCLK_PIPE0_EN, 1); + break; + case 1: + REG_UPDATE(DPSTREAMCLK_CNTL, + DPSTREAMCLK_PIPE1_EN, 1); + break; + case 2: + REG_UPDATE(DPSTREAMCLK_CNTL, + DPSTREAMCLK_PIPE2_EN, 1); + break; + case 3: + REG_UPDATE(DPSTREAMCLK_CNTL, + DPSTREAMCLK_PIPE3_EN, 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + DPSTREAMCLK_ROOT_GATE_DISABLE, 1); +} + +static void dccg31_disable_dpstreamclk(struct dccg *dccg, int otg_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + DPSTREAMCLK_ROOT_GATE_DISABLE, 0); + + switch (otg_inst) { + case 0: + REG_UPDATE(DPSTREAMCLK_CNTL, + DPSTREAMCLK_PIPE0_EN, 0); break; case 1: REG_UPDATE(DPSTREAMCLK_CNTL, - DPSTREAMCLK_PIPE1_EN, (src == REFCLK) ? 0 : 1); + DPSTREAMCLK_PIPE1_EN, 0); break; case 2: REG_UPDATE(DPSTREAMCLK_CNTL, - DPSTREAMCLK_PIPE2_EN, (src == REFCLK) ? 0 : 1); + DPSTREAMCLK_PIPE2_EN, 0); break; case 3: REG_UPDATE(DPSTREAMCLK_CNTL, - DPSTREAMCLK_PIPE3_EN, (src == REFCLK) ? 0 : 1); + DPSTREAMCLK_PIPE3_EN, 0); break; default: BREAK_TO_DEBUGGER(); @@ -73,6 +156,17 @@ void dccg31_set_dpstreamclk( } } +void dccg31_set_dpstreamclk( + struct dccg *dccg, + enum hdmistreamclk_source src, + int otg_inst) +{ + if (src == REFCLK) + dccg31_disable_dpstreamclk(dccg, otg_inst); + else + dccg31_enable_dpstreamclk(dccg, otg_inst); +} + void dccg31_enable_symclk32_se( struct dccg *dccg, int hpo_se_inst, @@ -80,24 +174,38 @@ void dccg31_enable_symclk32_se( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + phyd32clk = get_phy_mux_symclk(dccg_dcn, phyd32clk); + /* select one of the PHYD32CLKs as the source for symclk32_se */ switch (hpo_se_inst) { case 0: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_SE0_GATE_DISABLE, 1); REG_UPDATE_2(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, phyd32clk, SYMCLK32_SE0_EN, 1); break; case 1: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_SE1_GATE_DISABLE, 1); REG_UPDATE_2(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, phyd32clk, SYMCLK32_SE1_EN, 1); break; case 2: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_SE2_GATE_DISABLE, 1); REG_UPDATE_2(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, phyd32clk, SYMCLK32_SE2_EN, 1); break; case 3: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_SE3_GATE_DISABLE, 1); REG_UPDATE_2(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, phyd32clk, SYMCLK32_SE3_EN, 1); @@ -120,21 +228,33 @@ void dccg31_disable_symclk32_se( REG_UPDATE_2(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, 0, SYMCLK32_SE0_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_SE0_GATE_DISABLE, 0); break; case 1: REG_UPDATE_2(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, 0, SYMCLK32_SE1_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_SE1_GATE_DISABLE, 0); break; case 2: REG_UPDATE_2(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, 0, SYMCLK32_SE2_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_SE2_GATE_DISABLE, 0); break; case 3: REG_UPDATE_2(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, 0, SYMCLK32_SE3_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_SE3_GATE_DISABLE, 0); break; default: BREAK_TO_DEBUGGER(); @@ -149,14 +269,22 @@ void dccg31_enable_symclk32_le( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + phyd32clk = get_phy_mux_symclk(dccg_dcn, phyd32clk); + /* select one of the PHYD32CLKs as the source for symclk32_le */ switch (hpo_le_inst) { case 0: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_LE0_GATE_DISABLE, 1); REG_UPDATE_2(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, phyd32clk, SYMCLK32_LE0_EN, 1); break; case 1: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_LE1_GATE_DISABLE, 1); REG_UPDATE_2(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, phyd32clk, SYMCLK32_LE1_EN, 1); @@ -179,11 +307,87 @@ void dccg31_disable_symclk32_le( REG_UPDATE_2(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, 0, SYMCLK32_LE0_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_LE0_GATE_DISABLE, 0); break; case 1: REG_UPDATE_2(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, 0, SYMCLK32_LE1_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_ROOT_LE1_GATE_DISABLE, 0); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg31_disable_dscclk(struct dccg *dccg, int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + return; + //DTO must be enabled to generate a 0 Hz clock output + switch (inst) { + case 0: + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK0_DTO_ENABLE, 1); + REG_UPDATE_2(DSCCLK0_DTO_PARAM, + DSCCLK0_DTO_PHASE, 0, + DSCCLK0_DTO_MODULO, 1); + break; + case 1: + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK1_DTO_ENABLE, 1); + REG_UPDATE_2(DSCCLK1_DTO_PARAM, + DSCCLK1_DTO_PHASE, 0, + DSCCLK1_DTO_MODULO, 1); + break; + case 2: + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK2_DTO_ENABLE, 1); + REG_UPDATE_2(DSCCLK2_DTO_PARAM, + DSCCLK2_DTO_PHASE, 0, + DSCCLK2_DTO_MODULO, 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg31_enable_dscclk(struct dccg *dccg, int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + return; + //Disable DTO + switch (inst) { + case 0: + REG_UPDATE_2(DSCCLK0_DTO_PARAM, + DSCCLK0_DTO_PHASE, 0, + DSCCLK0_DTO_MODULO, 0); + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK0_DTO_ENABLE, 0); + break; + case 1: + REG_UPDATE_2(DSCCLK1_DTO_PARAM, + DSCCLK1_DTO_PHASE, 0, + DSCCLK1_DTO_MODULO, 0); + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK1_DTO_ENABLE, 0); + break; + case 2: + REG_UPDATE_2(DSCCLK2_DTO_PARAM, + DSCCLK2_DTO_PHASE, 0, + DSCCLK2_DTO_MODULO, 0); + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK2_DTO_ENABLE, 0); break; default: BREAK_TO_DEBUGGER(); @@ -398,10 +602,23 @@ void dccg31_init(struct dccg *dccg) dccg31_disable_symclk32_se(dccg, 1); dccg31_disable_symclk32_se(dccg, 2); dccg31_disable_symclk32_se(dccg, 3); + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) { + dccg31_disable_symclk32_le(dccg, 0); + dccg31_disable_symclk32_le(dccg, 1); + } + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) { + dccg31_disable_dpstreamclk(dccg, 0); + dccg31_disable_dpstreamclk(dccg, 1); + dccg31_disable_dpstreamclk(dccg, 2); + dccg31_disable_dpstreamclk(dccg, 3); + } + } static const struct dccg_funcs dccg31_funcs = { - .update_dpp_dto = dccg2_update_dpp_dto, + .update_dpp_dto = dccg31_update_dpp_dto, .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, .dccg_init = dccg31_init, .set_dpstreamclk = dccg31_set_dpstreamclk, @@ -413,6 +630,8 @@ static const struct dccg_funcs dccg31_funcs = { .set_dtbclk_dto = dccg31_set_dtbclk_dto, .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto, .set_dispclk_change_mode = dccg31_set_dispclk_change_mode, + .disable_dsc = dccg31_disable_dscclk, + .enable_dsc = dccg31_enable_dscclk, }; struct dccg *dccg31_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h index 1e5aabcb7799..a013a32bbaf7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h @@ -61,7 +61,13 @@ SR(DCCG_AUDIO_DTBCLK_DTO_MODULO),\ SR(DCCG_AUDIO_DTBCLK_DTO_PHASE),\ SR(DCCG_AUDIO_DTO_SOURCE),\ - SR(DENTIST_DISPCLK_CNTL) + SR(DENTIST_DISPCLK_CNTL),\ + SR(DSCCLK0_DTO_PARAM),\ + SR(DSCCLK1_DTO_PARAM),\ + SR(DSCCLK2_DTO_PARAM),\ + SR(DSCCLK_DTO_CTRL),\ + SR(DCCG_GATE_DISABLE_CNTL3),\ + SR(HDMISTREAMCLK0_DTO_PARAM) #define DCCG_MASK_SH_LIST_DCN31(mask_sh) \ @@ -119,7 +125,26 @@ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, DIV, 3, mask_sh),\ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\ - DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_MODE, mask_sh) + DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_MODE, mask_sh), \ + DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\ + DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\ + DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\ + DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_MODULO, mask_sh),\ + DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, mask_sh),\ + DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_MODULO, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_DTO_ENABLE, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_ENABLE, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_ENABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, DPSTREAMCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, DPSTREAMCLK_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\ + DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh) struct dccg *dccg31_create( @@ -130,6 +155,11 @@ struct dccg *dccg31_create( void dccg31_init(struct dccg *dccg); +void dccg31_set_dpstreamclk( + struct dccg *dccg, + enum hdmistreamclk_source src, + int otg_inst); + void dccg31_enable_symclk32_se( struct dccg *dccg, int hpo_se_inst, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 18e33ef3d217..9a6ad1cebc85 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -49,6 +49,7 @@ #include "inc/link_dpcd.h" #include "dcn10/dcn10_hw_sequencer.h" #include "inc/link_enc_cfg.h" +#include "dcn30/dcn30_vpg.h" #define DC_LOGGER_INIT(logger) @@ -71,16 +72,11 @@ void dcn31_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; - int i; - int edp_num; + int i, j; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); - // Initialize the dccg - if (res_pool->dccg->funcs->dccg_init) - res_pool->dccg->funcs->dccg_init(res_pool->dccg); - if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { REG_WRITE(REFCLK_CNTL, 0); @@ -107,6 +103,9 @@ void dcn31_init_hw(struct dc *dc) hws->funcs.bios_golden_init(dc); hws->funcs.disable_vga(dc->hwseq); } + // Initialize the dccg + if (res_pool->dccg->funcs->dccg_init) + res_pool->dccg->funcs->dccg_init(res_pool->dccg); if (dc->debug.enable_mem_low_power.bits.dmcu) { // Force ERAM to shutdown if DMCU is not enabled @@ -126,6 +125,18 @@ void dcn31_init_hw(struct dc *dc) REG_UPDATE(MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, 1); } +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) { + // Power down VPGs + for (i = 0; i < dc->res_pool->stream_enc_count; i++) + dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg); +#if defined(CONFIG_DRM_AMD_DC_DP2_0) + for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++) + dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg); +#endif + } +#endif + if (dc->ctx->dc_bios->fw_info_valid) { res_pool->ref_clocks.xtalin_clock_inKhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency; @@ -179,9 +190,40 @@ void dcn31_init_hw(struct dc *dc) dmub_enable_outbox_notification(dc); /* we want to turn off all dp displays before doing detection */ - if (dc->config.power_down_display_on_boot) - blank_all_dp_displays(dc, true); - + if (dc->config.power_down_display_on_boot) { + uint8_t dpcd_power_state = '\0'; + enum dc_status status = DC_ERROR_UNEXPECTED; + + for (i = 0; i < dc->link_count; i++) { + if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) + continue; + + /* if any of the displays are lit up turn them off */ + status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, + &dpcd_power_state, sizeof(dpcd_power_state)); + if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) { + /* blank dp stream before power off receiver*/ + if (dc->links[i]->ep_type == DISPLAY_ENDPOINT_PHY && + dc->links[i]->link_enc->funcs->get_dig_frontend) { + unsigned int fe; + + fe = dc->links[i]->link_enc->funcs->get_dig_frontend( + dc->links[i]->link_enc); + if (fe == ENGINE_ID_UNKNOWN) + continue; + + for (j = 0; j < dc->res_pool->stream_enc_count; j++) { + if (fe == dc->res_pool->stream_enc[j]->id) { + dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], + dc->res_pool->stream_enc[j]); + break; + } + } + } + dp_receiver_power_ctrl(dc->links[i], false); + } + } + } /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -196,48 +238,6 @@ void dcn31_init_hw(struct dc *dc) !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); } - /* In headless boot cases, DIG may be turned - * on which causes HW/SW discrepancies. - * To avoid this, power down hardware on boot - * if DIG is turned on and seamless boot not enabled - */ - if (dc->config.power_down_display_on_boot) { - struct dc_link *edp_links[MAX_NUM_EDP]; - struct dc_link *edp_link; - bool power_down = false; - - get_edp_links(dc, edp_links, &edp_num); - if (edp_num) { - for (i = 0; i < edp_num; i++) { - edp_link = edp_links[i]; - if (edp_link->link_enc->funcs->is_dig_enabled && - edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && - dc->hwss.edp_backlight_control && - dc->hwss.power_down && - dc->hwss.edp_power_control) { - dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); - dc->hwss.edp_power_control(edp_link, false); - power_down = true; - } - } - } - if (!power_down) { - for (i = 0; i < dc->link_count; i++) { - struct dc_link *link = dc->links[i]; - - if (link->ep_type == DISPLAY_ENDPOINT_PHY && - link->link_enc->funcs->is_dig_enabled && - link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); - break; - } - - } - } - } - for (i = 0; i < res_pool->audio_count; i++) { struct audio *audio = res_pool->audios[i]; @@ -300,6 +300,12 @@ void dcn31_dsc_pg_control( if (hws->ctx->dc->debug.disable_dsc_power_gate) return; + if (hws->ctx->dc->debug.root_clock_optimization.bits.dsc && + hws->ctx->dc->res_pool->dccg->funcs->enable_dsc && + power_on) + hws->ctx->dc->res_pool->dccg->funcs->enable_dsc( + hws->ctx->dc->res_pool->dccg, dsc_inst); + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); @@ -336,6 +342,13 @@ void dcn31_dsc_pg_control( if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); + + if (hws->ctx->dc->debug.root_clock_optimization.bits.dsc) { + if (hws->ctx->dc->res_pool->dccg->funcs->disable_dsc && !power_on) + hws->ctx->dc->res_pool->dccg->funcs->disable_dsc( + hws->ctx->dc->res_pool->dccg, dsc_inst); + } + } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 7cb7604a35eb..2b4459909648 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -222,8 +222,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { .num_states = 5, .sr_exit_time_us = 9.0, .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 402.0, - .sr_enter_plus_exit_z8_time_us = 520.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, .writeback_latency_us = 12.0, .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, @@ -998,7 +998,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, - .max_downscale_src_width = 3840,/*upto 4K*/ + .max_downscale_src_width = 4096,/*upto true 4K*/ .disable_pplib_wm_range = false, .scl_reset_length10 = true, .sanity_checks = false, @@ -1312,10 +1312,6 @@ static struct vpg *dcn31_vpg_create( &vpg_shift, &vpg_mask); - // Will re-enable hw block when we enable stream - // Check for enabled stream before powering down? - vpg31_powerdown(&vpg31->base); - return &vpg31->base; } @@ -1383,6 +1379,12 @@ static struct stream_encoder *dcn31_stream_encoder_create( return NULL; } + if (ctx->asic_id.chip_family == FAMILY_YELLOW_CARP && + ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { + if ((eng_id == ENGINE_ID_DIGC) || (eng_id == ENGINE_ID_DIGD)) + eng_id = eng_id + 3; // For B0 only. C->F, D->G. + } + dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id], @@ -1782,6 +1784,13 @@ static int dcn31_populate_dml_pipes_from_context( pipe = &res_ctx->pipe_ctx[i]; timing = &pipe->stream->timing; + /* + * Immediate flip can be set dynamically after enabling the plane. + * We need to require support for immediate flip or underflow can be + * intermittently experienced depending on peak b/w requirements. + */ + pipes[pipe_cnt].pipe.src.immediate_flip = true; + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index ce55c9caf9a2..d58925cff420 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -5398,9 +5398,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch - + dml_max4( - v->VActivePixelBandwidth[i][j][k], - v->VActiveCursorBandwidth[i][j][k] + + dml_max3( + v->VActivePixelBandwidth[i][j][k] + + v->VActiveCursorBandwidth[i][j][k] + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]), diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 09237d5819f4..c940fdfda144 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -123,6 +123,15 @@ struct dccg_funcs { void (*set_dispclk_change_mode)( struct dccg *dccg, enum dentist_dispclk_change_mode change_mode); + + void (*disable_dsc)( + struct dccg *dccg, + int inst); + + void (*enable_dsc)( + struct dccg *dccg, + int inst); + }; #endif //__DAL_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h index 83b2199b2c83..10dcf6a5e9b1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h @@ -97,7 +97,7 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_stream( const struct dc_stream_state *stream); /* Return true if encoder available to use. */ -bool link_enc_cfg_is_link_enc_avail(struct dc *dc, enum engine_id eng_id); +bool link_enc_cfg_is_link_enc_avail(struct dc *dc, enum engine_id eng_id, struct dc_link *link); /* Returns true if encoder assignments in supplied state pass validity checks. */ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 0b9d6bf4886d..4c436fb4624b 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -46,10 +46,10 @@ /* Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0xf0c64c97 +#define DMUB_FW_VERSION_GIT_HASH 0xd146258f #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 87 +#define DMUB_FW_VERSION_REVISION 88 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 257f280d3d53..f1a46d16f7ea 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -228,7 +228,7 @@ enum DC_FEATURE_MASK { DC_FBC_MASK = (1 << 0), //0x1, disabled by default DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default - DC_PSR_MASK = (1 << 3), //0x8, disabled by default + DC_PSR_MASK = (1 << 3), //0x8, disabled by default for dcn < 3.1 DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default }; @@ -236,7 +236,8 @@ enum DC_DEBUG_MASK { DC_DISABLE_PIPE_SPLIT = 0x1, DC_DISABLE_STUTTER = 0x2, DC_DISABLE_DSC = 0x4, - DC_DISABLE_CLOCK_GATING = 0x8 + DC_DISABLE_CLOCK_GATING = 0x8, + DC_DISABLE_PSR = 0x10, }; enum amd_dpm_forced_level; diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h index 312c50ea30f3..f268d33c4744 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h @@ -436,6 +436,8 @@ #define regPHYESYMCLK_CLOCK_CNTL_BASE_IDX 2 #define regDCCG_GATE_DISABLE_CNTL3 0x005a #define regDCCG_GATE_DISABLE_CNTL3_BASE_IDX 2 +#define regHDMISTREAMCLK0_DTO_PARAM 0x005b +#define regHDMISTREAMCLK0_DTO_PARAM_BASE_IDX 2 #define regDCCG_AUDIO_DTBCLK_DTO_PHASE 0x0061 #define regDCCG_AUDIO_DTBCLK_DTO_PHASE_BASE_IDX 2 #define regDCCG_AUDIO_DTBCLK_DTO_MODULO 0x0062 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h index a9d553ef26c0..1f21f313bd1d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h @@ -1438,6 +1438,14 @@ #define DCCG_GATE_DISABLE_CNTL3__SYMCLK32_LE0_GATE_DISABLE_MASK 0x00200000L #define DCCG_GATE_DISABLE_CNTL3__SYMCLK32_ROOT_LE1_GATE_DISABLE_MASK 0x00400000L #define DCCG_GATE_DISABLE_CNTL3__SYMCLK32_LE1_GATE_DISABLE_MASK 0x00800000L +//HDMISTREAMCLK0_DTO_PARAM +#define HDMISTREAMCLK0_DTO_PARAM__HDMISTREAMCLK0_DTO_PHASE__SHIFT 0x0 +#define HDMISTREAMCLK0_DTO_PARAM__HDMISTREAMCLK0_DTO_MODULO__SHIFT 0x8 +#define HDMISTREAMCLK0_DTO_PARAM__HDMISTREAMCLK0_DTO_EN__SHIFT 0x10 +#define HDMISTREAMCLK0_DTO_PARAM__HDMISTREAMCLK0_DTO_PHASE_MASK 0x000000FFL +#define HDMISTREAMCLK0_DTO_PARAM__HDMISTREAMCLK0_DTO_MODULO_MASK 0x0000FF00L +#define HDMISTREAMCLK0_DTO_PARAM__HDMISTREAMCLK0_DTO_EN_MASK 0x00010000L + //DCCG_AUDIO_DTBCLK_DTO_PHASE #define DCCG_AUDIO_DTBCLK_DTO_PHASE__DCCG_AUDIO_DTBCLK_DTO_PHASE__SHIFT 0x0 #define DCCG_AUDIO_DTBCLK_DTO_PHASE__DCCG_AUDIO_DTBCLK_DTO_PHASE_MASK 0xFFFFFFFFL diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index c255b4b8e685..01cca08a774f 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2019,15 +2019,15 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(pp_dpm_pcie, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(pp_sclk_od, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(pp_mclk_od, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RW(pp_power_profile_mode, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_power_profile_mode, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(pp_od_clk_voltage, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RO(gpu_busy_percent, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RO(mem_busy_percent, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(gpu_busy_percent, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RO(mem_busy_percent, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RO(pcie_bw, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RO(gpu_metrics, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RO(gpu_metrics, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RO(smartshift_apu_power, ATTR_FLAG_BASIC, .attr_update = ss_power_attr_update), AMDGPU_DEVICE_ATTR_RO(smartshift_dgpu_power, ATTR_FLAG_BASIC, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 8156729c370b..3557f4e7fc30 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -1008,7 +1008,9 @@ struct pptable_funcs { /** * @set_power_limit: Set power limit in watts. */ - int (*set_power_limit)(struct smu_context *smu, uint32_t n); + int (*set_power_limit)(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit); /** * @init_max_sustainable_clocks: Populate max sustainable clock speed diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index cbdae8a2c698..2d422e6a9feb 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -197,7 +197,9 @@ int smu_v11_0_notify_display_change(struct smu_context *smu); int smu_v11_0_get_current_power_limit(struct smu_context *smu, uint32_t *power_limit); -int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n); +int smu_v11_0_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit); int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h index dc91eb608791..e5d3b0d1a032 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h @@ -163,7 +163,9 @@ int smu_v13_0_notify_display_change(struct smu_context *smu); int smu_v13_0_get_current_power_limit(struct smu_context *smu, uint32_t *power_limit); -int smu_v13_0_set_power_limit(struct smu_context *smu, uint32_t n); +int smu_v13_0_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit); int smu_v13_0_init_max_sustainable_clocks(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 4ea7e90ef60d..b06c59dcc1b4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -455,6 +455,10 @@ static int smu_get_power_num_states(void *handle, bool is_support_sw_smu(struct amdgpu_device *adev) { + /* vega20 is 11.0.2, but it's supported via the powerplay code */ + if (adev->asic_type == CHIP_VEGA20) + return false; + if (adev->ip_versions[MP1_HWIP][0] >= IP_VERSION(11, 0, 0)) return true; @@ -2344,9 +2348,10 @@ static int smu_set_power_limit(void *handle, uint32_t limit) mutex_lock(&smu->mutex); + limit &= (1<<24)-1; if (limit_type != SMU_DEFAULT_PPT_LIMIT) if (smu->ppt_funcs->set_power_limit) { - ret = smu->ppt_funcs->set_power_limit(smu, limit); + ret = smu->ppt_funcs->set_power_limit(smu, limit_type, limit); goto out; } @@ -2362,7 +2367,7 @@ static int smu_set_power_limit(void *handle, uint32_t limit) limit = smu->current_power_limit; if (smu->ppt_funcs->set_power_limit) { - ret = smu->ppt_funcs->set_power_limit(smu, limit); + ret = smu->ppt_funcs->set_power_limit(smu, limit_type, limit); if (!ret && !(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) smu->user_dpm_profile.power_limit = limit; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 082f01893f3d..fd1d30a93db5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -436,6 +436,19 @@ static void arcturus_check_bxco_support(struct smu_context *smu) } } +static void arcturus_check_fan_support(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + + /* No sort of fan control possible if PPTable has it disabled */ + smu->adev->pm.no_fan = + !(pptable->FeaturesToRun[0] & FEATURE_FAN_CONTROL_MASK); + if (smu->adev->pm.no_fan) + dev_info_once(smu->adev->dev, + "PMFW based fan control disabled"); +} + static int arcturus_check_powerplay_table(struct smu_context *smu) { struct smu_table_context *table_context = &smu->smu_table; @@ -443,6 +456,7 @@ static int arcturus_check_powerplay_table(struct smu_context *smu) table_context->power_play_table; arcturus_check_bxco_support(smu); + arcturus_check_fan_support(smu); table_context->thermal_controller_type = powerplay_table->thermal_controller_type; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c index 3d4c65bc29dc..cbc3f99e8573 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c @@ -47,7 +47,6 @@ /* unit: MHz */ #define CYAN_SKILLFISH_SCLK_MIN 1000 #define CYAN_SKILLFISH_SCLK_MAX 2000 -#define CYAN_SKILLFISH_SCLK_DEFAULT 1800 /* unit: mV */ #define CYAN_SKILLFISH_VDDC_MIN 700 @@ -59,6 +58,8 @@ static struct gfx_user_settings { uint32_t vddc; } cyan_skillfish_user_settings; +static uint32_t cyan_skillfish_sclk_default; + #define FEATURE_MASK(feature) (1ULL << feature) #define SMC_DPM_FEATURE ( \ FEATURE_MASK(FEATURE_FCLK_DPM_BIT) | \ @@ -365,13 +366,19 @@ static bool cyan_skillfish_is_dpm_running(struct smu_context *smu) return false; ret = smu_cmn_get_enabled_32_bits_mask(smu, feature_mask, 2); - if (ret) return false; feature_enabled = (uint64_t)feature_mask[0] | ((uint64_t)feature_mask[1] << 32); + /* + * cyan_skillfish specific, query default sclk inseted of hard code. + */ + if (!cyan_skillfish_sclk_default) + cyan_skillfish_get_smu_metrics_data(smu, METRICS_CURR_GFXCLK, + &cyan_skillfish_sclk_default); + return !!(feature_enabled & SMC_DPM_FEATURE); } @@ -444,14 +451,14 @@ static int cyan_skillfish_od_edit_dpm_table(struct smu_context *smu, return -EINVAL; } - if (input[1] <= CYAN_SKILLFISH_SCLK_MIN || + if (input[1] < CYAN_SKILLFISH_SCLK_MIN || input[1] > CYAN_SKILLFISH_SCLK_MAX) { dev_err(smu->adev->dev, "Invalid sclk! Valid sclk range: %uMHz - %uMhz\n", CYAN_SKILLFISH_SCLK_MIN, CYAN_SKILLFISH_SCLK_MAX); return -EINVAL; } - if (input[2] <= CYAN_SKILLFISH_VDDC_MIN || + if (input[2] < CYAN_SKILLFISH_VDDC_MIN || input[2] > CYAN_SKILLFISH_VDDC_MAX) { dev_err(smu->adev->dev, "Invalid vddc! Valid vddc range: %umV - %umV\n", CYAN_SKILLFISH_VDDC_MIN, CYAN_SKILLFISH_VDDC_MAX); @@ -468,7 +475,7 @@ static int cyan_skillfish_od_edit_dpm_table(struct smu_context *smu, return -EINVAL; } - cyan_skillfish_user_settings.sclk = CYAN_SKILLFISH_SCLK_DEFAULT; + cyan_skillfish_user_settings.sclk = cyan_skillfish_sclk_default; cyan_skillfish_user_settings.vddc = CYAN_SKILLFISH_VDDC_MAGIC; break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 15e66e1912de..a4108025fe29 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -670,7 +670,7 @@ static int sienna_cichlid_set_default_dpm_table(struct smu_context *smu) struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_11_0_dpm_table *dpm_table; struct amdgpu_device *adev = smu->adev; - int ret = 0; + int i, ret = 0; DpmDescriptor_t *table_member; /* socclk dpm table setup */ @@ -746,78 +746,45 @@ static int sienna_cichlid_set_default_dpm_table(struct smu_context *smu) dpm_table->max = dpm_table->dpm_levels[0].value; } - /* vclk0 dpm table setup */ - dpm_table = &dpm_context->dpm_tables.vclk_table; - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { - ret = smu_v11_0_set_single_dpm_table(smu, - SMU_VCLK, - dpm_table); - if (ret) - return ret; - dpm_table->is_fine_grained = - !table_member[PPCLK_VCLK_0].SnapToDiscrete; - } else { - dpm_table->count = 1; - dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.vclk / 100; - dpm_table->dpm_levels[0].enabled = true; - dpm_table->min = dpm_table->dpm_levels[0].value; - dpm_table->max = dpm_table->dpm_levels[0].value; - } + /* vclk0/1 dpm table setup */ + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; - /* vclk1 dpm table setup */ - if (adev->vcn.num_vcn_inst > 1) { - dpm_table = &dpm_context->dpm_tables.vclk1_table; + dpm_table = &dpm_context->dpm_tables.vclk_table; if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { ret = smu_v11_0_set_single_dpm_table(smu, - SMU_VCLK1, + i ? SMU_VCLK1 : SMU_VCLK, dpm_table); if (ret) return ret; dpm_table->is_fine_grained = - !table_member[PPCLK_VCLK_1].SnapToDiscrete; + !table_member[i ? PPCLK_VCLK_1 : PPCLK_VCLK_0].SnapToDiscrete; } else { dpm_table->count = 1; - dpm_table->dpm_levels[0].value = - smu->smu_table.boot_values.vclk / 100; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.vclk / 100; dpm_table->dpm_levels[0].enabled = true; dpm_table->min = dpm_table->dpm_levels[0].value; dpm_table->max = dpm_table->dpm_levels[0].value; } } - /* dclk0 dpm table setup */ - dpm_table = &dpm_context->dpm_tables.dclk_table; - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { - ret = smu_v11_0_set_single_dpm_table(smu, - SMU_DCLK, - dpm_table); - if (ret) - return ret; - dpm_table->is_fine_grained = - !table_member[PPCLK_DCLK_0].SnapToDiscrete; - } else { - dpm_table->count = 1; - dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dclk / 100; - dpm_table->dpm_levels[0].enabled = true; - dpm_table->min = dpm_table->dpm_levels[0].value; - dpm_table->max = dpm_table->dpm_levels[0].value; - } - - /* dclk1 dpm table setup */ - if (adev->vcn.num_vcn_inst > 1) { - dpm_table = &dpm_context->dpm_tables.dclk1_table; + /* dclk0/1 dpm table setup */ + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + dpm_table = &dpm_context->dpm_tables.dclk_table; if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { ret = smu_v11_0_set_single_dpm_table(smu, - SMU_DCLK1, + i ? SMU_DCLK1 : SMU_DCLK, dpm_table); if (ret) return ret; dpm_table->is_fine_grained = - !table_member[PPCLK_DCLK_1].SnapToDiscrete; + !table_member[i ? PPCLK_DCLK_1 : PPCLK_DCLK_0].SnapToDiscrete; } else { dpm_table->count = 1; - dpm_table->dpm_levels[0].value = - smu->smu_table.boot_values.dclk / 100; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dclk / 100; dpm_table->dpm_levels[0].enabled = true; dpm_table->min = dpm_table->dpm_levels[0].value; dpm_table->max = dpm_table->dpm_levels[0].value; @@ -902,32 +869,18 @@ static int sienna_cichlid_set_default_dpm_table(struct smu_context *smu) static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enable) { struct amdgpu_device *adev = smu->adev; - int ret = 0; + int i, ret = 0; - if (enable) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; /* vcn dpm on is a prerequisite for vcn power gate messages */ if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 0, NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + 0x10000 * i, NULL); if (ret) return ret; - if (adev->vcn.num_vcn_inst > 1) { - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, - 0x10000, NULL); - if (ret) - return ret; - } - } - } else { - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerDownVcn, 0, NULL); - if (ret) - return ret; - if (adev->vcn.num_vcn_inst > 1) { - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerDownVcn, - 0x10000, NULL); - if (ret) - return ret; - } } } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 3470c33ee09d..28b7c0562b99 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -255,7 +255,7 @@ int smu_v11_0_check_fw_version(struct smu_context *smu) case IP_VERSION(11, 0, 11): smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_Navy_Flounder; break; - case CHIP_VANGOGH: + case IP_VERSION(11, 5, 0): smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_VANGOGH; break; case IP_VERSION(11, 0, 12): @@ -755,6 +755,7 @@ int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count) */ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 11) || adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 5, 0) || + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 12) || adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 13)) return 0; @@ -978,10 +979,16 @@ int smu_v11_0_get_current_power_limit(struct smu_context *smu, return ret; } -int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) +int smu_v11_0_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) { int power_src; int ret = 0; + uint32_t limit_param; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { dev_err(smu->adev->dev, "Setting new power limit is not supported!\n"); @@ -1001,16 +1008,16 @@ int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) * BIT 16-23: PowerSource * BIT 0-15: PowerLimit */ - n &= 0xFFFF; - n |= 0 << 24; - n |= (power_src) << 16; - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetPptLimit, n, NULL); + limit_param = (limit & 0xFFFF); + limit_param |= 0 << 24; + limit_param |= (power_src) << 16; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetPptLimit, limit_param, NULL); if (ret) { dev_err(smu->adev->dev, "[%s] Set power limit Failed!\n", __func__); return ret; } - smu->current_power_limit = n; + smu->current_power_limit = limit; return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index f6ef0ce6e9e2..421f38e8dada 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1386,52 +1386,38 @@ static int vangogh_set_performance_level(struct smu_context *smu, uint32_t soc_mask, mclk_mask, fclk_mask; uint32_t vclk_mask = 0, dclk_mask = 0; + smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq; + smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq; + switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: - smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; + smu->gfx_actual_hard_min_freq = smu->gfx_default_soft_max_freq; smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; - smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq; - smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq; ret = vangogh_force_dpm_limit_value(smu, true); + if (ret) + return ret; break; case AMD_DPM_FORCED_LEVEL_LOW: smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; - smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; - - smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq; - smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq; + smu->gfx_actual_soft_max_freq = smu->gfx_default_hard_min_freq; ret = vangogh_force_dpm_limit_value(smu, false); + if (ret) + return ret; break; case AMD_DPM_FORCED_LEVEL_AUTO: smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; - smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq; - smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq; - ret = vangogh_unforce_dpm_levels(smu); - break; - case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: - smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; - smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; - - smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq; - smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq; - - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetHardMinGfxClk, - VANGOGH_UMD_PSTATE_STANDARD_GFXCLK, NULL); - if (ret) - return ret; - - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetSoftMaxGfxClk, - VANGOGH_UMD_PSTATE_STANDARD_GFXCLK, NULL); if (ret) return ret; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + smu->gfx_actual_hard_min_freq = VANGOGH_UMD_PSTATE_STANDARD_GFXCLK; + smu->gfx_actual_soft_max_freq = VANGOGH_UMD_PSTATE_STANDARD_GFXCLK; ret = vangogh_get_profiling_clk_mask(smu, level, &vclk_mask, @@ -1446,32 +1432,15 @@ static int vangogh_set_performance_level(struct smu_context *smu, vangogh_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask); vangogh_force_clk_levels(smu, SMU_VCLK, 1 << vclk_mask); vangogh_force_clk_levels(smu, SMU_DCLK, 1 << dclk_mask); - break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; - smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; - - smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq; - smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq; - - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinVcn, - VANGOGH_UMD_PSTATE_PEAK_DCLK, NULL); - if (ret) - return ret; - - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxVcn, - VANGOGH_UMD_PSTATE_PEAK_DCLK, NULL); - if (ret) - return ret; + smu->gfx_actual_soft_max_freq = smu->gfx_default_hard_min_freq; break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; - smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq; - smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq; - ret = vangogh_get_profiling_clk_mask(smu, level, NULL, NULL, @@ -1484,29 +1453,29 @@ static int vangogh_set_performance_level(struct smu_context *smu, vangogh_force_clk_levels(smu, SMU_FCLK, 1 << fclk_mask); break; case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: - smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; - smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; - - smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq; - smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq; - - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk, - VANGOGH_UMD_PSTATE_PEAK_GFXCLK, NULL); - if (ret) - return ret; + smu->gfx_actual_hard_min_freq = VANGOGH_UMD_PSTATE_PEAK_GFXCLK; + smu->gfx_actual_soft_max_freq = VANGOGH_UMD_PSTATE_PEAK_GFXCLK; - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, - VANGOGH_UMD_PSTATE_PEAK_GFXCLK, NULL); + ret = vangogh_set_peak_clock_by_device(smu); if (ret) return ret; - - ret = vangogh_set_peak_clock_by_device(smu); break; case AMD_DPM_FORCED_LEVEL_MANUAL: case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: default: - break; + return 0; } + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk, + smu->gfx_actual_hard_min_freq, NULL); + if (ret) + return ret; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, + smu->gfx_actual_soft_max_freq, NULL); + if (ret) + return ret; + return ret; } @@ -2144,11 +2113,12 @@ static int vangogh_get_ppt_limit(struct smu_context *smu, return 0; } -static int vangogh_set_power_limit(struct smu_context *smu, uint32_t ppt_limit) +static int vangogh_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t ppt_limit) { struct smu_11_5_power_context *power_context = - smu->smu_power.power_context; - uint32_t limit_type = ppt_limit >> 24; + smu->smu_power.power_context; int ret = 0; if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 5019903db492..59a7d276541d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1241,11 +1241,13 @@ static int aldebaran_get_power_limit(struct smu_context *smu, return 0; } -static int aldebaran_set_power_limit(struct smu_context *smu, uint32_t n) +static int aldebaran_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) { /* Power limit can be set only through primary die */ if (aldebaran_is_primary(smu)) - return smu_v13_0_set_power_limit(smu, n); + return smu_v13_0_set_power_limit(smu, limit_type, limit); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 05c5e61f3506..35145db6eedf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -945,22 +945,27 @@ int smu_v13_0_get_current_power_limit(struct smu_context *smu, return ret; } -int smu_v13_0_set_power_limit(struct smu_context *smu, uint32_t n) +int smu_v13_0_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) { int ret = 0; + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { dev_err(smu->adev->dev, "Setting new power limit is not supported!\n"); return -EOPNOTSUPP; } - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetPptLimit, n, NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetPptLimit, limit, NULL); if (ret) { dev_err(smu->adev->dev, "[%s] Set power limit Failed!\n", __func__); return ret; } - smu->current_power_limit = n; + smu->current_power_limit = limit; return 0; } diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 6bfaefa01818..1e30eaeb0e1b 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1300,18 +1300,6 @@ static enum drm_mode_status ast_mode_valid(struct drm_connector *connector, return flags; } -static enum drm_connector_status ast_connector_detect(struct drm_connector - *connector, bool force) -{ - int r; - - r = ast_get_modes(connector); - if (r <= 0) - return connector_status_disconnected; - - return connector_status_connected; -} - static void ast_connector_destroy(struct drm_connector *connector) { struct ast_connector *ast_connector = to_ast_connector(connector); @@ -1327,7 +1315,6 @@ static const struct drm_connector_helper_funcs ast_connector_helper_funcs = { static const struct drm_connector_funcs ast_connector_funcs = { .reset = drm_atomic_helper_connector_reset, - .detect = ast_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = ast_connector_destroy, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, @@ -1355,8 +1342,7 @@ static int ast_connector_init(struct drm_device *dev) connector->interlace_allowed = 0; connector->doublescan_allowed = 0; - connector->polled = DRM_CONNECTOR_POLL_CONNECT | - DRM_CONNECTOR_POLL_DISCONNECT; + connector->polled = DRM_CONNECTOR_POLL_CONNECT; drm_connector_attach_encoder(connector, encoder); @@ -1425,8 +1411,6 @@ int ast_mode_config_init(struct ast_private *ast) drm_mode_config_reset(dev); - drm_kms_helper_poll_init(dev); - return 0; } diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 86d13d6bc463..571da0c2f39f 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -3355,6 +3355,10 @@ static int drm_dp_destroy_payload_step2(struct drm_dp_mst_topology_mgr *mgr, /** * drm_dp_update_payload_part1() - Execute payload update part 1 * @mgr: manager to use. + * @start_slot: this is the cur slot + * + * NOTE: start_slot is a temporary workaround for non-atomic drivers, + * this will be removed when non-atomic mst helpers are moved out of the helper * * This iterates over all proposed virtual channels, and tries to * allocate space in the link for them. For 0->slots transitions, @@ -3365,12 +3369,12 @@ static int drm_dp_destroy_payload_step2(struct drm_dp_mst_topology_mgr *mgr, * after calling this the driver should generate ACT and payload * packets. */ -int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) +int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr, int start_slot) { struct drm_dp_payload req_payload; struct drm_dp_mst_port *port; int i, j; - int cur_slots = 1; + int cur_slots = start_slot; bool skip; mutex_lock(&mgr->payload_lock); @@ -4334,10 +4338,6 @@ static int drm_dp_init_vcpi(struct drm_dp_mst_topology_mgr *mgr, { int ret; - /* max. time slots - one slot for MTP header */ - if (slots > 63) - return -ENOSPC; - vcpi->pbn = pbn; vcpi->aligned_pbn = slots * mgr->pbn_div; vcpi->num_slots = slots; @@ -4510,6 +4510,27 @@ int drm_dp_atomic_release_vcpi_slots(struct drm_atomic_state *state, EXPORT_SYMBOL(drm_dp_atomic_release_vcpi_slots); /** + * drm_dp_mst_update_slots() - updates the slot info depending on the DP ecoding format + * @mst_state: mst_state to update + * @link_encoding_cap: the ecoding format on the link + */ +void drm_dp_mst_update_slots(struct drm_dp_mst_topology_state *mst_state, uint8_t link_encoding_cap) +{ + if (link_encoding_cap == DP_CAP_ANSI_128B132B) { + mst_state->total_avail_slots = 64; + mst_state->start_slot = 0; + } else { + mst_state->total_avail_slots = 63; + mst_state->start_slot = 1; + } + + DRM_DEBUG_KMS("%s encoding format on mst_state 0x%p\n", + (link_encoding_cap == DP_CAP_ANSI_128B132B) ? "128b/132b":"8b/10b", + mst_state); +} +EXPORT_SYMBOL(drm_dp_mst_update_slots); + +/** * drm_dp_mst_allocate_vcpi() - Allocate a virtual channel * @mgr: manager for this port * @port: port to allocate a virtual channel for. @@ -4540,7 +4561,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, ret = drm_dp_init_vcpi(mgr, &port->vcpi, pbn, slots); if (ret) { - drm_dbg_kms(mgr->dev, "failed to init vcpi slots=%d max=63 ret=%d\n", + drm_dbg_kms(mgr->dev, "failed to init vcpi slots=%d ret=%d\n", DIV_ROUND_UP(pbn, mgr->pbn_div), ret); drm_dp_mst_topology_put_port(port); goto out; @@ -5228,7 +5249,7 @@ drm_dp_mst_atomic_check_vcpi_alloc_limit(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_topology_state *mst_state) { struct drm_dp_vcpi_allocation *vcpi; - int avail_slots = 63, payload_count = 0; + int avail_slots = mst_state->total_avail_slots, payload_count = 0; list_for_each_entry(vcpi, &mst_state->vcpis, next) { /* Releasing VCPI is always OK-even if the port is gone */ @@ -5257,7 +5278,7 @@ drm_dp_mst_atomic_check_vcpi_alloc_limit(struct drm_dp_mst_topology_mgr *mgr, } } drm_dbg_atomic(mgr->dev, "[MST MGR:%p] mst state %p VCPI avail=%d used=%d\n", - mgr, mst_state, avail_slots, 63 - avail_slots); + mgr, mst_state, avail_slots, mst_state->total_avail_slots - avail_slots); return 0; } @@ -5534,6 +5555,9 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, if (mst_state == NULL) return -ENOMEM; + mst_state->total_avail_slots = 63; + mst_state->start_slot = 1; + mst_state->mgr = mgr; INIT_LIST_HEAD(&mst_state->vcpis); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 7aa2a56a71c8..12893e7be89b 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1846,11 +1846,20 @@ static void connector_bad_edid(struct drm_connector *connector, u8 *edid, int num_blocks) { int i; - u8 num_of_ext = edid[0x7e]; + u8 last_block; + + /* + * 0x7e in the EDID is the number of extension blocks. The EDID + * is 1 (base block) + num_ext_blocks big. That means we can think + * of 0x7e in the EDID of the _index_ of the last block in the + * combined chunk of memory. + */ + last_block = edid[0x7e]; /* Calculate real checksum for the last edid extension block data */ - connector->real_edid_checksum = - drm_edid_block_checksum(edid + num_of_ext * EDID_LENGTH); + if (last_block < num_blocks) + connector->real_edid_checksum = + drm_edid_block_checksum(edid + last_block * EDID_LENGTH); if (connector->bad_edid_counter++ && !drm_debug_enabled(DRM_UT_KMS)) return; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3ab078321045..8e7a124d6c5a 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1506,6 +1506,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; + struct drm_mode_config *config = &dev->mode_config; int ret = 0; int crtc_count = 0; struct drm_connector_list_iter conn_iter; @@ -1663,6 +1664,11 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, /* Handle our overallocation */ sizes.surface_height *= drm_fbdev_overalloc; sizes.surface_height /= 100; + if (sizes.surface_height > config->max_height) { + drm_dbg_kms(dev, "Fbdev over-allocation too large; clamping height to %d\n", + config->max_height); + sizes.surface_height = config->max_height; + } /* push down into drivers */ ret = (*fb_helper->funcs->fb_probe)(fb_helper, &sizes); diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 783844bfecc1..25837b1d6639 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -134,6 +134,8 @@ const struct drm_format_info *__drm_format_info(u32 format) static const struct drm_format_info formats[] = { { .format = DRM_FORMAT_C8, .depth = 8, .num_planes = 1, .cpp = { 1, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_R8, .depth = 8, .num_planes = 1, .cpp = { 1, 0, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_R10, .depth = 10, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, + { .format = DRM_FORMAT_R12, .depth = 12, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGB332, .depth = 8, .num_planes = 1, .cpp = { 1, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGR233, .depth = 8, .num_planes = 1, .cpp = { 1, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XRGB4444, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index cc5b07f86346..242a5fd8b932 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1733,7 +1733,6 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, DBG("%s", dev_name(gpu->dev)); - flush_workqueue(gpu->wq); destroy_workqueue(gpu->wq); etnaviv_sched_fini(gpu); diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 9870c4e6af36..b5001db7a95c 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -793,7 +793,6 @@ static int exynos5433_decon_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct decon_context *ctx; - struct resource *res; int ret; int i; @@ -818,8 +817,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev) ctx->clks[i] = clk; } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ctx->addr = devm_ioremap_resource(dev, res); + ctx->addr = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ctx->addr)) return PTR_ERR(ctx->addr); diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index e39fac889edc..8d137857818c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1738,7 +1738,6 @@ static const struct component_ops exynos_dsi_component_ops = { static int exynos_dsi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct resource *res; struct exynos_dsi *dsi; int ret, i; @@ -1789,8 +1788,7 @@ static int exynos_dsi_probe(struct platform_device *pdev) } } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - dsi->reg_base = devm_ioremap_resource(dev, res); + dsi->reg_base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(dsi->reg_base)) return PTR_ERR(dsi->reg_base); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index a3c718148c45..ecfd82d0afb7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -85,7 +85,6 @@ struct fimc_scaler { /* * A structure of fimc context. * - * @regs_res: register resources. * @regs: memory mapped io registers. * @lock: locking of operations. * @clocks: fimc clocks. @@ -103,7 +102,6 @@ struct fimc_context { struct exynos_drm_ipp_formats *formats; unsigned int num_formats; - struct resource *regs_res; void __iomem *regs; spinlock_t lock; struct clk *clocks[FIMC_CLKS_MAX]; @@ -1327,8 +1325,7 @@ static int fimc_probe(struct platform_device *pdev) ctx->num_formats = num_formats; /* resource memory */ - ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ctx->regs = devm_ioremap_resource(dev, ctx->regs_res); + ctx->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ctx->regs)) return PTR_ERR(ctx->regs); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 700ca4fa6665..c735e53939d8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -1202,9 +1202,7 @@ static int fimd_probe(struct platform_device *pdev) return PTR_ERR(ctx->lcd_clk); } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - ctx->regs = devm_ioremap_resource(dev, res); + ctx->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ctx->regs)) return PTR_ERR(ctx->regs); diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index b00230626c6a..471fd6c8135f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -1449,7 +1449,6 @@ static const struct component_ops g2d_component_ops = { static int g2d_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct resource *res; struct g2d_data *g2d; int ret; @@ -1491,9 +1490,7 @@ static int g2d_probe(struct platform_device *pdev) clear_bit(G2D_BIT_SUSPEND_RUNQUEUE, &g2d->flags); clear_bit(G2D_BIT_ENGINE_BUSY, &g2d->flags); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - g2d->regs = devm_ioremap_resource(dev, res); + g2d->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(g2d->regs)) { ret = PTR_ERR(g2d->regs); goto err_put_clk; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 90d7bf906885..166a80262896 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -86,7 +86,6 @@ struct gsc_scaler { /* * A structure of gsc context. * - * @regs_res: register resources. * @regs: memory mapped io registers. * @gsc_clk: gsc gate clock. * @sc: scaler infomations. @@ -103,7 +102,6 @@ struct gsc_context { struct exynos_drm_ipp_formats *formats; unsigned int num_formats; - struct resource *regs_res; void __iomem *regs; const char **clk_names; struct clk *clocks[GSC_MAX_CLOCKS]; @@ -1272,9 +1270,7 @@ static int gsc_probe(struct platform_device *pdev) } } - /* resource memory */ - ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ctx->regs = devm_ioremap_resource(dev, ctx->regs_res); + ctx->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ctx->regs)) return PTR_ERR(ctx->regs); diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index ee61be4cf152..dec7df35baa9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -278,7 +278,6 @@ static const struct component_ops rotator_component_ops = { static int rotator_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct resource *regs_res; struct rot_context *rot; const struct rot_variant *variant; int irq; @@ -292,8 +291,7 @@ static int rotator_probe(struct platform_device *pdev) rot->formats = variant->formats; rot->num_formats = variant->num_formats; rot->dev = dev; - regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rot->regs = devm_ioremap_resource(dev, regs_res); + rot->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(rot->regs)) return PTR_ERR(rot->regs); diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index f9ae5b038d59..3a7851b7dc66 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -485,7 +485,6 @@ static const struct component_ops scaler_component_ops = { static int scaler_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct resource *regs_res; struct scaler_context *scaler; int irq; int ret, i; @@ -498,8 +497,7 @@ static int scaler_probe(struct platform_device *pdev) (struct scaler_data *)of_device_get_match_data(dev); scaler->dev = dev; - regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - scaler->regs = devm_ioremap_resource(dev, regs_res); + scaler->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(scaler->regs)) return PTR_ERR(scaler->regs); diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index c769dec576de..7655142a4651 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1957,7 +1957,6 @@ static int hdmi_probe(struct platform_device *pdev) struct hdmi_audio_infoframe *audio_infoframe; struct device *dev = &pdev->dev; struct hdmi_context *hdata; - struct resource *res; int ret; hdata = devm_kzalloc(dev, sizeof(struct hdmi_context), GFP_KERNEL); @@ -1979,8 +1978,7 @@ static int hdmi_probe(struct platform_device *pdev) return ret; } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - hdata->regs = devm_ioremap_resource(dev, res); + hdata->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hdata->regs)) { ret = PTR_ERR(hdata->regs); return ret; diff --git a/drivers/gpu/drm/hyperv/hyperv_drm.h b/drivers/gpu/drm/hyperv/hyperv_drm.h index 886add4f9cd0..d2d8582b36df 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm.h +++ b/drivers/gpu/drm/hyperv/hyperv_drm.h @@ -46,6 +46,7 @@ int hyperv_mode_config_init(struct hyperv_drm_device *hv); int hyperv_update_vram_location(struct hv_device *hdev, phys_addr_t vram_pp); int hyperv_update_situation(struct hv_device *hdev, u8 active, u32 bpp, u32 w, u32 h, u32 pitch); +int hyperv_hide_hw_ptr(struct hv_device *hdev); int hyperv_update_dirt(struct hv_device *hdev, struct drm_rect *rect); int hyperv_connect_vsp(struct hv_device *hdev); diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c index 6dd4717d3e1e..8c97a20dfe23 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c @@ -101,6 +101,7 @@ static void hyperv_pipe_enable(struct drm_simple_display_pipe *pipe, struct hyperv_drm_device *hv = to_hv(pipe->crtc.dev); struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); + hyperv_hide_hw_ptr(hv->hdev); hyperv_update_situation(hv->hdev, 1, hv->screen_depth, crtc_state->mode.hdisplay, crtc_state->mode.vdisplay, diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_proto.c b/drivers/gpu/drm/hyperv/hyperv_drm_proto.c index 6d4bdccfbd1a..c0155c6271bf 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_proto.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_proto.c @@ -299,6 +299,55 @@ int hyperv_update_situation(struct hv_device *hdev, u8 active, u32 bpp, return 0; } +/* + * Hyper-V supports a hardware cursor feature. It's not used by Linux VM, + * but the Hyper-V host still draws a point as an extra mouse pointer, + * which is unwanted, especially when Xorg is running. + * + * The hyperv_fb driver uses synthvid_send_ptr() to hide the unwanted + * pointer, by setting msg.ptr_pos.is_visible = 1 and setting the + * msg.ptr_shape.data. Note: setting msg.ptr_pos.is_visible to 0 doesn't + * work in tests. + * + * Copy synthvid_send_ptr() to hyperv_drm and rename it to + * hyperv_hide_hw_ptr(). Note: hyperv_hide_hw_ptr() is also called in the + * handler of the SYNTHVID_FEATURE_CHANGE event, otherwise the host still + * draws an extra unwanted mouse pointer after the VM Connection window is + * closed and reopened. + */ +int hyperv_hide_hw_ptr(struct hv_device *hdev) +{ + struct synthvid_msg msg; + + memset(&msg, 0, sizeof(struct synthvid_msg)); + msg.vid_hdr.type = SYNTHVID_POINTER_POSITION; + msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) + + sizeof(struct synthvid_pointer_position); + msg.ptr_pos.is_visible = 1; + msg.ptr_pos.video_output = 0; + msg.ptr_pos.image_x = 0; + msg.ptr_pos.image_y = 0; + hyperv_sendpacket(hdev, &msg); + + memset(&msg, 0, sizeof(struct synthvid_msg)); + msg.vid_hdr.type = SYNTHVID_POINTER_SHAPE; + msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) + + sizeof(struct synthvid_pointer_shape); + msg.ptr_shape.part_idx = SYNTHVID_CURSOR_COMPLETE; + msg.ptr_shape.is_argb = 1; + msg.ptr_shape.width = 1; + msg.ptr_shape.height = 1; + msg.ptr_shape.hot_x = 0; + msg.ptr_shape.hot_y = 0; + msg.ptr_shape.data[0] = 0; + msg.ptr_shape.data[1] = 1; + msg.ptr_shape.data[2] = 1; + msg.ptr_shape.data[3] = 1; + hyperv_sendpacket(hdev, &msg); + + return 0; +} + int hyperv_update_dirt(struct hv_device *hdev, struct drm_rect *rect) { struct hyperv_drm_device *hv = hv_get_drvdata(hdev); @@ -392,8 +441,11 @@ static void hyperv_receive_sub(struct hv_device *hdev) return; } - if (msg->vid_hdr.type == SYNTHVID_FEATURE_CHANGE) + if (msg->vid_hdr.type == SYNTHVID_FEATURE_CHANGE) { hv->dirt_needed = msg->feature_chg.is_dirt_needed; + if (hv->dirt_needed) + hyperv_hide_hw_ptr(hv->hdev); + } } static void hyperv_receive(void *ctx) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index a3faea320105..b8bc7d397c81 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -366,7 +366,7 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state, drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, connector->port); - ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); + ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr, 1); if (ret) { drm_dbg_kms(&i915->drm, "failed to update payload %d\n", ret); } @@ -509,7 +509,7 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, intel_dp->active_mst_links++; - ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); + ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr, 1); /* * Before Gen 12 this is not done as part of diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..7358bebef15c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -4,6 +4,8 @@ * Copyright © 2014-2016 Intel Corporation */ +#include <linux/dma-fence-array.h> + #include "gt/intel_engine.h" #include "i915_gem_ioctls.h" @@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id) } static __always_inline unsigned int -__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) +__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id)) { const struct i915_request *rq; @@ -46,29 +48,60 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) * to eventually flush us, but to minimise latency just ask the * hardware. * - * Note we only report on the status of native fences. + * Note we only report on the status of native fences and we currently + * have two native fences: + * + * 1. A composite fence (dma_fence_array) constructed of i915 requests + * created during a parallel submission. In this case we deconstruct the + * composite fence into individual i915 requests and check the status of + * each request. + * + * 2. A single i915 request. */ - if (!dma_fence_is_i915(fence)) + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + struct dma_fence **child = array->fences; + unsigned int nchild = array->num_fences; + + do { + struct dma_fence *current_fence = *child++; + + /* Not an i915 fence, can't be busy per above */ + if (!dma_fence_is_i915(current_fence) || + !test_bit(I915_FENCE_FLAG_COMPOSITE, + ¤t_fence->flags)) { + return 0; + } + + rq = to_request(current_fence); + if (!i915_request_completed(rq)) + return flag(rq->engine->uabi_class); + } while (--nchild); + + /* All requests in array complete, not busy */ return 0; + } else { + if (!dma_fence_is_i915(fence)) + return 0; - /* opencode to_request() in order to avoid const warnings */ - rq = container_of(fence, const struct i915_request, fence); - if (i915_request_completed(rq)) - return 0; + rq = to_request(fence); + if (i915_request_completed(rq)) + return 0; - /* Beware type-expansion follies! */ - BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class)); - return flag(rq->engine->uabi_class); + /* Beware type-expansion follies! */ + BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class)); + return flag(rq->engine->uabi_class); + } } static __always_inline unsigned int -busy_check_reader(const struct dma_fence *fence) +busy_check_reader(struct dma_fence *fence) { return __busy_set_if_active(fence, __busy_read_flag); } static __always_inline unsigned int -busy_check_writer(const struct dma_fence *fence) +busy_check_writer(struct dma_fence *fence) { if (!fence) return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 30759b651180..ebd775cb1661 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -556,9 +556,147 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) return 0; } +static int +set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, + void *data) +{ + struct i915_context_engines_parallel_submit __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_proto_ctx_engines *set = data; + struct drm_i915_private *i915 = set->i915; + u64 flags; + int err = 0, n, i, j; + u16 slot, width, num_siblings; + struct intel_engine_cs **siblings = NULL; + intel_engine_mask_t prev_mask; + + /* FIXME: This is NIY for execlists */ + if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) + return -ENODEV; + + if (get_user(slot, &ext->engine_index)) + return -EFAULT; + + if (get_user(width, &ext->width)) + return -EFAULT; + + if (get_user(num_siblings, &ext->num_siblings)) + return -EFAULT; + + if (slot >= set->num_engines) { + drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", + slot, set->num_engines); + return -EINVAL; + } + + if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) { + drm_dbg(&i915->drm, + "Invalid placement[%d], already occupied\n", slot); + return -EINVAL; + } + + if (get_user(flags, &ext->flags)) + return -EFAULT; + + if (flags) { + drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags); + return -EINVAL; + } + + for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { + err = check_user_mbz(&ext->mbz64[n]); + if (err) + return err; + } + + if (width < 2) { + drm_dbg(&i915->drm, "Width (%d) < 2\n", width); + return -EINVAL; + } + + if (num_siblings < 1) { + drm_dbg(&i915->drm, "Number siblings (%d) < 1\n", + num_siblings); + return -EINVAL; + } + + siblings = kmalloc_array(num_siblings * width, + sizeof(*siblings), + GFP_KERNEL); + if (!siblings) + return -ENOMEM; + + /* Create contexts / engines */ + for (i = 0; i < width; ++i) { + intel_engine_mask_t current_mask = 0; + struct i915_engine_class_instance prev_engine; + + for (j = 0; j < num_siblings; ++j) { + struct i915_engine_class_instance ci; + + n = i * num_siblings + j; + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { + err = -EFAULT; + goto out_err; + } + + siblings[n] = + intel_engine_lookup_user(i915, ci.engine_class, + ci.engine_instance); + if (!siblings[n]) { + drm_dbg(&i915->drm, + "Invalid sibling[%d]: { class:%d, inst:%d }\n", + n, ci.engine_class, ci.engine_instance); + err = -EINVAL; + goto out_err; + } + + if (n) { + if (prev_engine.engine_class != + ci.engine_class) { + drm_dbg(&i915->drm, + "Mismatched class %d, %d\n", + prev_engine.engine_class, + ci.engine_class); + err = -EINVAL; + goto out_err; + } + } + + prev_engine = ci; + current_mask |= siblings[n]->logical_mask; + } + + if (i > 0) { + if (current_mask != prev_mask << 1) { + drm_dbg(&i915->drm, + "Non contiguous logical mask 0x%x, 0x%x\n", + prev_mask, current_mask); + err = -EINVAL; + goto out_err; + } + } + prev_mask = current_mask; + } + + set->engines[slot].type = I915_GEM_ENGINE_TYPE_PARALLEL; + set->engines[slot].num_siblings = num_siblings; + set->engines[slot].width = width; + set->engines[slot].siblings = siblings; + + return 0; + +out_err: + kfree(siblings); + + return err; +} + static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = { [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance, [I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond, + [I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT] = + set_proto_ctx_engines_parallel_submit, }; static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv, @@ -794,6 +932,7 @@ static int intel_context_set_gem(struct intel_context *ce, GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); RCU_INIT_POINTER(ce->gem_context, ctx); + GEM_BUG_ON(intel_context_is_pinned(ce)); ce->ring_size = SZ_16K; i915_vm_put(ce->vm); @@ -818,6 +957,25 @@ static int intel_context_set_gem(struct intel_context *ce, return ret; } +static void __unpin_engines(struct i915_gem_engines *e, unsigned int count) +{ + while (count--) { + struct intel_context *ce = e->engines[count], *child; + + if (!ce || !test_bit(CONTEXT_PERMA_PIN, &ce->flags)) + continue; + + for_each_child(ce, child) + intel_context_unpin(child); + intel_context_unpin(ce); + } +} + +static void unpin_engines(struct i915_gem_engines *e) +{ + __unpin_engines(e, e->num_engines); +} + static void __free_engines(struct i915_gem_engines *e, unsigned int count) { while (count--) { @@ -933,6 +1091,40 @@ free_engines: return err; } +static int perma_pin_contexts(struct intel_context *ce) +{ + struct intel_context *child; + int i = 0, j = 0, ret; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + ret = intel_context_pin(ce); + if (unlikely(ret)) + return ret; + + for_each_child(ce, child) { + ret = intel_context_pin(child); + if (unlikely(ret)) + goto unwind; + ++i; + } + + set_bit(CONTEXT_PERMA_PIN, &ce->flags); + + return 0; + +unwind: + intel_context_unpin(ce); + for_each_child(ce, child) { + if (j++ < i) + intel_context_unpin(child); + else + break; + } + + return ret; +} + static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, unsigned int num_engines, struct i915_gem_proto_engine *pe) @@ -946,7 +1138,7 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, e->num_engines = num_engines; for (n = 0; n < num_engines; n++) { - struct intel_context *ce; + struct intel_context *ce, *child; int ret; switch (pe[n].type) { @@ -956,7 +1148,13 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, case I915_GEM_ENGINE_TYPE_BALANCED: ce = intel_engine_create_virtual(pe[n].siblings, - pe[n].num_siblings); + pe[n].num_siblings, 0); + break; + + case I915_GEM_ENGINE_TYPE_PARALLEL: + ce = intel_engine_create_parallel(pe[n].siblings, + pe[n].num_siblings, + pe[n].width); break; case I915_GEM_ENGINE_TYPE_INVALID: @@ -977,6 +1175,30 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, err = ERR_PTR(ret); goto free_engines; } + for_each_child(ce, child) { + ret = intel_context_set_gem(child, ctx, pe->sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } + } + + /* + * XXX: Must be done after calling intel_context_set_gem as that + * function changes the ring size. The ring is allocated when + * the context is pinned. If the ring size is changed after + * allocation we have a mismatch of the ring size and will cause + * the context to hang. Presumably with a bit of reordering we + * could move the perma-pin step to the backend function + * intel_engine_create_parallel. + */ + if (pe[n].type == I915_GEM_ENGINE_TYPE_PARALLEL) { + ret = perma_pin_contexts(ce); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } + } } return e; @@ -1219,6 +1441,7 @@ static void context_close(struct i915_gem_context *ctx) /* Flush any concurrent set_engines() */ mutex_lock(&ctx->engines_mutex); + unpin_engines(__context_engines_static(ctx)); engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1)); i915_gem_context_set_closed(ctx); mutex_unlock(&ctx->engines_mutex); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index a627b09c4680..282cdb8a5c5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -78,13 +78,16 @@ enum i915_gem_engine_type { /** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */ I915_GEM_ENGINE_TYPE_BALANCED, + + /** @I915_GEM_ENGINE_TYPE_PARALLEL: A parallel engine set */ + I915_GEM_ENGINE_TYPE_PARALLEL, }; /** * struct i915_gem_proto_engine - prototype engine * * This struct describes an engine that a context may contain. Engines - * have three types: + * have four types: * * - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they * show up as a NULL in i915_gem_engines::engines[i] and any attempt to @@ -97,6 +100,10 @@ enum i915_gem_engine_type { * * - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described * i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings. + * + * - I915_GEM_ENGINE_TYPE_PARALLEL: A parallel submission engine set, described + * i915_gem_proto_engine::width, i915_gem_proto_engine::num_siblings, and + * i915_gem_proto_engine::siblings. */ struct i915_gem_proto_engine { /** @type: Type of this engine */ @@ -105,10 +112,13 @@ struct i915_gem_proto_engine { /** @engine: Engine, for physical */ struct intel_engine_cs *engine; - /** @num_siblings: Number of balanced siblings */ + /** @num_siblings: Number of balanced or parallel siblings */ unsigned int num_siblings; - /** @siblings: Balanced siblings */ + /** @width: Width of each sibling */ + unsigned int width; + + /** @siblings: Balanced siblings or num_siblings * width for parallel */ struct intel_engine_cs **siblings; /** @sseu: Client-set SSEU parameters */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index afa34111de02..1adcd8e02d29 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -232,6 +232,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags) static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages; unsigned int sg_page_sizes; @@ -242,8 +243,11 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) return PTR_ERR(pages); - sg_page_sizes = i915_sg_dma_sizes(pages->sgl); + /* XXX: consider doing a vmap flush or something */ + if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj)) + wbinvd_on_all_cpus(); + sg_page_sizes = i915_sg_dma_sizes(pages->sgl); __i915_gem_object_set_pages(obj, pages, sg_page_sizes); return 0; @@ -301,7 +305,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, } drm_gem_private_object_init(dev, &obj->base, dma_buf->size); - i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class, 0); + i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class, + I915_BO_ALLOC_USER); obj->base.import_attach = attach; obj->base.resv = dma_buf->resv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 8b3a25bd93e6..4d7da07442f2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -246,17 +246,25 @@ struct i915_execbuffer { struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ struct eb_vma *vma; - struct intel_engine_cs *engine; /** engine to queue the request to */ + struct intel_gt *gt; /* gt for the execbuf */ struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ - struct i915_request *request; /** our request to build */ - struct eb_vma *batch; /** identity of the batch obj/vma */ + /** our requests to build */ + struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; + /** identity of the batch obj/vma */ + struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1]; struct i915_vma *trampoline; /** trampoline used for chaining */ + /** used for excl fence in dma_resv objects when > 1 BB submitted */ + struct dma_fence *composite_fence; + /** actual size of execobj[] as we may extend it for the cmdparser */ unsigned int buffer_count; + /* number of batches in execbuf IOCTL */ + unsigned int num_batches; + /** list of vma not yet bound during reservation phase */ struct list_head unbound; @@ -283,7 +291,8 @@ struct i915_execbuffer { u64 invalid_flags; /** Set of execobj.flags that are invalid */ - u64 batch_len; /** Length of batch within object */ + /** Length of batch within object */ + u64 batch_len[MAX_ENGINE_INSTANCE + 1]; u32 batch_start_offset; /** Location within object of batch */ u32 batch_flags; /** Flags composed for emit_bb_start() */ struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ @@ -301,14 +310,13 @@ struct i915_execbuffer { }; static int eb_parse(struct i915_execbuffer *eb); -static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, - bool throttle); +static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); static void eb_unpin_engine(struct i915_execbuffer *eb); static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return intel_engine_requires_cmd_parser(eb->engine) || - (intel_engine_using_cmd_parser(eb->engine) && + return intel_engine_requires_cmd_parser(eb->context->engine) || + (intel_engine_using_cmd_parser(eb->context->engine) && eb->args->batch_len); } @@ -535,11 +543,21 @@ eb_validate_vma(struct i915_execbuffer *eb, return 0; } -static void +static inline bool +is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx) +{ + return eb->args->flags & I915_EXEC_BATCH_FIRST ? + buffer_idx < eb->num_batches : + buffer_idx >= eb->args->buffer_count - eb->num_batches; +} + +static int eb_add_vma(struct i915_execbuffer *eb, - unsigned int i, unsigned batch_idx, + unsigned int *current_batch, + unsigned int i, struct i915_vma *vma) { + struct drm_i915_private *i915 = eb->i915; struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; struct eb_vma *ev = &eb->vma[i]; @@ -566,15 +584,43 @@ eb_add_vma(struct i915_execbuffer *eb, * Note that actual hangs have only been observed on gen7, but for * paranoia do it everywhere. */ - if (i == batch_idx) { + if (is_batch_buffer(eb, i)) { if (entry->relocation_count && !(ev->flags & EXEC_OBJECT_PINNED)) ev->flags |= __EXEC_OBJECT_NEEDS_BIAS; if (eb->reloc_cache.has_fence) ev->flags |= EXEC_OBJECT_NEEDS_FENCE; - eb->batch = ev; + eb->batches[*current_batch] = ev; + + if (unlikely(ev->flags & EXEC_OBJECT_WRITE)) { + drm_dbg(&i915->drm, + "Attempting to use self-modifying batch buffer\n"); + return -EINVAL; + } + + if (range_overflows_t(u64, + eb->batch_start_offset, + eb->args->batch_len, + ev->vma->size)) { + drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); + return -EINVAL; + } + + if (eb->args->batch_len == 0) + eb->batch_len[*current_batch] = ev->vma->size - + eb->batch_start_offset; + else + eb->batch_len[*current_batch] = eb->args->batch_len; + if (unlikely(eb->batch_len[*current_batch] == 0)) { /* impossible! */ + drm_dbg(&i915->drm, "Invalid batch length\n"); + return -EINVAL; + } + + ++*current_batch; } + + return 0; } static inline int use_cpu_reloc(const struct reloc_cache *cache, @@ -718,14 +764,6 @@ static int eb_reserve(struct i915_execbuffer *eb) } while (1); } -static unsigned int eb_batch_index(const struct i915_execbuffer *eb) -{ - if (eb->args->flags & I915_EXEC_BATCH_FIRST) - return 0; - else - return eb->buffer_count - 1; -} - static int eb_select_context(struct i915_execbuffer *eb) { struct i915_gem_context *ctx; @@ -846,9 +884,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) static int eb_lookup_vmas(struct i915_execbuffer *eb) { - struct drm_i915_private *i915 = eb->i915; - unsigned int batch = eb_batch_index(eb); - unsigned int i; + unsigned int i, current_batch = 0; int err = 0; INIT_LIST_HEAD(&eb->relocs); @@ -868,7 +904,9 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) goto err; } - eb_add_vma(eb, i, batch, vma); + err = eb_add_vma(eb, ¤t_batch, i, vma); + if (err) + return err; if (i915_gem_object_is_userptr(vma->obj)) { err = i915_gem_object_userptr_submit_init(vma->obj); @@ -891,26 +929,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) } } - if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) { - drm_dbg(&i915->drm, - "Attempting to use self-modifying batch buffer\n"); - return -EINVAL; - } - - if (range_overflows_t(u64, - eb->batch_start_offset, eb->batch_len, - eb->batch->vma->size)) { - drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); - return -EINVAL; - } - - if (eb->batch_len == 0) - eb->batch_len = eb->batch->vma->size - eb->batch_start_offset; - if (unlikely(eb->batch_len == 0)) { /* impossible! */ - drm_dbg(&i915->drm, "Invalid batch length\n"); - return -EINVAL; - } - return 0; err: @@ -1643,8 +1661,7 @@ static int eb_reinit_userptr(struct i915_execbuffer *eb) return 0; } -static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb, - struct i915_request *rq) +static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb) { bool have_copy = false; struct eb_vma *ev; @@ -1660,21 +1677,6 @@ repeat: eb_release_vmas(eb, false); i915_gem_ww_ctx_fini(&eb->ww); - if (rq) { - /* nonblocking is always false */ - if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT) < 0) { - i915_request_put(rq); - rq = NULL; - - err = -EINTR; - goto err_relock; - } - - i915_request_put(rq); - rq = NULL; - } - /* * We take 3 passes through the slowpatch. * @@ -1701,28 +1703,21 @@ repeat: if (!err) err = eb_reinit_userptr(eb); -err_relock: i915_gem_ww_ctx_init(&eb->ww, true); if (err) goto out; /* reacquire the objects */ repeat_validate: - rq = eb_pin_engine(eb, false); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - rq = NULL; + err = eb_pin_engine(eb, false); + if (err) goto err; - } - - /* We didn't throttle, should be NULL */ - GEM_WARN_ON(rq); err = eb_validate_vmas(eb); if (err) goto err; - GEM_BUG_ON(!eb->batch); + GEM_BUG_ON(!eb->batches[0]); list_for_each_entry(ev, &eb->relocs, reloc_link) { if (!have_copy) { @@ -1786,46 +1781,23 @@ out: } } - if (rq) - i915_request_put(rq); - return err; } static int eb_relocate_parse(struct i915_execbuffer *eb) { int err; - struct i915_request *rq = NULL; bool throttle = true; retry: - rq = eb_pin_engine(eb, throttle); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - rq = NULL; + err = eb_pin_engine(eb, throttle); + if (err) { if (err != -EDEADLK) return err; goto err; } - if (rq) { - bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; - - /* Need to drop all locks now for throttling, take slowpath */ - err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0); - if (err == -ETIME) { - if (nonblock) { - err = -EWOULDBLOCK; - i915_request_put(rq); - goto err; - } - goto slow; - } - i915_request_put(rq); - rq = NULL; - } - /* only throttle once, even if we didn't need to throttle */ throttle = false; @@ -1865,7 +1837,7 @@ err: return err; slow: - err = eb_relocate_parse_slow(eb, rq); + err = eb_relocate_parse_slow(eb); if (err) /* * If the user expects the execobject.offset and @@ -1879,11 +1851,40 @@ slow: return err; } +/* + * Using two helper loops for the order of which requests / batches are created + * and added the to backend. Requests are created in order from the parent to + * the last child. Requests are added in the reverse order, from the last child + * to parent. This is done for locking reasons as the timeline lock is acquired + * during request creation and released when the request is added to the + * backend. To make lockdep happy (see intel_context_timeline_lock) this must be + * the ordering. + */ +#define for_each_batch_create_order(_eb, _i) \ + for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i)) +#define for_each_batch_add_order(_eb, _i) \ + BUILD_BUG_ON(!typecheck(int, _i)); \ + for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i)) + +static struct i915_request * +eb_find_first_request_added(struct i915_execbuffer *eb) +{ + int i; + + for_each_batch_add_order(eb, i) + if (eb->requests[i]) + return eb->requests[i]; + + GEM_BUG_ON("Request not found"); + + return NULL; +} + static int eb_move_to_gpu(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; unsigned int i = count; - int err = 0; + int err = 0, j; while (i--) { struct eb_vma *ev = &eb->vma[i]; @@ -1896,11 +1897,17 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (flags & EXEC_OBJECT_CAPTURE) { struct i915_capture_list *capture; - capture = kmalloc(sizeof(*capture), GFP_KERNEL); - if (capture) { - capture->next = eb->request->capture_list; - capture->vma = vma; - eb->request->capture_list = capture; + for_each_batch_create_order(eb, j) { + if (!eb->requests[j]) + break; + + capture = kmalloc(sizeof(*capture), GFP_KERNEL); + if (capture) { + capture->next = + eb->requests[j]->capture_list; + capture->vma = vma; + eb->requests[j]->capture_list = capture; + } } } @@ -1915,20 +1922,43 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) * but gcc's optimiser doesn't handle that as well and emits * two jumps instead of one. Maybe one day... + * + * FIXME: There is also sync flushing in set_pages(), which + * serves a different purpose(some of the time at least). + * + * We should consider: + * + * 1. Rip out the async flush code. + * + * 2. Or make the sync flushing use the async clflush path + * using mandatory fences underneath. Currently the below + * async flush happens after we bind the object. */ if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) { if (i915_gem_clflush_object(obj, 0)) flags &= ~EXEC_OBJECT_ASYNC; } + /* We only need to await on the first request */ if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) { err = i915_request_await_object - (eb->request, obj, flags & EXEC_OBJECT_WRITE); + (eb_find_first_request_added(eb), obj, + flags & EXEC_OBJECT_WRITE); } - if (err == 0) - err = i915_vma_move_to_active(vma, eb->request, - flags | __EXEC_OBJECT_NO_RESERVE); + for_each_batch_add_order(eb, j) { + if (err) + break; + if (!eb->requests[j]) + continue; + + err = _i915_vma_move_to_active(vma, eb->requests[j], + j ? NULL : + eb->composite_fence ? + eb->composite_fence : + &eb->requests[j]->fence, + flags | __EXEC_OBJECT_NO_RESERVE); + } } #ifdef CONFIG_MMU_NOTIFIER @@ -1959,11 +1989,16 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) goto err_skip; /* Unconditionally flush any chipset caches (for streaming writes). */ - intel_gt_chipset_flush(eb->engine->gt); + intel_gt_chipset_flush(eb->gt); return 0; err_skip: - i915_request_set_error_once(eb->request, err); + for_each_batch_create_order(eb, j) { + if (!eb->requests[j]) + break; + + i915_request_set_error_once(eb->requests[j], err); + } return err; } @@ -2058,14 +2093,17 @@ static int eb_parse(struct i915_execbuffer *eb) int err; if (!eb_use_cmdparser(eb)) { - batch = eb_dispatch_secure(eb, eb->batch->vma); + batch = eb_dispatch_secure(eb, eb->batches[0]->vma); if (IS_ERR(batch)) return PTR_ERR(batch); goto secure_batch; } - len = eb->batch_len; + if (intel_context_is_parallel(eb->context)) + return -EINVAL; + + len = eb->batch_len[0]; if (!CMDPARSER_USES_GGTT(eb->i915)) { /* * ppGTT backed shadow buffers must be mapped RO, to prevent @@ -2079,11 +2117,11 @@ static int eb_parse(struct i915_execbuffer *eb) } else { len += I915_CMD_PARSER_TRAMPOLINE_SIZE; } - if (unlikely(len < eb->batch_len)) /* last paranoid check of overflow */ + if (unlikely(len < eb->batch_len[0])) /* last paranoid check of overflow */ return -EINVAL; if (!pool) { - pool = intel_gt_get_buffer_pool(eb->engine->gt, len, + pool = intel_gt_get_buffer_pool(eb->gt, len, I915_MAP_WB); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -2108,7 +2146,7 @@ static int eb_parse(struct i915_execbuffer *eb) trampoline = shadow; shadow = shadow_batch_pin(eb, pool->obj, - &eb->engine->gt->ggtt->vm, + &eb->gt->ggtt->vm, PIN_GLOBAL); if (IS_ERR(shadow)) { err = PTR_ERR(shadow); @@ -2130,26 +2168,29 @@ static int eb_parse(struct i915_execbuffer *eb) if (err) goto err_trampoline; - err = intel_engine_cmd_parser(eb->engine, - eb->batch->vma, + err = intel_engine_cmd_parser(eb->context->engine, + eb->batches[0]->vma, eb->batch_start_offset, - eb->batch_len, + eb->batch_len[0], shadow, trampoline); if (err) goto err_unpin_batch; - eb->batch = &eb->vma[eb->buffer_count++]; - eb->batch->vma = i915_vma_get(shadow); - eb->batch->flags = __EXEC_OBJECT_HAS_PIN; + eb->batches[0] = &eb->vma[eb->buffer_count++]; + eb->batches[0]->vma = i915_vma_get(shadow); + eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN; eb->trampoline = trampoline; eb->batch_start_offset = 0; secure_batch: if (batch) { - eb->batch = &eb->vma[eb->buffer_count++]; - eb->batch->flags = __EXEC_OBJECT_HAS_PIN; - eb->batch->vma = i915_vma_get(batch); + if (intel_context_is_parallel(eb->context)) + return -EINVAL; + + eb->batches[0] = &eb->vma[eb->buffer_count++]; + eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN; + eb->batches[0]->vma = i915_vma_get(batch); } return 0; @@ -2165,19 +2206,18 @@ err: return err; } -static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) +static int eb_request_submit(struct i915_execbuffer *eb, + struct i915_request *rq, + struct i915_vma *batch, + u64 batch_len) { int err; - if (intel_context_nopreempt(eb->context)) - __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); - - err = eb_move_to_gpu(eb); - if (err) - return err; + if (intel_context_nopreempt(rq->context)) + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { - err = i915_reset_gen7_sol_offsets(eb->request); + err = i915_reset_gen7_sol_offsets(rq); if (err) return err; } @@ -2188,26 +2228,26 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) * allows us to determine if the batch is still waiting on the GPU * or actually running by checking the breadcrumb. */ - if (eb->engine->emit_init_breadcrumb) { - err = eb->engine->emit_init_breadcrumb(eb->request); + if (rq->context->engine->emit_init_breadcrumb) { + err = rq->context->engine->emit_init_breadcrumb(rq); if (err) return err; } - err = eb->engine->emit_bb_start(eb->request, - batch->node.start + - eb->batch_start_offset, - eb->batch_len, - eb->batch_flags); + err = rq->context->engine->emit_bb_start(rq, + batch->node.start + + eb->batch_start_offset, + batch_len, + eb->batch_flags); if (err) return err; if (eb->trampoline) { + GEM_BUG_ON(intel_context_is_parallel(rq->context)); GEM_BUG_ON(eb->batch_start_offset); - err = eb->engine->emit_bb_start(eb->request, - eb->trampoline->node.start + - eb->batch_len, - 0, 0); + err = rq->context->engine->emit_bb_start(rq, + eb->trampoline->node.start + + batch_len, 0, 0); if (err) return err; } @@ -2215,6 +2255,27 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) return 0; } +static int eb_submit(struct i915_execbuffer *eb) +{ + unsigned int i; + int err; + + err = eb_move_to_gpu(eb); + + for_each_batch_create_order(eb, i) { + if (!eb->requests[i]) + break; + + trace_i915_request_queue(eb->requests[i], eb->batch_flags); + if (!err) + err = eb_request_submit(eb, eb->requests[i], + eb->batches[i]->vma, + eb->batch_len[i]); + } + + return err; +} + static int num_vcs_engines(const struct drm_i915_private *i915) { return hweight_long(VDBOX_MASK(&i915->gt)); @@ -2280,26 +2341,11 @@ static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel return i915_request_get(rq); } -static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle) +static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce, + bool throttle) { - struct intel_context *ce = eb->context; struct intel_timeline *tl; struct i915_request *rq = NULL; - int err; - - GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); - - if (unlikely(intel_context_is_banned(ce))) - return ERR_PTR(-EIO); - - /* - * Pinning the contexts may generate requests in order to acquire - * GGTT space, so do this first before we reserve a seqno for - * ourselves. - */ - err = intel_context_pin_ww(ce, &eb->ww); - if (err) - return ERR_PTR(err); /* * Take a local wakeref for preparing to dispatch the execbuf as @@ -2310,33 +2356,108 @@ static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throt * taken on the engine, and the parent device. */ tl = intel_context_timeline_lock(ce); - if (IS_ERR(tl)) { - intel_context_unpin(ce); - return ERR_CAST(tl); - } + if (IS_ERR(tl)) + return PTR_ERR(tl); intel_context_enter(ce); if (throttle) rq = eb_throttle(eb, ce); intel_context_timeline_unlock(tl); + if (rq) { + bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; + long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT; + + if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, + timeout) < 0) { + i915_request_put(rq); + + tl = intel_context_timeline_lock(ce); + intel_context_exit(ce); + intel_context_timeline_unlock(tl); + + if (nonblock) + return -EWOULDBLOCK; + else + return -EINTR; + } + i915_request_put(rq); + } + + return 0; +} + +static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle) +{ + struct intel_context *ce = eb->context, *child; + int err; + int i = 0, j = 0; + + GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); + + if (unlikely(intel_context_is_banned(ce))) + return -EIO; + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + err = intel_context_pin_ww(ce, &eb->ww); + if (err) + return err; + for_each_child(ce, child) { + err = intel_context_pin_ww(child, &eb->ww); + GEM_BUG_ON(err); /* perma-pinned should incr a counter */ + } + + for_each_child(ce, child) { + err = eb_pin_timeline(eb, child, throttle); + if (err) + goto unwind; + ++i; + } + err = eb_pin_timeline(eb, ce, throttle); + if (err) + goto unwind; + eb->args->flags |= __EXEC_ENGINE_PINNED; - return rq; + return 0; + +unwind: + for_each_child(ce, child) { + if (j++ < i) { + mutex_lock(&child->timeline->mutex); + intel_context_exit(child); + mutex_unlock(&child->timeline->mutex); + } + } + for_each_child(ce, child) + intel_context_unpin(child); + intel_context_unpin(ce); + return err; } static void eb_unpin_engine(struct i915_execbuffer *eb) { - struct intel_context *ce = eb->context; - struct intel_timeline *tl = ce->timeline; + struct intel_context *ce = eb->context, *child; if (!(eb->args->flags & __EXEC_ENGINE_PINNED)) return; eb->args->flags &= ~__EXEC_ENGINE_PINNED; - mutex_lock(&tl->mutex); + for_each_child(ce, child) { + mutex_lock(&child->timeline->mutex); + intel_context_exit(child); + mutex_unlock(&child->timeline->mutex); + + intel_context_unpin(child); + } + + mutex_lock(&ce->timeline->mutex); intel_context_exit(ce); - mutex_unlock(&tl->mutex); + mutex_unlock(&ce->timeline->mutex); intel_context_unpin(ce); } @@ -2387,7 +2508,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb) static int eb_select_engine(struct i915_execbuffer *eb) { - struct intel_context *ce; + struct intel_context *ce, *child; unsigned int idx; int err; @@ -2400,6 +2521,20 @@ eb_select_engine(struct i915_execbuffer *eb) if (IS_ERR(ce)) return PTR_ERR(ce); + if (intel_context_is_parallel(ce)) { + if (eb->buffer_count < ce->parallel.number_children + 1) { + intel_context_put(ce); + return -EINVAL; + } + if (eb->batch_start_offset || eb->args->batch_len) { + intel_context_put(ce); + return -EINVAL; + } + } + eb->num_batches = ce->parallel.number_children + 1; + + for_each_child(ce, child) + intel_context_get(child); intel_gt_pm_get(ce->engine->gt); if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { @@ -2407,6 +2542,13 @@ eb_select_engine(struct i915_execbuffer *eb) if (err) goto err; } + for_each_child(ce, child) { + if (!test_bit(CONTEXT_ALLOC_BIT, &child->flags)) { + err = intel_context_alloc_state(child); + if (err) + goto err; + } + } /* * ABI: Before userspace accesses the GPU (e.g. execbuffer), report @@ -2417,7 +2559,7 @@ eb_select_engine(struct i915_execbuffer *eb) goto err; eb->context = ce; - eb->engine = ce->engine; + eb->gt = ce->engine->gt; /* * Make sure engine pool stays alive even if we call intel_context_put @@ -2428,6 +2570,8 @@ eb_select_engine(struct i915_execbuffer *eb) err: intel_gt_pm_put(ce->engine->gt); + for_each_child(ce, child) + intel_context_put(child); intel_context_put(ce); return err; } @@ -2435,7 +2579,11 @@ err: static void eb_put_engine(struct i915_execbuffer *eb) { - intel_gt_pm_put(eb->engine->gt); + struct intel_context *child; + + intel_gt_pm_put(eb->gt); + for_each_child(eb->context, child) + intel_context_put(child); intel_context_put(eb->context); } @@ -2658,7 +2806,8 @@ static void put_fence_array(struct eb_fence *fences, int num_fences) } static int -await_fence_array(struct i915_execbuffer *eb) +await_fence_array(struct i915_execbuffer *eb, + struct i915_request *rq) { unsigned int n; int err; @@ -2672,8 +2821,7 @@ await_fence_array(struct i915_execbuffer *eb) if (!eb->fences[n].dma_fence) continue; - err = i915_request_await_dma_fence(eb->request, - eb->fences[n].dma_fence); + err = i915_request_await_dma_fence(rq, eb->fences[n].dma_fence); if (err < 0) return err; } @@ -2681,9 +2829,9 @@ await_fence_array(struct i915_execbuffer *eb) return 0; } -static void signal_fence_array(const struct i915_execbuffer *eb) +static void signal_fence_array(const struct i915_execbuffer *eb, + struct dma_fence * const fence) { - struct dma_fence * const fence = &eb->request->fence; unsigned int n; for (n = 0; n < eb->num_fences; n++) { @@ -2731,9 +2879,9 @@ static void retire_requests(struct intel_timeline *tl, struct i915_request *end) break; } -static int eb_request_add(struct i915_execbuffer *eb, int err) +static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq, + int err, bool last_parallel) { - struct i915_request *rq = eb->request; struct intel_timeline * const tl = i915_request_timeline(rq); struct i915_sched_attr attr = {}; struct i915_request *prev; @@ -2755,6 +2903,17 @@ static int eb_request_add(struct i915_execbuffer *eb, int err) err = -ENOENT; /* override any transient errors */ } + if (intel_context_is_parallel(eb->context)) { + if (err) { + __i915_request_skip(rq); + set_bit(I915_FENCE_FLAG_SKIP_PARALLEL, + &rq->fence.flags); + } + if (last_parallel) + set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, + &rq->fence.flags); + } + __i915_request_queue(rq, &attr); /* Try to clean up the client's timeline after submitting the request */ @@ -2766,6 +2925,25 @@ static int eb_request_add(struct i915_execbuffer *eb, int err) return err; } +static int eb_requests_add(struct i915_execbuffer *eb, int err) +{ + int i; + + /* + * We iterate in reverse order of creation to release timeline mutexes in + * same order. + */ + for_each_batch_add_order(eb, i) { + struct i915_request *rq = eb->requests[i]; + + if (!rq) + continue; + err |= eb_request_add(eb, rq, err, i == 0); + } + + return err; +} + static const i915_user_extension_fn execbuf_extensions[] = { [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, }; @@ -2792,6 +2970,185 @@ parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args, eb); } +static void eb_requests_get(struct i915_execbuffer *eb) +{ + unsigned int i; + + for_each_batch_create_order(eb, i) { + if (!eb->requests[i]) + break; + + i915_request_get(eb->requests[i]); + } +} + +static void eb_requests_put(struct i915_execbuffer *eb) +{ + unsigned int i; + + for_each_batch_create_order(eb, i) { + if (!eb->requests[i]) + break; + + i915_request_put(eb->requests[i]); + } +} + +static struct sync_file * +eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) +{ + struct sync_file *out_fence = NULL; + struct dma_fence_array *fence_array; + struct dma_fence **fences; + unsigned int i; + + GEM_BUG_ON(!intel_context_is_parent(eb->context)); + + fences = kmalloc_array(eb->num_batches, sizeof(*fences), GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + for_each_batch_create_order(eb, i) { + fences[i] = &eb->requests[i]->fence; + __set_bit(I915_FENCE_FLAG_COMPOSITE, + &eb->requests[i]->fence.flags); + } + + fence_array = dma_fence_array_create(eb->num_batches, + fences, + eb->context->parallel.fence_context, + eb->context->parallel.seqno, + false); + if (!fence_array) { + kfree(fences); + return ERR_PTR(-ENOMEM); + } + + /* Move ownership to the dma_fence_array created above */ + for_each_batch_create_order(eb, i) + dma_fence_get(fences[i]); + + if (out_fence_fd != -1) { + out_fence = sync_file_create(&fence_array->base); + /* sync_file now owns fence_arry, drop creation ref */ + dma_fence_put(&fence_array->base); + if (!out_fence) + return ERR_PTR(-ENOMEM); + } + + eb->composite_fence = &fence_array->base; + + return out_fence; +} + +static struct sync_file * +eb_fences_add(struct i915_execbuffer *eb, struct i915_request *rq, + struct dma_fence *in_fence, int out_fence_fd) +{ + struct sync_file *out_fence = NULL; + int err; + + if (unlikely(eb->gem_context->syncobj)) { + struct dma_fence *fence; + + fence = drm_syncobj_fence_get(eb->gem_context->syncobj); + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + if (err) + return ERR_PTR(err); + } + + if (in_fence) { + if (eb->args->flags & I915_EXEC_FENCE_SUBMIT) + err = i915_request_await_execution(rq, in_fence); + else + err = i915_request_await_dma_fence(rq, in_fence); + if (err < 0) + return ERR_PTR(err); + } + + if (eb->fences) { + err = await_fence_array(eb, rq); + if (err) + return ERR_PTR(err); + } + + if (intel_context_is_parallel(eb->context)) { + out_fence = eb_composite_fence_create(eb, out_fence_fd); + if (IS_ERR(out_fence)) + return ERR_PTR(-ENOMEM); + } else if (out_fence_fd != -1) { + out_fence = sync_file_create(&rq->fence); + if (!out_fence) + return ERR_PTR(-ENOMEM); + } + + return out_fence; +} + +static struct intel_context * +eb_find_context(struct i915_execbuffer *eb, unsigned int context_number) +{ + struct intel_context *child; + + if (likely(context_number == 0)) + return eb->context; + + for_each_child(eb->context, child) + if (!--context_number) + return child; + + GEM_BUG_ON("Context not found"); + + return NULL; +} + +static struct sync_file * +eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, + int out_fence_fd) +{ + struct sync_file *out_fence = NULL; + unsigned int i; + + for_each_batch_create_order(eb, i) { + /* Allocate a request for this batch buffer nice and early. */ + eb->requests[i] = i915_request_create(eb_find_context(eb, i)); + if (IS_ERR(eb->requests[i])) { + out_fence = ERR_PTR(PTR_ERR(eb->requests[i])); + eb->requests[i] = NULL; + return out_fence; + } + + /* + * Only the first request added (committed to backend) has to + * take the in fences into account as all subsequent requests + * will have fences inserted inbetween them. + */ + if (i + 1 == eb->num_batches) { + out_fence = eb_fences_add(eb, eb->requests[i], + in_fence, out_fence_fd); + if (IS_ERR(out_fence)) + return out_fence; + } + + /* + * Whilst this request exists, batch_obj will be on the + * active_list, and so will hold the active reference. Only when + * this request is retired will the batch_obj be moved onto + * the inactive_list and lose its active reference. Hence we do + * not need to explicitly hold another reference here. + */ + eb->requests[i]->batch = eb->batches[i]->vma; + if (eb->batch_pool) { + GEM_BUG_ON(intel_context_is_parallel(eb->context)); + intel_gt_buffer_pool_mark_active(eb->batch_pool, + eb->requests[i]); + } + } + + return out_fence; +} + static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, @@ -2802,7 +3159,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; - struct i915_vma *batch; int out_fence_fd = -1; int err; @@ -2826,12 +3182,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.buffer_count = args->buffer_count; eb.batch_start_offset = args->batch_start_offset; - eb.batch_len = args->batch_len; eb.trampoline = NULL; eb.fences = NULL; eb.num_fences = 0; + memset(eb.requests, 0, sizeof(struct i915_request *) * + ARRAY_SIZE(eb.requests)); + eb.composite_fence = NULL; + eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { if (GRAPHICS_VER(i915) >= 11) @@ -2915,70 +3274,25 @@ i915_gem_do_execbuffer(struct drm_device *dev, ww_acquire_done(&eb.ww.ctx); - batch = eb.batch->vma; - - /* Allocate a request for this batch buffer nice and early. */ - eb.request = i915_request_create(eb.context); - if (IS_ERR(eb.request)) { - err = PTR_ERR(eb.request); - goto err_vma; - } - - if (unlikely(eb.gem_context->syncobj)) { - struct dma_fence *fence; - - fence = drm_syncobj_fence_get(eb.gem_context->syncobj); - err = i915_request_await_dma_fence(eb.request, fence); - dma_fence_put(fence); - if (err) - goto err_ext; - } - - if (in_fence) { - if (args->flags & I915_EXEC_FENCE_SUBMIT) - err = i915_request_await_execution(eb.request, - in_fence); - else - err = i915_request_await_dma_fence(eb.request, - in_fence); - if (err < 0) - goto err_request; - } - - if (eb.fences) { - err = await_fence_array(&eb); - if (err) + out_fence = eb_requests_create(&eb, in_fence, out_fence_fd); + if (IS_ERR(out_fence)) { + err = PTR_ERR(out_fence); + if (eb.requests[0]) goto err_request; + else + goto err_vma; } - if (out_fence_fd != -1) { - out_fence = sync_file_create(&eb.request->fence); - if (!out_fence) { - err = -ENOMEM; - goto err_request; - } - } - - /* - * Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when this - * request is retired will the the batch_obj be moved onto the - * inactive_list and lose its active reference. Hence we do not need - * to explicitly hold another reference here. - */ - eb.request->batch = batch; - if (eb.batch_pool) - intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request); - - trace_i915_request_queue(eb.request, eb.batch_flags); - err = eb_submit(&eb, batch); + err = eb_submit(&eb); err_request: - i915_request_get(eb.request); - err = eb_request_add(&eb, err); + eb_requests_get(&eb); + err = eb_requests_add(&eb, err); if (eb.fences) - signal_fence_array(&eb); + signal_fence_array(&eb, eb.composite_fence ? + eb.composite_fence : + &eb.requests[0]->fence); if (out_fence) { if (err == 0) { @@ -2993,10 +3307,15 @@ err_request: if (unlikely(eb.gem_context->syncobj)) { drm_syncobj_replace_fence(eb.gem_context->syncobj, - &eb.request->fence); + eb.composite_fence ? + eb.composite_fence : + &eb.requests[0]->fence); } - i915_request_put(eb.request); + if (!out_fence && eb.composite_fence) + dma_fence_put(eb.composite_fence); + + eb_requests_put(&eb); err_vma: eb_release_vmas(&eb, true); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index e5ae9c06510c..a57a6b7013c2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -134,6 +134,8 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, internal_free_pages(pages); obj->mm.dirty = false; + + __start_cpu_write(obj); } static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 76ce6a1500bc..1e426a42a36c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -128,6 +128,32 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); } +bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + /* + * This is purely from a security perspective, so we simply don't care + * about non-userspace objects being able to bypass the LLC. + */ + if (!(obj->flags & I915_BO_ALLOC_USER)) + return false; + + /* + * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it + * possible for userspace to bypass the GTT caching bits set by the + * kernel, as per the given object cache_level. This is troublesome + * since the heavy flush we apply when first gathering the pages is + * skipped if the kernel thinks the object is coherent with the GPU. As + * a result it might be possible to bypass the cache and read the + * contents of the page directly, which could be stale data. If it's + * just a case of userspace shooting themselves in the foot then so be + * it, but since i915 takes the stance of always zeroing memory before + * handing it to userspace, we need to prevent this. + */ + return IS_JSL_EHL(i915); +} + static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { struct drm_i915_gem_object *obj = to_intel_bo(gem); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 9df3ee60604e..59201801cec5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -514,6 +514,7 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); +bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 7c3da4e3e737..da85169006d4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -427,6 +427,33 @@ struct drm_i915_gem_object { * can freely bypass the CPU cache when touching the pages with the GPU, * where the kernel is completely unaware. On such platform we need * apply the sledgehammer-on-acquire regardless of the @cache_coherent. + * + * Special care is taken on non-LLC platforms, to prevent potential + * information leak. The driver currently ensures: + * + * 1. All userspace objects, by default, have @cache_level set as + * I915_CACHE_NONE. The only exception is userptr objects, where we + * instead force I915_CACHE_LLC, but we also don't allow userspace to + * ever change the @cache_level for such objects. Another special case + * is dma-buf, which doesn't rely on @cache_dirty, but there we + * always do a forced flush when acquiring the pages, if there is a + * chance that the pages can be read directly from main memory with + * the GPU. + * + * 2. All I915_CACHE_NONE objects have @cache_dirty initially true. + * + * 3. All swapped-out objects(i.e shmem) have @cache_dirty set to + * true. + * + * 4. The @cache_dirty is never freely reset before the initial + * flush, even if userspace adjusts the @cache_level through the + * i915_gem_set_caching_ioctl. + * + * 5. All @cache_dirty objects(including swapped-in) are initially + * flushed with a synchronous call to drm_clflush_sg in + * __i915_gem_object_set_pages. The @cache_dirty can be freely reset + * at this point. All further asynchronous clfushes are never security + * critical, i.e userspace is free to race against itself. */ unsigned int cache_dirty:1; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 11f072193f3b..d77da59fae04 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -182,22 +182,7 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - /* - * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it - * possible for userspace to bypass the GTT caching bits set by the - * kernel, as per the given object cache_level. This is troublesome - * since the heavy flush we apply when first gathering the pages is - * skipped if the kernel thinks the object is coherent with the GPU. As - * a result it might be possible to bypass the cache and read the - * contents of the page directly, which could be stale data. If it's - * just a case of userspace shooting themselves in the foot then so be - * it, but since i915 takes the stance of always zeroing memory before - * handing it to userspace, we need to prevent this. - * - * By setting cache_dirty here we make the clflush in set_pages - * unconditional on such platforms. - */ - if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER) + if (i915_gem_object_can_bypass_llc(obj)) obj->cache_dirty = true; __i915_gem_object_set_pages(obj, st, sg_page_sizes); @@ -301,6 +286,8 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages, bool needs_clflush) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); if (obj->mm.madv == I915_MADV_DONTNEED) @@ -312,6 +299,16 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, drm_clflush_sg(pages); __start_cpu_write(obj); + /* + * On non-LLC platforms, force the flush-on-acquire if this is ever + * swapped-in. Our async flush path is not trust worthy enough yet(and + * happens in the wrong order), and with some tricks it's conceivable + * for userspace to change the cache-level to I915_CACHE_NONE after the + * pages are swapped-in, and since execbuf binds the object before doing + * the async flush, we have a race window. + */ + if (!HAS_LLC(i915)) + obj->cache_dirty = true; } void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 8ea0fa665e53..3173c9f9a040 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -165,8 +165,11 @@ alloc_table: goto err; } - sg_page_sizes = i915_sg_dma_sizes(st->sgl); + WARN_ON_ONCE(!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)); + if (i915_gem_object_can_bypass_llc(obj)) + obj->cache_dirty = true; + sg_page_sizes = i915_sg_dma_sizes(st->sgl); __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -546,7 +549,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, + I915_BO_ALLOC_USER); obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 41d0680f3bd7..b2003133deaf 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -136,6 +136,8 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, huge_pages_free_pages(pages); obj->mm.dirty = false; + + __start_cpu_write(obj); } static const struct drm_i915_gem_object_ops huge_page_ops = { @@ -152,6 +154,7 @@ huge_pages_object(struct drm_i915_private *i915, { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; + unsigned int cache_level; GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); @@ -173,7 +176,9 @@ huge_pages_object(struct drm_i915_private *i915, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; + + cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level); obj->mm.page_mask = page_mask; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index ecbcbb86ae1e..8402ed925a69 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -17,13 +17,20 @@ #include "huge_gem_object.h" #include "mock_context.h" +enum client_tiling { + CLIENT_TILING_LINEAR, + CLIENT_TILING_X, + CLIENT_TILING_Y, + CLIENT_NUM_TILING_TYPES +}; + #define WIDTH 512 #define HEIGHT 32 struct blit_buffer { struct i915_vma *vma; u32 start_val; - u32 tiling; + enum client_tiling tiling; }; struct tiled_blits { @@ -53,9 +60,9 @@ static int prepare_blit(const struct tiled_blits *t, *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(BCS_SWCTRL); cmd = (BCS_SRC_Y | BCS_DST_Y) << 16; - if (src->tiling == I915_TILING_Y) + if (src->tiling == CLIENT_TILING_Y) cmd |= BCS_SRC_Y; - if (dst->tiling == I915_TILING_Y) + if (dst->tiling == CLIENT_TILING_Y) cmd |= BCS_DST_Y; *cs++ = cmd; @@ -172,7 +179,7 @@ static int tiled_blits_create_buffers(struct tiled_blits *t, t->buffers[i].vma = vma; t->buffers[i].tiling = - i915_prandom_u32_max_state(I915_TILING_Y + 1, prng); + i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng); } return 0; @@ -197,17 +204,17 @@ static u64 swizzle_bit(unsigned int bit, u64 offset) static u64 tiled_offset(const struct intel_gt *gt, u64 v, unsigned int stride, - unsigned int tiling) + enum client_tiling tiling) { unsigned int swizzle; u64 x, y; - if (tiling == I915_TILING_NONE) + if (tiling == CLIENT_TILING_LINEAR) return v; y = div64_u64_rem(v, stride, &x); - if (tiling == I915_TILING_X) { + if (tiling == CLIENT_TILING_X) { v = div64_u64_rem(y, 8, &y) * stride * 8; v += y * 512; v += div64_u64_rem(x, 512, &x) << 12; @@ -244,12 +251,12 @@ static u64 tiled_offset(const struct intel_gt *gt, return v; } -static const char *repr_tiling(int tiling) +static const char *repr_tiling(enum client_tiling tiling) { switch (tiling) { - case I915_TILING_NONE: return "linear"; - case I915_TILING_X: return "X"; - case I915_TILING_Y: return "Y"; + case CLIENT_TILING_LINEAR: return "linear"; + case CLIENT_TILING_X: return "X"; + case CLIENT_TILING_Y: return "Y"; default: return "unknown"; } } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index e9a0cad5c34d..5634d14052bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce, if (err) goto err_post_unpin; + intel_engine_pm_might_get(ce->engine); + if (unlikely(intel_context_is_closed(ce))) { err = -ENOENT; goto err_unlock; @@ -362,8 +364,9 @@ static int __intel_context_active(struct i915_active *active) return 0; } -static int sw_fence_dummy_notify(struct i915_sw_fence *sf, - enum i915_sw_fence_notify state) +static int __i915_sw_fence_call +sw_fence_dummy_notify(struct i915_sw_fence *sf, + enum i915_sw_fence_notify state) { return NOTIFY_DONE; } @@ -399,6 +402,10 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->guc_id.id = GUC_INVALID_LRC_ID; INIT_LIST_HEAD(&ce->guc_id.link); + INIT_LIST_HEAD(&ce->destroyed_link); + + INIT_LIST_HEAD(&ce->parallel.child_list); + /* * Initialize fence to be complete as this is expected to be complete * unless there is a pending schedule disable outstanding. @@ -413,10 +420,17 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) void intel_context_fini(struct intel_context *ce) { + struct intel_context *child, *next; + if (ce->timeline) intel_timeline_put(ce->timeline); i915_vm_put(ce->vm); + /* Need to put the creation ref for the children */ + if (intel_context_is_parent(ce)) + for_each_child_safe(ce, child, next) + intel_context_put(child); + mutex_destroy(&ce->pin_mutex); i915_active_fini(&ce->active); i915_sw_fence_fini(&ce->guc_state.blocked); @@ -515,24 +529,53 @@ retry: struct i915_request *intel_context_find_active_request(struct intel_context *ce) { + struct intel_context *parent = intel_context_to_parent(ce); struct i915_request *rq, *active = NULL; unsigned long flags; GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); - spin_lock_irqsave(&ce->guc_state.lock, flags); - list_for_each_entry_reverse(rq, &ce->guc_state.requests, + /* + * We search the parent list to find an active request on the submitted + * context. The parent list contains the requests for all the contexts + * in the relationship so we have to do a compare of each request's + * context. + */ + spin_lock_irqsave(&parent->guc_state.lock, flags); + list_for_each_entry_reverse(rq, &parent->guc_state.requests, sched.link) { + if (rq->context != ce) + continue; if (i915_request_completed(rq)) break; active = rq; } - spin_unlock_irqrestore(&ce->guc_state.lock, flags); + spin_unlock_irqrestore(&parent->guc_state.lock, flags); return active; } +void intel_context_bind_parent_child(struct intel_context *parent, + struct intel_context *child) +{ + /* + * Callers responsibility to validate that this function is used + * correctly but we use GEM_BUG_ON here ensure that they do. + */ + GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); + GEM_BUG_ON(intel_context_is_pinned(parent)); + GEM_BUG_ON(intel_context_is_child(parent)); + GEM_BUG_ON(intel_context_is_pinned(child)); + GEM_BUG_ON(intel_context_is_child(child)); + GEM_BUG_ON(intel_context_is_parent(child)); + + parent->parallel.child_index = parent->parallel.number_children++; + list_add_tail(&child->parallel.child_link, + &parent->parallel.child_list); + child->parallel.parent = parent; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_context.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index c41098950746..246c37d72cd7 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -44,6 +44,54 @@ void intel_context_free(struct intel_context *ce); int intel_context_reconfigure_sseu(struct intel_context *ce, const struct intel_sseu sseu); +#define PARENT_SCRATCH_SIZE PAGE_SIZE + +static inline bool intel_context_is_child(struct intel_context *ce) +{ + return !!ce->parallel.parent; +} + +static inline bool intel_context_is_parent(struct intel_context *ce) +{ + return !!ce->parallel.number_children; +} + +static inline bool intel_context_is_pinned(struct intel_context *ce); + +static inline struct intel_context * +intel_context_to_parent(struct intel_context *ce) +{ + if (intel_context_is_child(ce)) { + /* + * The parent holds ref count to the child so it is always safe + * for the parent to access the child, but the child has a + * pointer to the parent without a ref. To ensure this is safe + * the child should only access the parent pointer while the + * parent is pinned. + */ + GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); + + return ce->parallel.parent; + } else { + return ce; + } +} + +static inline bool intel_context_is_parallel(struct intel_context *ce) +{ + return intel_context_is_child(ce) || intel_context_is_parent(ce); +} + +void intel_context_bind_parent_child(struct intel_context *parent, + struct intel_context *child); + +#define for_each_child(parent, ce)\ + list_for_each_entry(ce, &(parent)->parallel.child_list,\ + parallel.child_link) +#define for_each_child_safe(parent, ce, cn)\ + list_for_each_entry_safe(ce, cn, &(parent)->parallel.child_list,\ + parallel.child_link) + /** * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context * @ce - the context @@ -193,7 +241,13 @@ intel_context_timeline_lock(struct intel_context *ce) struct intel_timeline *tl = ce->timeline; int err; - err = mutex_lock_interruptible(&tl->mutex); + if (intel_context_is_parent(ce)) + err = mutex_lock_interruptible_nested(&tl->mutex, 0); + else if (intel_context_is_child(ce)) + err = mutex_lock_interruptible_nested(&tl->mutex, + ce->parallel.child_index + 1); + else + err = mutex_lock_interruptible(&tl->mutex); if (err) return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 12252c411159..9e0177dc5484 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -55,9 +55,13 @@ struct intel_context_ops { void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); - /* virtual engine/context interface */ + /* virtual/parallel engine/context interface */ struct intel_context *(*create_virtual)(struct intel_engine_cs **engine, - unsigned int count); + unsigned int count, + unsigned long flags); + struct intel_context *(*create_parallel)(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width); struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine, unsigned int sibling); }; @@ -113,6 +117,7 @@ struct intel_context { #define CONTEXT_NOPREEMPT 8 #define CONTEXT_LRCA_DIRTY 9 #define CONTEXT_GUC_INIT 10 +#define CONTEXT_PERMA_PIN 11 struct { u64 timeout_us; @@ -197,22 +202,80 @@ struct intel_context { struct { /** * @id: handle which is used to uniquely identify this context - * with the GuC, protected by guc->contexts_lock + * with the GuC, protected by guc->submission_state.lock */ u16 id; /** * @ref: the number of references to the guc_id, when * transitioning in and out of zero protected by - * guc->contexts_lock + * guc->submission_state.lock */ atomic_t ref; /** * @link: in guc->guc_id_list when the guc_id has no refs but is - * still valid, protected by guc->contexts_lock + * still valid, protected by guc->submission_state.lock */ struct list_head link; } guc_id; + /** + * @destroyed_link: link in guc->submission_state.destroyed_contexts, in + * list when context is pending to be destroyed (deregistered with the + * GuC), protected by guc->submission_state.lock + */ + struct list_head destroyed_link; + + /** @parallel: sub-structure for parallel submission members */ + struct { + union { + /** + * @child_list: parent's list of children + * contexts, no protection as immutable after context + * creation + */ + struct list_head child_list; + /** + * @child_link: child's link into parent's list of + * children + */ + struct list_head child_link; + }; + /** @parent: pointer to parent if child */ + struct intel_context *parent; + /** + * @last_rq: last request submitted on a parallel context, used + * to insert submit fences between requests in the parallel + * context + */ + struct i915_request *last_rq; + /** + * @fence_context: fence context composite fence when doing + * parallel submission + */ + u64 fence_context; + /** + * @seqno: seqno for composite fence when doing parallel + * submission + */ + u32 seqno; + /** @number_children: number of children if parent */ + u8 number_children; + /** @child_index: index into child_list if child */ + u8 child_index; + /** @guc: GuC specific members for parallel submission */ + struct { + /** @wqi_head: head pointer in work queue */ + u16 wqi_head; + /** @wqi_tail: tail pointer in work queue */ + u16 wqi_tail; + /** + * @parent_page: page in context state (ce->state) used + * by parent for work queue, process descriptor + */ + u8 parent_page; + } guc; + } parallel; + #ifdef CONFIG_DRM_I915_SELFTEST /** * @drop_schedule_enable: Force drop of schedule enable G2H for selftest diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 452248884ef1..08559ace0ada 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -2,6 +2,7 @@ #ifndef _INTEL_RINGBUFFER_H_ #define _INTEL_RINGBUFFER_H_ +#include <asm/cacheflush.h> #include <drm/drm_util.h> #include <linux/hashtable.h> @@ -281,9 +282,19 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } +#define FORCE_VIRTUAL BIT(0) struct intel_context * intel_engine_create_virtual(struct intel_engine_cs **siblings, - unsigned int count); + unsigned int count, unsigned long flags); + +static inline struct intel_context * +intel_engine_create_parallel(struct intel_engine_cs **engines, + unsigned int num_engines, + unsigned int width) +{ + GEM_BUG_ON(!engines[0]->cops->create_parallel); + return engines[0]->cops->create_parallel(engines, num_engines, width); +} static inline bool intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 2ae57e4656a3..ff6753ccb129 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir) GEM_DEBUG_WARN_ON(iir); } -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, + u8 logical_instance) { const struct engine_info *info = &intel_engines[id]; struct drm_i915_private *i915 = gt->i915; @@ -335,6 +336,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->class = info->class; engine->instance = info->instance; + engine->logical_mask = BIT(logical_instance); __sprint_engine_name(engine); engine->props.heartbeat_interval_ms = @@ -588,6 +590,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) return info->engine_mask; } +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids, + u8 class, const u8 *map, u8 num_instances) +{ + int i, j; + u8 current_logical_id = 0; + + for (j = 0; j < num_instances; ++j) { + for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { + if (!HAS_ENGINE(gt, i) || + intel_engines[i].class != class) + continue; + + if (intel_engines[i].instance == map[j]) { + logical_ids[intel_engines[i].instance] = + current_logical_id++; + break; + } + } + } +} + +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class) +{ + int i; + u8 map[MAX_ENGINE_INSTANCE + 1]; + + for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i) + map[i] = i; + populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map)); +} + /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers * @gt: pointer to struct intel_gt @@ -599,7 +632,8 @@ int intel_engines_init_mmio(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; const unsigned int engine_mask = init_engine_mask(gt); unsigned int mask = 0; - unsigned int i; + unsigned int i, class; + u8 logical_ids[MAX_ENGINE_INSTANCE + 1]; int err; drm_WARN_ON(&i915->drm, engine_mask == 0); @@ -609,15 +643,23 @@ int intel_engines_init_mmio(struct intel_gt *gt) if (i915_inject_probe_failure(i915)) return -ENODEV; - for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { - if (!HAS_ENGINE(gt, i)) - continue; + for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) { + setup_logical_ids(gt, logical_ids, class); - err = intel_engine_setup(gt, i); - if (err) - goto cleanup; + for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { + u8 instance = intel_engines[i].instance; + + if (intel_engines[i].class != class || + !HAS_ENGINE(gt, i)) + continue; - mask |= BIT(i); + err = intel_engine_setup(gt, i, + logical_ids[instance]); + if (err) + goto cleanup; + + mask |= BIT(i); + } } /* @@ -1911,16 +1953,16 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) struct intel_context * intel_engine_create_virtual(struct intel_engine_cs **siblings, - unsigned int count) + unsigned int count, unsigned long flags) { if (count == 0) return ERR_PTR(-EINVAL); - if (count == 1) + if (count == 1 && !(flags & FORCE_VIRTUAL)) return intel_context_create(siblings[0]); GEM_BUG_ON(!siblings[0]->cops->create_virtual); - return siblings[0]->cops->create_virtual(siblings, count); + return siblings[0]->cops->create_virtual(siblings, count, flags); } struct i915_request * diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index dacd62773735..a1334b48dde7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -162,6 +162,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) unsigned long flags; bool result = true; + /* + * This is execlist specific behaviour intended to ensure the GPU is + * idle by switching to a known 'safe' context. With GuC submission, the + * same idle guarantee is achieved by other means (disabling + * scheduling). Further, switching to a 'safe' context has no effect + * with GuC submission as the scheduler can just switch back again. + * + * FIXME: Move this backend scheduler specific behaviour into the + * scheduler backend. + */ + if (intel_engine_uses_guc(engine)) + return true; + /* GPU is pointing to the void, as good as in the kernel context. */ if (intel_gt_is_wedged(engine->gt)) return true; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 8520c595f5e1..d68675925b79 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -6,9 +6,11 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H +#include "i915_drv.h" #include "i915_request.h" #include "intel_engine_types.h" #include "intel_wakeref.h" +#include "intel_gt_pm.h" static inline bool intel_engine_pm_is_awake(const struct intel_engine_cs *engine) @@ -16,6 +18,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine) return intel_wakeref_is_active(&engine->wakeref); } +static inline void __intel_engine_pm_get(struct intel_engine_cs *engine) +{ + __intel_wakeref_get(&engine->wakeref); +} + static inline void intel_engine_pm_get(struct intel_engine_cs *engine) { intel_wakeref_get(&engine->wakeref); @@ -26,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) return intel_wakeref_get_if_active(&engine->wakeref); } +static inline void intel_engine_pm_might_get(struct intel_engine_cs *engine) +{ + if (!intel_engine_is_virtual(engine)) { + intel_wakeref_might_get(&engine->wakeref); + } else { + struct intel_gt *gt = engine->gt; + struct intel_engine_cs *tengine; + intel_engine_mask_t tmp, mask = engine->mask; + + for_each_engine_masked(tengine, gt, mask, tmp) + intel_wakeref_might_get(&tengine->wakeref); + } + intel_gt_pm_might_get(engine->gt); +} + static inline void intel_engine_pm_put(struct intel_engine_cs *engine) { intel_wakeref_put(&engine->wakeref); @@ -47,6 +69,21 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) intel_wakeref_unlock_wait(&engine->wakeref); } +static inline void intel_engine_pm_might_put(struct intel_engine_cs *engine) +{ + if (!intel_engine_is_virtual(engine)) { + intel_wakeref_might_put(&engine->wakeref); + } else { + struct intel_gt *gt = engine->gt; + struct intel_engine_cs *tengine; + intel_engine_mask_t tmp, mask = engine->mask; + + for_each_engine_masked(tengine, gt, mask, tmp) + intel_wakeref_might_put(&tengine->wakeref); + } + intel_gt_pm_might_put(engine->gt); +} + static inline struct i915_request * intel_engine_create_kernel_request(struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 9167ce52487c..e0f773585c29 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -269,6 +269,13 @@ struct intel_engine_cs { unsigned int guc_id; intel_engine_mask_t mask; + /** + * @logical_mask: logical mask of engine, reported to user space via + * query IOCTL and used to communicate with the GuC in logical space. + * The logical instance of a physical engine can change based on product + * and fusing. + */ + intel_engine_mask_t logical_mask; u8 class; u8 instance; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 73a79c2acd3a..bedb80057046 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -201,7 +201,8 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) } static struct intel_context * -execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, + unsigned long flags); static struct i915_request * __active_request(const struct intel_timeline * const tl, @@ -3784,7 +3785,8 @@ unlock: } static struct intel_context * -execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, + unsigned long flags) { struct virtual_engine *ve; unsigned int n; @@ -3877,6 +3879,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) ve->siblings[ve->num_siblings++] = sibling; ve->base.mask |= sibling->mask; + ve->base.logical_mask |= sibling->logical_mask; /* * All physical engines must be compatible for their emission diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index 1fe19ccd2794..f103664b71d4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -13,6 +13,59 @@ #include "pxp/intel_pxp_debugfs.h" #include "uc/intel_uc_debugfs.h" +int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val) +{ + int ret = intel_gt_terminally_wedged(gt); + + switch (ret) { + case -EIO: + *val = 1; + return 0; + case 0: + *val = 0; + return 0; + default: + return ret; + } +} + +int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val) +{ + /* Flush any previous reset before applying for a new one */ + wait_event(gt->reset.queue, + !test_bit(I915_RESET_BACKOFF, >->reset.flags)); + + intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE, + "Manually reset engine mask to %llx", val); + return 0; +} + +/* + * keep the interface clean where the first parameter + * is a 'struct intel_gt *' instead of 'void *' + */ +static int __intel_gt_debugfs_reset_show(void *data, u64 *val) +{ + return intel_gt_debugfs_reset_show(data, val); +} + +static int __intel_gt_debugfs_reset_store(void *data, u64 val) +{ + return intel_gt_debugfs_reset_store(data, val); +} + +DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show, + __intel_gt_debugfs_reset_store, "%llu\n"); + +static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct intel_gt_debugfs_file files[] = { + { "reset", &reset_fops, NULL }, + }; + + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); +} + void intel_gt_debugfs_register(struct intel_gt *gt) { struct dentry *root; @@ -24,6 +77,8 @@ void intel_gt_debugfs_register(struct intel_gt *gt) if (IS_ERR(root)) return; + gt_debugfs_register(gt, root); + intel_gt_engines_debugfs_register(gt, root); intel_gt_pm_debugfs_register(gt, root); intel_sseu_debugfs_register(gt, root); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h index 8b6fca09897c..e307ceb99031 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h @@ -35,4 +35,8 @@ void intel_gt_debugfs_register_files(struct dentry *root, const struct intel_gt_debugfs_file *files, unsigned long count, void *data); +/* functions that need to be accessed by the upper level non-gt interfaces */ +int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val); +int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val); + #endif /* INTEL_GT_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index d0588d8aaa44..bc898df7a48c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -31,6 +31,11 @@ static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt) return intel_wakeref_get_if_active(>->wakeref); } +static inline void intel_gt_pm_might_get(struct intel_gt *gt) +{ + intel_wakeref_might_get(>->wakeref); +} + static inline void intel_gt_pm_put(struct intel_gt *gt) { intel_wakeref_put(>->wakeref); @@ -41,6 +46,15 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt) intel_wakeref_put_async(>->wakeref); } +static inline void intel_gt_pm_might_put(struct intel_gt *gt) +{ + intel_wakeref_might_put(>->wakeref); +} + +#define with_intel_gt_pm(gt, tmp) \ + for (tmp = 1, intel_gt_pm_get(gt); tmp; \ + intel_gt_pm_put(gt), tmp = 0) + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index b3ddb9106125..404dfa7673c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -20,6 +20,46 @@ #include "intel_uncore.h" #include "vlv_sideband.h" +int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt) +{ + atomic_inc(>->user_wakeref); + intel_gt_pm_get(gt); + if (GRAPHICS_VER(gt->i915) >= 6) + intel_uncore_forcewake_user_get(gt->uncore); + + return 0; +} + +int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt) +{ + if (GRAPHICS_VER(gt->i915) >= 6) + intel_uncore_forcewake_user_put(gt->uncore); + intel_gt_pm_put(gt); + atomic_dec(>->user_wakeref); + + return 0; +} + +static int forcewake_user_open(struct inode *inode, struct file *file) +{ + struct intel_gt *gt = inode->i_private; + + return intel_gt_pm_debugfs_forcewake_user_open(gt); +} + +static int forcewake_user_release(struct inode *inode, struct file *file) +{ + struct intel_gt *gt = inode->i_private; + + return intel_gt_pm_debugfs_forcewake_user_release(gt); +} + +static const struct file_operations forcewake_user_fops = { + .owner = THIS_MODULE, + .open = forcewake_user_open, + .release = forcewake_user_release, +}; + static int fw_domains_show(struct seq_file *m, void *data) { struct intel_gt *gt = m->private; @@ -628,6 +668,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) { "drpc", &drpc_fops, NULL }, { "frequency", &frequency_fops, NULL }, { "forcewake", &fw_domains_fops, NULL }, + { "forcewake_user", &forcewake_user_fops, NULL}, { "llc", &llc_fops, llc_eval }, { "rps_boost", &rps_boost_fops, rps_eval }, }; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h index 2b824289582b..a8457887ec65 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h @@ -13,4 +13,8 @@ struct drm_printer; void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root); void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m); +/* functions that need to be accessed by the upper level non-gt interfaces */ +int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt); +int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt); + #endif /* INTEL_GT_PM_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index 9ca88f8ac5dd..08d7d5ae263a 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -3,6 +3,7 @@ * Copyright © 2019 Intel Corporation */ +#include <asm/tsc.h> #include <linux/cpufreq.h> #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3ef9eaf8c50e..56156cf18c41 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) context_size += PAGE_SIZE; } + if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) { + ce->parallel.guc.parent_page = context_size / PAGE_SIZE; + context_size += PARENT_SCRATCH_SIZE; + } + obj = i915_gem_object_create_lmem(engine->i915, context_size, I915_BO_ALLOC_PM_VOLATILE); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 593524195707..586dca1731ce 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -292,7 +292,7 @@ static void xcs_sanitize(struct intel_engine_cs *engine) sanitize_hwsp(engine); /* And scrub the dirty cachelines for the HWSP */ - clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); intel_engine_reset_pinned_contexts(engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 1257f4f11e66..438bbc7b8147 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -64,7 +64,7 @@ intel_timeline_pin_map(struct intel_timeline *timeline) timeline->hwsp_map = vaddr; timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES); - clflush(vaddr + ofs); + drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES); return 0; } @@ -225,7 +225,7 @@ void intel_timeline_reset_seqno(const struct intel_timeline *tl) memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno)); WRITE_ONCE(*hwsp_seqno, tl->seqno); - clflush(hwsp_seqno); + drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES); } void intel_timeline_enter(struct intel_timeline *tl) diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 25a8c4f62b0d..b367ecfa42de 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3733,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); for (n = 0; n < nctx; n++) { - ve[n] = intel_engine_create_virtual(siblings, nsibling); + ve[n] = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ve[n])) { err = PTR_ERR(ve[n]); nctx = n; @@ -3929,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt, * restrict it to our desired engine within the virtual engine. */ - ve = intel_engine_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_close; @@ -4060,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt, i915_request_add(rq); } - ce = intel_engine_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4112,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt, /* XXX We do not handle oversubscription and fairness with normal rq */ for (n = 0; n < nsibling; n++) { - ce = intel_engine_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4214,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt, if (err) goto out_scratch; - ve = intel_engine_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_scratch; @@ -4354,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt, if (igt_spinner_init(&spin, gt)) return -ENOMEM; - ve = intel_engine_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling, 0); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_spin; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 8ff582222aff..ba10bd374cee 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -142,6 +142,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, + INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 8f8182bf7c11..6e228343e8cb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p) } } } + +void intel_guc_write_barrier(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { + /* + * Ensure intel_uncore_write_fw can be used rather than + * intel_uncore_write. + */ + GEM_BUG_ON(guc->send_regs.fw_domains); + + /* + * This register is used by the i915 and GuC for MMIO based + * communication. Once we are in this code CTBs are the only + * method the i915 uses to communicate with the GuC so it is + * safe to write to this register (a value of 0 is NOP for MMIO + * communication). If we ever start mixing CTBs and MMIOs a new + * register will have to be chosen. This function is also used + * to enforce ordering of a work queue item write and an update + * to the process descriptor. When a work queue is being used, + * CTBs are also the only mechanism of communication. + */ + intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); + } else { + /* wmb() sufficient for a barrier if in smem */ + wmb(); + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 5dd174babf7a..31cf9fb48c7e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -46,6 +46,15 @@ struct intel_guc { * submitted until the stalled request is processed. */ struct i915_request *stalled_request; + /** + * @submission_stall_reason: reason why submission is stalled + */ + enum { + STALL_NONE, + STALL_REGISTER_CONTEXT, + STALL_MOVE_LRC_TAIL, + STALL_ADD_REQUEST, + } submission_stall_reason; /* intel_guc_recv interrupt related state */ /** @irq_lock: protects GuC irq state */ @@ -71,16 +80,41 @@ struct intel_guc { } interrupts; /** - * @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and - * ce->guc_id.ref when transitioning in and out of zero - */ - spinlock_t contexts_lock; - /** @guc_ids: used to allocate unique ce->guc_id.id values */ - struct ida guc_ids; - /** - * @guc_id_list: list of intel_context with valid guc_ids but no refs + * @submission_state: sub-structure for submission state protected by + * single lock */ - struct list_head guc_id_list; + struct { + /** + * @lock: protects everything in submission_state, + * ce->guc_id.id, and ce->guc_id.ref when transitioning in and + * out of zero + */ + spinlock_t lock; + /** + * @guc_ids: used to allocate new guc_ids, single-lrc + */ + struct ida guc_ids; + /** + * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc + */ + unsigned long *guc_ids_bitmap; + /** + * @guc_id_list: list of intel_context with valid guc_ids but no + * refs + */ + struct list_head guc_id_list; + /** + * @destroyed_contexts: list of contexts waiting to be destroyed + * (deregistered with the GuC) + */ + struct list_head destroyed_contexts; + /** + * @destroyed_worker: worker to deregister contexts, need as we + * need to take a GT PM reference and can't from destroy + * function as it might be in an atomic context (no sleeping) + */ + struct work_struct destroyed_worker; + } submission_state; /** * @submission_supported: tracks whether we support GuC submission on @@ -342,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); +void intel_guc_write_barrier(struct intel_guc *guc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 2c6ea64af7ec..621c893a009f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt, for_each_engine(engine, gt, id) { u8 guc_class = engine_class_to_guc_class(engine->class); - system_info->mapping_table[guc_class][engine->instance] = + system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] = engine->instance; } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 0a3504bc0b61..a0cc34be7b56 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) return ++ct->requests.last_fence; } -static void write_barrier(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_gt *gt = guc_to_gt(guc); - - if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { - GEM_BUG_ON(guc->send_regs.fw_domains); - /* - * This register is used by the i915 and GuC for MMIO based - * communication. Once we are in this code CTBs are the only - * method the i915 uses to communicate with the GuC so it is - * safe to write to this register (a value of 0 is NOP for MMIO - * communication). If we ever start mixing CTBs and MMIOs a new - * register will have to be chosen. - */ - intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); - } else { - /* wmb() sufficient for a barrier if in smem */ - wmb(); - } -} - static int ct_write(struct intel_guc_ct *ct, const u32 *action, u32 len /* in dwords */, @@ -474,7 +452,7 @@ static int ct_write(struct intel_guc_ct *ct, * make sure H2G buffer update and LRC tail update (if this triggering a * submission) are visible before updating the descriptor tail */ - write_barrier(ct); + intel_guc_write_barrier(ct_to_guc(ct)); /* update local copies */ ctb->tail = tail; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index fa4be13c8854..722933e26347 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -52,27 +52,27 @@ #define GUC_DOORBELL_INVALID 256 -#define GUC_WQ_SIZE (PAGE_SIZE * 2) - -/* Work queue item header definitions */ +/* + * Work queue item header definitions + * + * Work queue is circular buffer used to submit complex (multi-lrc) submissions + * to the GuC. A work queue item is an entry in the circular buffer. + */ #define WQ_STATUS_ACTIVE 1 #define WQ_STATUS_SUSPENDED 2 #define WQ_STATUS_CMD_ERROR 3 #define WQ_STATUS_ENGINE_ID_NOT_USED 4 #define WQ_STATUS_SUSPENDED_FROM_RESET 5 -#define WQ_TYPE_SHIFT 0 -#define WQ_TYPE_BATCH_BUF (0x1 << WQ_TYPE_SHIFT) -#define WQ_TYPE_PSEUDO (0x2 << WQ_TYPE_SHIFT) -#define WQ_TYPE_INORDER (0x3 << WQ_TYPE_SHIFT) -#define WQ_TYPE_NOOP (0x4 << WQ_TYPE_SHIFT) -#define WQ_TARGET_SHIFT 10 -#define WQ_LEN_SHIFT 16 -#define WQ_NO_WCFLUSH_WAIT (1 << 27) -#define WQ_PRESENT_WORKLOAD (1 << 28) - -#define WQ_RING_TAIL_SHIFT 20 -#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */ -#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT) +#define WQ_TYPE_BATCH_BUF 0x1 +#define WQ_TYPE_PSEUDO 0x2 +#define WQ_TYPE_INORDER 0x3 +#define WQ_TYPE_NOOP 0x4 +#define WQ_TYPE_MULTI_LRC 0x5 +#define WQ_TYPE_MASK GENMASK(7, 0) +#define WQ_LEN_MASK GENMASK(26, 16) + +#define WQ_GUC_ID_MASK GENMASK(15, 0) +#define WQ_RING_TAIL_MASK GENMASK(28, 18) #define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0) #define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1) @@ -186,7 +186,7 @@ struct guc_process_desc { u32 wq_status; u32 engine_presence; u32 priority; - u32 reserved[30]; + u32 reserved[36]; } __packed; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ba0de35f6323..d7710debcd47 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -11,6 +11,7 @@ #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_heartbeat.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" @@ -68,7 +69,7 @@ * fence is used to stall all requests associated with this guc_id until the * corresponding G2H returns indicating the guc_id has been deregistered. * - * guc_ids: + * submission_state.guc_ids: * Unique number associated with private GuC context data passed in during * context registration / submission / deregistration. 64k available. Simple ida * is used for allocation. @@ -89,9 +90,9 @@ * sched_engine can be submitting at a time. Currently only one sched_engine is * used for all of GuC submission but that could change in the future. * - * guc->contexts_lock - * Protects guc_id allocation for the given GuC, i.e. only one context can be - * doing guc_id allocation operations at a time for each GuC in the system. + * guc->submission_state.lock + * Global lock for GuC submission state. Protects guc_ids and destroyed contexts + * list. * * ce->guc_state.lock * Protects everything under ce->guc_state. Ensures that a context is in the @@ -103,7 +104,7 @@ * * Lock ordering rules: * sched_engine->lock -> ce->guc_state.lock - * guc->contexts_lock -> ce->guc_state.lock + * guc->submission_state.lock -> ce->guc_state.lock * * Reset races: * When a full GT reset is triggered it is assumed that some G2H responses to @@ -124,11 +125,27 @@ struct guc_virtual_engine { }; static struct intel_context * -guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, + unsigned long flags); + +static struct intel_context * +guc_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width); #define GUC_REQUEST_SIZE 64 /* bytes */ /* + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous + * per the GuC submission interface. A different allocation algorithm is used + * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to + * partition the guc_id space. We believe the number of multi-lrc contexts in + * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for + * multi-lrc. + */ +#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16) + +/* * Below is a set of functions which control the GuC scheduling state which * require a lock. */ @@ -324,6 +341,12 @@ static inline void decr_context_committed_requests(struct intel_context *ce) GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); } +static struct intel_context * +request_to_scheduling_context(struct i915_request *rq) +{ + return intel_context_to_parent(rq->context); +} + static inline bool context_guc_id_invalid(struct intel_context *ce) { return ce->guc_id.id == GUC_INVALID_LRC_ID; @@ -344,6 +367,104 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } +/* + * When using multi-lrc submission a scratch memory area is reserved in the + * parent's context state for the process descriptor, work queue, and handshake + * between the parent + children contexts to insert safe preemption points + * between each of the BBs. Currently the scratch area is sized to a page. + * + * The layout of this scratch area is below: + * 0 guc_process_desc + * + sizeof(struct guc_process_desc) child go + * + CACHELINE_BYTES child join[0] + * ... + * + CACHELINE_BYTES child join[n - 1] + * ... unused + * PARENT_SCRATCH_SIZE / 2 work queue start + * ... work queue + * PARENT_SCRATCH_SIZE - 1 work queue end + */ +#define WQ_SIZE (PARENT_SCRATCH_SIZE / 2) +#define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE) + +struct sync_semaphore { + u32 semaphore; + u8 unused[CACHELINE_BYTES - sizeof(u32)]; +}; + +struct parent_scratch { + struct guc_process_desc pdesc; + + struct sync_semaphore go; + struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; + + u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) - + sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; + + u32 wq[WQ_SIZE / sizeof(u32)]; +}; + +static u32 __get_parent_scratch_offset(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->parallel.guc.parent_page); + + return ce->parallel.guc.parent_page * PAGE_SIZE; +} + +static u32 __get_wq_offset(struct intel_context *ce) +{ + BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET); + + return __get_parent_scratch_offset(ce) + WQ_OFFSET; +} + +static struct parent_scratch * +__get_parent_scratch(struct intel_context *ce) +{ + BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE); + BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES); + + /* + * Need to subtract LRC_STATE_OFFSET here as the + * parallel.guc.parent_page is the offset into ce->state while + * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET. + */ + return (struct parent_scratch *) + (ce->lrc_reg_state + + ((__get_parent_scratch_offset(ce) - + LRC_STATE_OFFSET) / sizeof(u32))); +} + +static struct guc_process_desc * +__get_process_desc(struct intel_context *ce) +{ + struct parent_scratch *ps = __get_parent_scratch(ce); + + return &ps->pdesc; +} + +static u32 *get_wq_pointer(struct guc_process_desc *desc, + struct intel_context *ce, + u32 wqi_size) +{ + /* + * Check for space in work queue. Caching a value of head pointer in + * intel_context structure in order reduce the number accesses to shared + * GPU memory which may be across a PCIe bus. + */ +#define AVAILABLE_SPACE \ + CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) + if (wqi_size > AVAILABLE_SPACE) { + ce->parallel.guc.wqi_head = READ_ONCE(desc->head); + + if (wqi_size > AVAILABLE_SPACE) + return NULL; + } +#undef AVAILABLE_SPACE + + return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; +} + static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) { struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; @@ -503,10 +624,10 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); -static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) +static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) { int err = 0; - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); u32 action[3]; int len = 0; u32 g2h_len_dw = 0; @@ -527,26 +648,17 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); - /* - * Corner case where the GuC firmware was blown away and reloaded while - * this context was pinned. - */ - if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id))) { - err = guc_lrc_desc_pin(ce, false); - if (unlikely(err)) - return err; - } - spin_lock(&ce->guc_state.lock); /* * The request / context will be run on the hardware when scheduling - * gets enabled in the unblock. + * gets enabled in the unblock. For multi-lrc we still submit the + * context to move the LRC tails. */ - if (unlikely(context_blocked(ce))) + if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) goto out; - enabled = context_enabled(ce); + enabled = context_enabled(ce) || context_blocked(ce); if (!enabled) { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; @@ -565,6 +677,18 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) trace_intel_context_sched_enable(ce); atomic_inc(&guc->outstanding_submission_g2h); set_context_enabled(ce); + + /* + * Without multi-lrc KMD does the submission step (moving the + * lrc tail) so enabling scheduling is sufficient to submit the + * context. This isn't the case in multi-lrc submission as the + * GuC needs to move the tails, hence the need for another H2G + * to submit a multi-lrc context after enabling scheduling. + */ + if (intel_context_is_parent(ce)) { + action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; + err = intel_guc_send_nb(guc, action, len - 1, 0); + } } else if (!enabled) { clr_context_pending_enable(ce); intel_context_put(ce); @@ -577,6 +701,18 @@ out: return err; } +static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) +{ + int ret = __guc_add_request(guc, rq); + + if (unlikely(ret == -EBUSY)) { + guc->stalled_request = rq; + guc->submission_stall_reason = STALL_ADD_REQUEST; + } + + return ret; +} + static inline void guc_set_lrc_tail(struct i915_request *rq) { rq->context->lrc_reg_state[CTX_RING_TAIL] = @@ -588,6 +724,135 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority; } +static bool is_multi_lrc_rq(struct i915_request *rq) +{ + return intel_context_is_parallel(rq->context); +} + +static bool can_merge_rq(struct i915_request *rq, + struct i915_request *last) +{ + return request_to_scheduling_context(rq) == + request_to_scheduling_context(last); +} + +static u32 wq_space_until_wrap(struct intel_context *ce) +{ + return (WQ_SIZE - ce->parallel.guc.wqi_tail); +} + +static void write_wqi(struct guc_process_desc *desc, + struct intel_context *ce, + u32 wqi_size) +{ + BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); + + /* + * Ensure WQI are visible before updating tail + */ + intel_guc_write_barrier(ce_to_guc(ce)); + + ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & + (WQ_SIZE - 1); + WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail); +} + +static int guc_wq_noop_append(struct intel_context *ce) +{ + struct guc_process_desc *desc = __get_process_desc(ce); + u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce)); + u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; + + if (!wqi) + return -EBUSY; + + GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); + + *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | + FIELD_PREP(WQ_LEN_MASK, len_dw); + ce->parallel.guc.wqi_tail = 0; + + return 0; +} + +static int __guc_wq_item_append(struct i915_request *rq) +{ + struct intel_context *ce = request_to_scheduling_context(rq); + struct intel_context *child; + struct guc_process_desc *desc = __get_process_desc(ce); + unsigned int wqi_size = (ce->parallel.number_children + 4) * + sizeof(u32); + u32 *wqi; + u32 len_dw = (wqi_size / sizeof(u32)) - 1; + int ret; + + /* Ensure context is in correct state updating work queue */ + GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); + GEM_BUG_ON(context_guc_id_invalid(ce)); + GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); + GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)); + + /* Insert NOOP if this work queue item will wrap the tail pointer. */ + if (wqi_size > wq_space_until_wrap(ce)) { + ret = guc_wq_noop_append(ce); + if (ret) + return ret; + } + + wqi = get_wq_pointer(desc, ce, wqi_size); + if (!wqi) + return -EBUSY; + + GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); + + *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | + FIELD_PREP(WQ_LEN_MASK, len_dw); + *wqi++ = ce->lrc.lrca; + *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | + FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); + *wqi++ = 0; /* fence_id */ + for_each_child(ce, child) + *wqi++ = child->ring->tail / sizeof(u64); + + write_wqi(desc, ce, wqi_size); + + return 0; +} + +static int guc_wq_item_append(struct intel_guc *guc, + struct i915_request *rq) +{ + struct intel_context *ce = request_to_scheduling_context(rq); + int ret = 0; + + if (likely(!intel_context_is_banned(ce))) { + ret = __guc_wq_item_append(rq); + + if (unlikely(ret == -EBUSY)) { + guc->stalled_request = rq; + guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; + } + } + + return ret; +} + +static bool multi_lrc_submit(struct i915_request *rq) +{ + struct intel_context *ce = request_to_scheduling_context(rq); + + intel_ring_set_tail(rq->ring, rq->tail); + + /* + * We expect the front end (execbuf IOCTL) to set this flag on the last + * request generated from a multi-BB submission. This indicates to the + * backend (GuC interface) that we should submit this context thus + * submitting all the requests generated in parallel. + */ + return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || + intel_context_is_banned(ce); +} + static int guc_dequeue_one_context(struct intel_guc *guc) { struct i915_sched_engine * const sched_engine = guc->sched_engine; @@ -601,7 +866,17 @@ static int guc_dequeue_one_context(struct intel_guc *guc) if (guc->stalled_request) { submit = true; last = guc->stalled_request; - goto resubmit; + + switch (guc->submission_stall_reason) { + case STALL_REGISTER_CONTEXT: + goto register_context; + case STALL_MOVE_LRC_TAIL: + goto move_lrc_tail; + case STALL_ADD_REQUEST: + goto add_request; + default: + MISSING_CASE(guc->submission_stall_reason); + } } while ((rb = rb_first_cached(&sched_engine->queue))) { @@ -609,8 +884,8 @@ static int guc_dequeue_one_context(struct intel_guc *guc) struct i915_request *rq, *rn; priolist_for_each_request_consume(rq, rn, p) { - if (last && rq->context != last->context) - goto done; + if (last && !can_merge_rq(rq, last)) + goto register_context; list_del_init(&rq->sched.link); @@ -618,33 +893,84 @@ static int guc_dequeue_one_context(struct intel_guc *guc) trace_i915_request_in(rq, 0); last = rq; - submit = true; + + if (is_multi_lrc_rq(rq)) { + /* + * We need to coalesce all multi-lrc requests in + * a relationship into a single H2G. We are + * guaranteed that all of these requests will be + * submitted sequentially. + */ + if (multi_lrc_submit(rq)) { + submit = true; + goto register_context; + } + } else { + submit = true; + } } rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } -done: + +register_context: if (submit) { - guc_set_lrc_tail(last); -resubmit: + struct intel_context *ce = request_to_scheduling_context(last); + + if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) && + !intel_context_is_banned(ce))) { + ret = guc_lrc_desc_pin(ce, false); + if (unlikely(ret == -EPIPE)) { + goto deadlk; + } else if (ret == -EBUSY) { + guc->stalled_request = last; + guc->submission_stall_reason = + STALL_REGISTER_CONTEXT; + goto schedule_tasklet; + } else if (ret != 0) { + GEM_WARN_ON(ret); /* Unexpected */ + goto deadlk; + } + } + +move_lrc_tail: + if (is_multi_lrc_rq(last)) { + ret = guc_wq_item_append(guc, last); + if (ret == -EBUSY) { + goto schedule_tasklet; + } else if (ret != 0) { + GEM_WARN_ON(ret); /* Unexpected */ + goto deadlk; + } + } else { + guc_set_lrc_tail(last); + } + +add_request: ret = guc_add_request(guc, last); - if (unlikely(ret == -EPIPE)) + if (unlikely(ret == -EPIPE)) { + goto deadlk; + } else if (ret == -EBUSY) { + goto schedule_tasklet; + } else if (ret != 0) { + GEM_WARN_ON(ret); /* Unexpected */ goto deadlk; - else if (ret == -EBUSY) { - tasklet_schedule(&sched_engine->tasklet); - guc->stalled_request = last; - return false; } } guc->stalled_request = NULL; + guc->submission_stall_reason = STALL_NONE; return submit; deadlk: sched_engine->tasklet.callback = NULL; tasklet_disable_nosync(&sched_engine->tasklet); return false; + +schedule_tasklet: + tasklet_schedule(&sched_engine->tasklet); + return false; } static void guc_submission_tasklet(struct tasklet_struct *t) @@ -719,6 +1045,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) if (deregister) guc_signal_context_fence(ce); if (destroyed) { + intel_gt_pm_put_async(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } @@ -797,6 +1124,8 @@ static void guc_flush_submissions(struct intel_guc *guc) spin_unlock_irqrestore(&sched_engine->lock, flags); } +static void guc_flush_destroyed_contexts(struct intel_guc *guc); + void intel_guc_submission_reset_prepare(struct intel_guc *guc) { int i; @@ -815,6 +1144,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) spin_unlock_irq(&guc_to_gt(guc)->irq_lock); guc_flush_submissions(guc); + guc_flush_destroyed_contexts(guc); /* * Handle any outstanding G2Hs before reset. Call IRQ handler directly @@ -929,10 +1259,15 @@ __unwind_incomplete_requests(struct intel_context *ce) static void __guc_reset_context(struct intel_context *ce, bool stalled) { + bool local_stalled; struct i915_request *rq; unsigned long flags; u32 head; + int i, number_children = ce->parallel.number_children; bool skip = false; + struct intel_context *parent = ce; + + GEM_BUG_ON(intel_context_is_child(ce)); intel_context_get(ce); @@ -958,25 +1293,38 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) if (unlikely(skip)) goto out_put; - rq = intel_context_find_active_request(ce); - if (!rq) { - head = ce->ring->tail; - stalled = false; - goto out_replay; - } + /* + * For each context in the relationship find the hanging request + * resetting each context / request as needed + */ + for (i = 0; i < number_children + 1; ++i) { + if (!intel_context_is_pinned(ce)) + goto next_context; - if (!i915_request_started(rq)) - stalled = false; + local_stalled = false; + rq = intel_context_find_active_request(ce); + if (!rq) { + head = ce->ring->tail; + goto out_replay; + } - GEM_BUG_ON(i915_active_is_idle(&ce->active)); - head = intel_ring_wrap(ce->ring, rq->head); - __i915_request_reset(rq, stalled); + if (i915_request_started(rq)) + local_stalled = true; + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + head = intel_ring_wrap(ce->ring, rq->head); + + __i915_request_reset(rq, local_stalled && stalled); out_replay: - guc_reset_state(ce, head, stalled); - __unwind_incomplete_requests(ce); + guc_reset_state(ce, head, local_stalled && stalled); +next_context: + if (i != number_children) + ce = list_next_entry(ce, parallel.child_link); + } + + __unwind_incomplete_requests(parent); out_put: - intel_context_put(ce); + intel_context_put(parent); } void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) @@ -997,7 +1345,8 @@ void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) xa_unlock(&guc->context_lookup); - if (intel_context_is_pinned(ce)) + if (intel_context_is_pinned(ce) && + !intel_context_is_child(ce)) __guc_reset_context(ce, stalled); intel_context_put(ce); @@ -1089,7 +1438,8 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) xa_unlock(&guc->context_lookup); - if (intel_context_is_pinned(ce)) + if (intel_context_is_pinned(ce) && + !intel_context_is_child(ce)) guc_cancel_context_requests(ce); intel_context_put(ce); @@ -1126,6 +1476,8 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) intel_gt_unpark_heartbeats(guc_to_gt(guc)); } +static void destroyed_worker_func(struct work_struct *w); + /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. @@ -1148,9 +1500,17 @@ int intel_guc_submission_init(struct intel_guc *guc) xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); - spin_lock_init(&guc->contexts_lock); - INIT_LIST_HEAD(&guc->guc_id_list); - ida_init(&guc->guc_ids); + spin_lock_init(&guc->submission_state.lock); + INIT_LIST_HEAD(&guc->submission_state.guc_id_list); + ida_init(&guc->submission_state.guc_ids); + INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); + INIT_WORK(&guc->submission_state.destroyed_worker, + destroyed_worker_func); + + guc->submission_state.guc_ids_bitmap = + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL); + if (!guc->submission_state.guc_ids_bitmap) + return -ENOMEM; return 0; } @@ -1160,8 +1520,10 @@ void intel_guc_submission_fini(struct intel_guc *guc) if (!guc->lrc_desc_pool) return; + guc_flush_destroyed_contexts(guc); guc_lrc_desc_pool_destroy(guc); i915_sched_engine_put(guc->sched_engine); + bitmap_free(guc->submission_state.guc_ids_bitmap); } static inline void queue_request(struct i915_sched_engine *sched_engine, @@ -1178,16 +1540,22 @@ static inline void queue_request(struct i915_sched_engine *sched_engine, static int guc_bypass_tasklet_submit(struct intel_guc *guc, struct i915_request *rq) { - int ret; + int ret = 0; __i915_request_submit(rq); trace_i915_request_in(rq, 0); - guc_set_lrc_tail(rq); - ret = guc_add_request(guc, rq); - if (ret == -EBUSY) - guc->stalled_request = rq; + if (is_multi_lrc_rq(rq)) { + if (multi_lrc_submit(rq)) { + ret = guc_wq_item_append(guc, rq); + if (!ret) + ret = guc_add_request(guc, rq); + } + } else { + guc_set_lrc_tail(rq); + ret = guc_add_request(guc, rq); + } if (unlikely(ret == -EPIPE)) disable_submission(guc); @@ -1195,6 +1563,16 @@ static int guc_bypass_tasklet_submit(struct intel_guc *guc, return ret; } +static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) +{ + struct i915_sched_engine *sched_engine = rq->engine->sched_engine; + struct intel_context *ce = request_to_scheduling_context(rq); + + return submission_disabled(guc) || guc->stalled_request || + !i915_sched_engine_is_empty(sched_engine) || + !lrc_desc_registered(guc, ce->guc_id.id); +} + static void guc_submit_request(struct i915_request *rq) { struct i915_sched_engine *sched_engine = rq->engine->sched_engine; @@ -1204,8 +1582,7 @@ static void guc_submit_request(struct i915_request *rq) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&sched_engine->lock, flags); - if (submission_disabled(guc) || guc->stalled_request || - !i915_sched_engine_is_empty(sched_engine)) + if (need_tasklet(guc, rq)) queue_request(sched_engine, rq, rq_prio(rq)); else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) tasklet_hi_schedule(&sched_engine->tasklet); @@ -1213,17 +1590,43 @@ static void guc_submit_request(struct i915_request *rq) spin_unlock_irqrestore(&sched_engine->lock, flags); } -static int new_guc_id(struct intel_guc *guc) +static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) { - return ida_simple_get(&guc->guc_ids, 0, - GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL | - __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + int ret; + + GEM_BUG_ON(intel_context_is_child(ce)); + + if (intel_context_is_parent(ce)) + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, + NUMBER_MULTI_LRC_GUC_ID, + order_base_2(ce->parallel.number_children + + 1)); + else + ret = ida_simple_get(&guc->submission_state.guc_ids, + NUMBER_MULTI_LRC_GUC_ID, + GUC_MAX_LRC_DESCRIPTORS, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | + __GFP_NOWARN); + if (unlikely(ret < 0)) + return ret; + + ce->guc_id.id = ret; + return 0; } static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) { + GEM_BUG_ON(intel_context_is_child(ce)); + if (!context_guc_id_invalid(ce)) { - ida_simple_remove(&guc->guc_ids, ce->guc_id.id); + if (intel_context_is_parent(ce)) + bitmap_release_region(guc->submission_state.guc_ids_bitmap, + ce->guc_id.id, + order_base_2(ce->parallel.number_children + + 1)); + else + ida_simple_remove(&guc->submission_state.guc_ids, + ce->guc_id.id); reset_lrc_desc(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } @@ -1235,54 +1638,69 @@ static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) { unsigned long flags; - spin_lock_irqsave(&guc->contexts_lock, flags); + spin_lock_irqsave(&guc->submission_state.lock, flags); __release_guc_id(guc, ce); - spin_unlock_irqrestore(&guc->contexts_lock, flags); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); } -static int steal_guc_id(struct intel_guc *guc) +static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) { - struct intel_context *ce; - int guc_id; + struct intel_context *cn; - lockdep_assert_held(&guc->contexts_lock); + lockdep_assert_held(&guc->submission_state.lock); + GEM_BUG_ON(intel_context_is_child(ce)); + GEM_BUG_ON(intel_context_is_parent(ce)); - if (!list_empty(&guc->guc_id_list)) { - ce = list_first_entry(&guc->guc_id_list, + if (!list_empty(&guc->submission_state.guc_id_list)) { + cn = list_first_entry(&guc->submission_state.guc_id_list, struct intel_context, guc_id.link); - GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); - GEM_BUG_ON(context_guc_id_invalid(ce)); + GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); + GEM_BUG_ON(context_guc_id_invalid(cn)); + GEM_BUG_ON(intel_context_is_child(cn)); + GEM_BUG_ON(intel_context_is_parent(cn)); - list_del_init(&ce->guc_id.link); - guc_id = ce->guc_id.id; + list_del_init(&cn->guc_id.link); + ce->guc_id = cn->guc_id; spin_lock(&ce->guc_state.lock); - clr_context_registered(ce); + clr_context_registered(cn); spin_unlock(&ce->guc_state.lock); - set_context_guc_id_invalid(ce); - return guc_id; + set_context_guc_id_invalid(cn); + + return 0; } else { return -EAGAIN; } } -static int assign_guc_id(struct intel_guc *guc, u16 *out) +static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) { int ret; - lockdep_assert_held(&guc->contexts_lock); + lockdep_assert_held(&guc->submission_state.lock); + GEM_BUG_ON(intel_context_is_child(ce)); - ret = new_guc_id(guc); + ret = new_guc_id(guc, ce); if (unlikely(ret < 0)) { - ret = steal_guc_id(guc); + if (intel_context_is_parent(ce)) + return -ENOSPC; + + ret = steal_guc_id(guc, ce); if (ret < 0) return ret; } - *out = ret; + if (intel_context_is_parent(ce)) { + struct intel_context *child; + int i = 1; + + for_each_child(ce, child) + child->guc_id.id = ce->guc_id.id + i++; + } + return 0; } @@ -1295,12 +1713,12 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); try_again: - spin_lock_irqsave(&guc->contexts_lock, flags); + spin_lock_irqsave(&guc->submission_state.lock, flags); might_lock(&ce->guc_state.lock); if (context_guc_id_invalid(ce)) { - ret = assign_guc_id(guc, &ce->guc_id.id); + ret = assign_guc_id(guc, ce); if (ret) goto out_unlock; ret = 1; /* Indidcates newly assigned guc_id */ @@ -1310,7 +1728,7 @@ try_again: atomic_inc(&ce->guc_id.ref); out_unlock: - spin_unlock_irqrestore(&guc->contexts_lock, flags); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); /* * -EAGAIN indicates no guc_id are available, let's retire any @@ -1342,15 +1760,42 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) unsigned long flags; GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); + GEM_BUG_ON(intel_context_is_child(ce)); - if (unlikely(context_guc_id_invalid(ce))) + if (unlikely(context_guc_id_invalid(ce) || + intel_context_is_parent(ce))) return; - spin_lock_irqsave(&guc->contexts_lock, flags); + spin_lock_irqsave(&guc->submission_state.lock, flags); if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && !atomic_read(&ce->guc_id.ref)) - list_add_tail(&ce->guc_id.link, &guc->guc_id_list); - spin_unlock_irqrestore(&guc->contexts_lock, flags); + list_add_tail(&ce->guc_id.link, + &guc->submission_state.guc_id_list); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); +} + +static int __guc_action_register_multi_lrc(struct intel_guc *guc, + struct intel_context *ce, + u32 guc_id, + u32 offset, + bool loop) +{ + struct intel_context *child; + u32 action[4 + MAX_ENGINE_INSTANCE]; + int len = 0; + + GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); + + action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; + action[len++] = guc_id; + action[len++] = ce->parallel.number_children + 1; + action[len++] = offset; + for_each_child(ce, child) { + offset += sizeof(struct guc_lrc_desc); + action[len++] = offset; + } + + return guc_submission_send_busy_loop(guc, action, len, 0, loop); } static int __guc_action_register_context(struct intel_guc *guc, @@ -1375,9 +1820,15 @@ static int register_context(struct intel_context *ce, bool loop) ce->guc_id.id * sizeof(struct guc_lrc_desc); int ret; + GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - ret = __guc_action_register_context(guc, ce->guc_id.id, offset, loop); + if (intel_context_is_parent(ce)) + ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id, + offset, loop); + else + ret = __guc_action_register_context(guc, ce->guc_id.id, offset, + loop); if (likely(!ret)) { unsigned long flags; @@ -1406,26 +1857,31 @@ static int deregister_context(struct intel_context *ce, u32 guc_id) { struct intel_guc *guc = ce_to_guc(ce); + GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_deregister(ce); return __guc_action_deregister_context(guc, guc_id); } -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) +static inline void clear_children_join_go_memory(struct intel_context *ce) { - switch (class) { - case RENDER_CLASS: - return mask >> RCS0; - case VIDEO_ENHANCEMENT_CLASS: - return mask >> VECS0; - case VIDEO_DECODE_CLASS: - return mask >> VCS0; - case COPY_ENGINE_CLASS: - return mask >> BCS0; - default: - MISSING_CASE(class); - return 0; - } + struct parent_scratch *ps = __get_parent_scratch(ce); + int i; + + ps->go.semaphore = 0; + for (i = 0; i < ce->parallel.number_children + 1; ++i) + ps->join[i].semaphore = 0; +} + +static inline u32 get_children_go_value(struct intel_context *ce) +{ + return __get_parent_scratch(ce)->go.semaphore; +} + +static inline u32 get_children_join_value(struct intel_context *ce, + u8 child_index) +{ + return __get_parent_scratch(ce)->join[child_index].semaphore; } static void guc_context_policy_init(struct intel_engine_cs *engine, @@ -1450,6 +1906,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) struct guc_lrc_desc *desc; bool context_registered; intel_wakeref_t wakeref; + struct intel_context *child; int ret = 0; GEM_BUG_ON(!engine->mask); @@ -1469,14 +1926,50 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc = __get_lrc_desc(guc, desc_idx); desc->engine_class = engine_class_to_guc_class(engine->class); - desc->engine_submit_mask = adjust_engine_mask(engine->class, - engine->mask); + desc->engine_submit_mask = engine->logical_mask; desc->hw_context_desc = ce->lrc.lrca; desc->priority = ce->guc_state.prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); /* + * If context is a parent, we need to register a process descriptor + * describing a work queue and register all child contexts. + */ + if (intel_context_is_parent(ce)) { + struct guc_process_desc *pdesc; + + ce->parallel.guc.wqi_tail = 0; + ce->parallel.guc.wqi_head = 0; + + desc->process_desc = i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce); + desc->wq_addr = i915_ggtt_offset(ce->state) + + __get_wq_offset(ce); + desc->wq_size = WQ_SIZE; + + pdesc = __get_process_desc(ce); + memset(pdesc, 0, sizeof(*(pdesc))); + pdesc->stage_id = ce->guc_id.id; + pdesc->wq_base_addr = desc->wq_addr; + pdesc->wq_size_bytes = desc->wq_size; + pdesc->wq_status = WQ_STATUS_ACTIVE; + + for_each_child(ce, child) { + desc = __get_lrc_desc(guc, child->guc_id.id); + + desc->engine_class = + engine_class_to_guc_class(engine->class); + desc->hw_context_desc = child->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init(engine, desc); + } + + clear_children_join_go_memory(ce); + } + + /* * The context_lookup xarray is used to determine if the hardware * context is currently registered. There are two cases in which it * could be registered either the guc_id has been stolen from another @@ -1559,7 +2052,12 @@ static int guc_context_pre_pin(struct intel_context *ce, static int guc_context_pin(struct intel_context *ce, void *vaddr) { - return __guc_context_pin(ce, ce->engine, vaddr); + int ret = __guc_context_pin(ce, ce->engine, vaddr); + + if (likely(!ret && !intel_context_is_barrier(ce))) + intel_engine_pm_get(ce->engine); + + return ret; } static void guc_context_unpin(struct intel_context *ce) @@ -1568,6 +2066,9 @@ static void guc_context_unpin(struct intel_context *ce) unpin_guc_id(guc, ce); lrc_unpin(ce); + + if (likely(!intel_context_is_barrier(ce))) + intel_engine_pm_put_async(ce->engine); } static void guc_context_post_unpin(struct intel_context *ce) @@ -1602,6 +2103,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); + GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_sched_disable(ce); guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), @@ -1653,6 +2155,8 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) u16 guc_id; bool enabled; + GEM_BUG_ON(intel_context_is_child(ce)); + spin_lock_irqsave(&ce->guc_state.lock, flags); incr_context_blocked(ce); @@ -1707,6 +2211,7 @@ static void guc_context_unblock(struct intel_context *ce) bool enable; GEM_BUG_ON(context_enabled(ce)); + GEM_BUG_ON(intel_context_is_child(ce)); spin_lock_irqsave(&ce->guc_state.lock, flags); @@ -1733,11 +2238,14 @@ static void guc_context_unblock(struct intel_context *ce) static void guc_context_cancel_request(struct intel_context *ce, struct i915_request *rq) { + struct intel_context *block_context = + request_to_scheduling_context(rq); + if (i915_sw_fence_signaled(&rq->submit)) { struct i915_sw_fence *fence; intel_context_get(ce); - fence = guc_context_block(ce); + fence = guc_context_block(block_context); i915_sw_fence_wait(fence); if (!i915_request_completed(rq)) { __i915_request_skip(rq); @@ -1751,7 +2259,7 @@ static void guc_context_cancel_request(struct intel_context *ce, */ flush_work(&ce_to_guc(ce)->ct.requests.worker); - guc_context_unblock(ce); + guc_context_unblock(block_context); intel_context_put(ce); } } @@ -1777,6 +2285,8 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) intel_wakeref_t wakeref; unsigned long flags; + GEM_BUG_ON(intel_context_is_child(ce)); + guc_flush_submissions(guc); spin_lock_irqsave(&ce->guc_state.lock, flags); @@ -1827,6 +2337,8 @@ static void guc_context_sched_disable(struct intel_context *ce) intel_wakeref_t wakeref; u16 guc_id; + GEM_BUG_ON(intel_context_is_child(ce)); + spin_lock_irqsave(&ce->guc_state.lock, flags); /* @@ -1857,11 +2369,30 @@ unpin: static inline void guc_lrc_desc_unpin(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); + struct intel_gt *gt = guc_to_gt(guc); + unsigned long flags; + bool disabled; + GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); GEM_BUG_ON(context_enabled(ce)); + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) { + __intel_gt_pm_get(gt); + set_context_destroyed(ce); + clr_context_registered(ce); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + release_guc_id(guc, ce); + __guc_context_destroy(ce); + return; + } + deregister_context(ce, ce->guc_id.id); } @@ -1889,78 +2420,86 @@ static void __guc_context_destroy(struct intel_context *ce) } } +static void guc_flush_destroyed_contexts(struct intel_guc *guc) +{ + struct intel_context *ce, *cn; + unsigned long flags; + + GEM_BUG_ON(!submission_disabled(guc) && + guc_submission_initialized(guc)); + + spin_lock_irqsave(&guc->submission_state.lock, flags); + list_for_each_entry_safe(ce, cn, + &guc->submission_state.destroyed_contexts, + destroyed_link) { + list_del_init(&ce->destroyed_link); + __release_guc_id(guc, ce); + __guc_context_destroy(ce); + } + spin_unlock_irqrestore(&guc->submission_state.lock, flags); +} + +static void deregister_destroyed_contexts(struct intel_guc *guc) +{ + struct intel_context *ce, *cn; + unsigned long flags; + + spin_lock_irqsave(&guc->submission_state.lock, flags); + list_for_each_entry_safe(ce, cn, + &guc->submission_state.destroyed_contexts, + destroyed_link) { + list_del_init(&ce->destroyed_link); + guc_lrc_desc_unpin(ce); + } + spin_unlock_irqrestore(&guc->submission_state.lock, flags); +} + +static void destroyed_worker_func(struct work_struct *w) +{ + struct intel_guc *guc = container_of(w, struct intel_guc, + submission_state.destroyed_worker); + struct intel_gt *gt = guc_to_gt(guc); + int tmp; + + with_intel_gt_pm(gt, tmp) + deregister_destroyed_contexts(guc); +} + static void guc_context_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); - struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; struct intel_guc *guc = ce_to_guc(ce); - intel_wakeref_t wakeref; unsigned long flags; - bool disabled; + bool destroy; /* * If the guc_id is invalid this context has been stolen and we can free * it immediately. Also can be freed immediately if the context is not * registered with the GuC or the GuC is in the middle of a reset. */ - if (context_guc_id_invalid(ce)) { - __guc_context_destroy(ce); - return; - } else if (submission_disabled(guc) || - !lrc_desc_registered(guc, ce->guc_id.id)) { - release_guc_id(guc, ce); - __guc_context_destroy(ce); - return; - } - - /* - * We have to acquire the context spinlock and check guc_id again, if it - * is valid it hasn't been stolen and needs to be deregistered. We - * delete this context from the list of unpinned guc_id available to - * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB - * returns indicating this context has been deregistered the guc_id is - * returned to the pool of available guc_id. - */ - spin_lock_irqsave(&guc->contexts_lock, flags); - if (context_guc_id_invalid(ce)) { - spin_unlock_irqrestore(&guc->contexts_lock, flags); - __guc_context_destroy(ce); - return; - } - - if (!list_empty(&ce->guc_id.link)) - list_del_init(&ce->guc_id.link); - spin_unlock_irqrestore(&guc->contexts_lock, flags); - - /* Seal race with Reset */ - spin_lock_irqsave(&ce->guc_state.lock, flags); - disabled = submission_disabled(guc); - if (likely(!disabled)) { - set_context_destroyed(ce); - clr_context_registered(ce); + spin_lock_irqsave(&guc->submission_state.lock, flags); + destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || + !lrc_desc_registered(guc, ce->guc_id.id); + if (likely(!destroy)) { + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); + list_add_tail(&ce->destroyed_link, + &guc->submission_state.destroyed_contexts); + } else { + __release_guc_id(guc, ce); } - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (unlikely(disabled)) { - release_guc_id(guc, ce); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); + if (unlikely(destroy)) { __guc_context_destroy(ce); return; } /* - * We defer GuC context deregistration until the context is destroyed - * in order to save on CTBs. With this optimization ideally we only need - * 1 CTB to register the context during the first pin and 1 CTB to - * deregister the context when the context is destroyed. Without this - * optimization, a CTB would be needed every pin & unpin. - * - * XXX: Need to acqiure the runtime wakeref as this can be triggered - * from context_free_worker when runtime wakeref is not held. - * guc_lrc_desc_unpin requires the runtime as a GuC register is written - * in H2G CTB to deregister the context. A future patch may defer this - * H2G CTB if the runtime wakeref is zero. + * We use a worker to issue the H2G to deregister the context as we can + * take the GT PM for the first time which isn't allowed from an atomic + * context. */ - with_intel_runtime_pm(runtime_pm, wakeref) - guc_lrc_desc_unpin(ce); + queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); } static int guc_context_alloc(struct intel_context *ce) @@ -2056,9 +2595,10 @@ static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) static void add_to_context(struct i915_request *rq) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); + GEM_BUG_ON(intel_context_is_child(ce)); GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); spin_lock(&ce->guc_state.lock); @@ -2091,7 +2631,9 @@ static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) static void remove_from_context(struct i915_request *rq) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); + + GEM_BUG_ON(intel_context_is_child(ce)); spin_lock_irq(&ce->guc_state.lock); @@ -2132,6 +2674,7 @@ static const struct intel_context_ops guc_context_ops = { .destroy = guc_context_destroy, .create_virtual = guc_create_virtual, + .create_parallel = guc_create_parallel, }; static void submit_work_cb(struct irq_work *wrk) @@ -2168,6 +2711,8 @@ static void guc_signal_context_fence(struct intel_context *ce) { unsigned long flags; + GEM_BUG_ON(intel_context_is_child(ce)); + spin_lock_irqsave(&ce->guc_state.lock, flags); clr_context_wait_for_deregister_to_register(ce); __guc_signal_context_fence(ce); @@ -2198,7 +2743,7 @@ static void guc_context_init(struct intel_context *ce) static int guc_request_alloc(struct i915_request *rq) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); struct intel_guc *guc = ce_to_guc(ce); unsigned long flags; int ret; @@ -2302,8 +2847,30 @@ static int guc_virtual_context_pre_pin(struct intel_context *ce, static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) { struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + int ret = __guc_context_pin(ce, engine, vaddr); + intel_engine_mask_t tmp, mask = ce->engine->mask; - return __guc_context_pin(ce, engine, vaddr); + if (likely(!ret)) + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_get(engine); + + return ret; +} + +static void guc_virtual_context_unpin(struct intel_context *ce) +{ + intel_engine_mask_t tmp, mask = ce->engine->mask; + struct intel_engine_cs *engine; + struct intel_guc *guc = ce_to_guc(ce); + + GEM_BUG_ON(context_enabled(ce)); + GEM_BUG_ON(intel_context_is_barrier(ce)); + + unpin_guc_id(guc, ce); + lrc_unpin(ce); + + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_put_async(engine); } static void guc_virtual_context_enter(struct intel_context *ce) @@ -2340,7 +2907,98 @@ static const struct intel_context_ops virtual_guc_context_ops = { .pre_pin = guc_virtual_context_pre_pin, .pin = guc_virtual_context_pin, - .unpin = guc_context_unpin, + .unpin = guc_virtual_context_unpin, + .post_unpin = guc_context_post_unpin, + + .ban = guc_context_ban, + + .cancel_request = guc_context_cancel_request, + + .enter = guc_virtual_context_enter, + .exit = guc_virtual_context_exit, + + .sched_disable = guc_context_sched_disable, + + .destroy = guc_context_destroy, + + .get_sibling = guc_virtual_get_sibling, +}; + +static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) +{ + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + struct intel_guc *guc = ce_to_guc(ce); + int ret; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + ret = pin_guc_id(guc, ce); + if (unlikely(ret < 0)) + return ret; + + return __guc_context_pin(ce, engine, vaddr); +} + +static int guc_child_context_pin(struct intel_context *ce, void *vaddr) +{ + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + + GEM_BUG_ON(!intel_context_is_child(ce)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + __intel_context_pin(ce->parallel.parent); + return __guc_context_pin(ce, engine, vaddr); +} + +static void guc_parent_context_unpin(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + + GEM_BUG_ON(context_enabled(ce)); + GEM_BUG_ON(intel_context_is_barrier(ce)); + GEM_BUG_ON(!intel_context_is_parent(ce)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + if (ce->parallel.last_rq) + i915_request_put(ce->parallel.last_rq); + unpin_guc_id(guc, ce); + lrc_unpin(ce); +} + +static void guc_child_context_unpin(struct intel_context *ce) +{ + GEM_BUG_ON(context_enabled(ce)); + GEM_BUG_ON(intel_context_is_barrier(ce)); + GEM_BUG_ON(!intel_context_is_child(ce)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + lrc_unpin(ce); +} + +static void guc_child_context_post_unpin(struct intel_context *ce) +{ + GEM_BUG_ON(!intel_context_is_child(ce)); + GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); + GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); + + lrc_post_unpin(ce); + intel_context_unpin(ce->parallel.parent); +} + +static void guc_child_context_destroy(struct kref *kref) +{ + struct intel_context *ce = container_of(kref, typeof(*ce), ref); + + __guc_context_destroy(ce); +} + +static const struct intel_context_ops virtual_parent_context_ops = { + .alloc = guc_virtual_context_alloc, + + .pre_pin = guc_context_pre_pin, + .pin = guc_parent_context_pin, + .unpin = guc_parent_context_unpin, .post_unpin = guc_context_post_unpin, .ban = guc_context_ban, @@ -2357,6 +3015,110 @@ static const struct intel_context_ops virtual_guc_context_ops = { .get_sibling = guc_virtual_get_sibling, }; +static const struct intel_context_ops virtual_child_context_ops = { + .alloc = guc_virtual_context_alloc, + + .pre_pin = guc_context_pre_pin, + .pin = guc_child_context_pin, + .unpin = guc_child_context_unpin, + .post_unpin = guc_child_context_post_unpin, + + .cancel_request = guc_context_cancel_request, + + .enter = guc_virtual_context_enter, + .exit = guc_virtual_context_exit, + + .destroy = guc_child_context_destroy, + + .get_sibling = guc_virtual_get_sibling, +}; + +/* + * The below override of the breadcrumbs is enabled when the user configures a + * context for parallel submission (multi-lrc, parent-child). + * + * The overridden breadcrumbs implements an algorithm which allows the GuC to + * safely preempt all the hw contexts configured for parallel submission + * between each BB. The contract between the i915 and GuC is if the parent + * context can be preempted, all the children can be preempted, and the GuC will + * always try to preempt the parent before the children. A handshake between the + * parent / children breadcrumbs ensures the i915 holds up its end of the deal + * creating a window to preempt between each set of BBs. + */ +static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); +static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); +static u32 * +emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs); +static u32 * +emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs); + +static struct intel_context * +guc_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width) +{ + struct intel_engine_cs **siblings = NULL; + struct intel_context *parent = NULL, *ce, *err; + int i, j; + + siblings = kmalloc_array(num_siblings, + sizeof(*siblings), + GFP_KERNEL); + if (!siblings) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < width; ++i) { + for (j = 0; j < num_siblings; ++j) + siblings[j] = engines[i * num_siblings + j]; + + ce = intel_engine_create_virtual(siblings, num_siblings, + FORCE_VIRTUAL); + if (!ce) { + err = ERR_PTR(-ENOMEM); + goto unwind; + } + + if (i == 0) { + parent = ce; + parent->ops = &virtual_parent_context_ops; + } else { + ce->ops = &virtual_child_context_ops; + intel_context_bind_parent_child(parent, ce); + } + } + + parent->parallel.fence_context = dma_fence_context_alloc(1); + + parent->engine->emit_bb_start = + emit_bb_start_parent_no_preempt_mid_batch; + parent->engine->emit_fini_breadcrumb = + emit_fini_breadcrumb_parent_no_preempt_mid_batch; + parent->engine->emit_fini_breadcrumb_dw = + 12 + 4 * parent->parallel.number_children; + for_each_child(parent, ce) { + ce->engine->emit_bb_start = + emit_bb_start_child_no_preempt_mid_batch; + ce->engine->emit_fini_breadcrumb = + emit_fini_breadcrumb_child_no_preempt_mid_batch; + ce->engine->emit_fini_breadcrumb_dw = 16; + } + + kfree(siblings); + return parent; + +unwind: + if (parent) + intel_context_put(parent); + kfree(siblings); + return err; +} + static bool guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) { @@ -2416,7 +3178,7 @@ static void guc_init_breadcrumbs(struct intel_engine_cs *engine) static void guc_bump_inflight_request_prio(struct i915_request *rq, int prio) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); /* Short circuit function */ @@ -2439,7 +3201,7 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, static void guc_retire_inflight_request_prio(struct i915_request *rq) { - struct intel_context *ce = rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); spin_lock(&ce->guc_state.lock); guc_prio_fini(rq, ce); @@ -2753,6 +3515,12 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) return NULL; } + if (unlikely(intel_context_is_child(ce))) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Context is child, desc_idx %u", desc_idx); + return NULL; + } + return ce; } @@ -2796,6 +3564,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, intel_context_put(ce); } else if (context_destroyed(ce)) { /* Context has been destroyed */ + intel_gt_pm_put_async(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } @@ -3122,6 +3891,25 @@ static inline void guc_log_context_priority(struct drm_printer *p, drm_printf(p, "\n"); } +static inline void guc_log_context(struct drm_printer *p, + struct intel_context *ce) +{ + drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); + drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + drm_printf(p, "\t\tContext Pin Count: %u\n", + atomic_read(&ce->pin_count)); + drm_printf(p, "\t\tGuC ID Ref Count: %u\n", + atomic_read(&ce->guc_id.ref)); + drm_printf(p, "\t\tSchedule State: 0x%x\n\n", + ce->guc_state.sched_state); +} + void intel_guc_submission_print_context_info(struct intel_guc *guc, struct drm_printer *p) { @@ -3131,28 +3919,310 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); - drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); - drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", - ce->ring->head, - ce->lrc_reg_state[CTX_RING_HEAD]); - drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", - ce->ring->tail, - ce->lrc_reg_state[CTX_RING_TAIL]); - drm_printf(p, "\t\tContext Pin Count: %u\n", - atomic_read(&ce->pin_count)); - drm_printf(p, "\t\tGuC ID Ref Count: %u\n", - atomic_read(&ce->guc_id.ref)); - drm_printf(p, "\t\tSchedule State: 0x%x\n\n", - ce->guc_state.sched_state); + GEM_BUG_ON(intel_context_is_child(ce)); + guc_log_context(p, ce); guc_log_context_priority(p, ce); + + if (intel_context_is_parent(ce)) { + struct guc_process_desc *desc = __get_process_desc(ce); + struct intel_context *child; + + drm_printf(p, "\t\tNumber children: %u\n", + ce->parallel.number_children); + drm_printf(p, "\t\tWQI Head: %u\n", + READ_ONCE(desc->head)); + drm_printf(p, "\t\tWQI Tail: %u\n", + READ_ONCE(desc->tail)); + drm_printf(p, "\t\tWQI Status: %u\n\n", + READ_ONCE(desc->wq_status)); + + if (ce->engine->emit_bb_start == + emit_bb_start_parent_no_preempt_mid_batch) { + u8 i; + + drm_printf(p, "\t\tChildren Go: %u\n\n", + get_children_go_value(ce)); + for (i = 0; i < ce->parallel.number_children; ++i) + drm_printf(p, "\t\tChildren Join: %u\n", + get_children_join_value(ce, i)); + } + + for_each_child(ce, child) + guc_log_context(p, child); + } } xa_unlock_irqrestore(&guc->context_lookup, flags); } +static inline u32 get_children_go_addr(struct intel_context *ce) +{ + GEM_BUG_ON(!intel_context_is_parent(ce)); + + return i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce) + + offsetof(struct parent_scratch, go.semaphore); +} + +static inline u32 get_children_join_addr(struct intel_context *ce, + u8 child_index) +{ + GEM_BUG_ON(!intel_context_is_parent(ce)); + + return i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce) + + offsetof(struct parent_scratch, join[child_index].semaphore); +} + +#define PARENT_GO_BB 1 +#define PARENT_GO_FINI_BREADCRUMB 0 +#define CHILD_GO_BB 1 +#define CHILD_GO_FINI_BREADCRUMB 0 +static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + struct intel_context *ce = rq->context; + u32 *cs; + u8 i; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Wait on children */ + for (i = 0; i < ce->parallel.number_children; ++i) { + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD); + *cs++ = PARENT_GO_BB; + *cs++ = get_children_join_addr(ce, i); + *cs++ = 0; + } + + /* Turn off preemption */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; + + /* Tell children go */ + cs = gen8_emit_ggtt_write(cs, + CHILD_GO_BB, + get_children_go_addr(ce), + 0); + + /* Jump to batch */ + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + struct intel_context *ce = rq->context; + struct intel_context *parent = intel_context_to_parent(ce); + u32 *cs; + + GEM_BUG_ON(!intel_context_is_child(ce)); + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Signal parent */ + cs = gen8_emit_ggtt_write(cs, + PARENT_GO_BB, + get_children_join_addr(parent, + ce->parallel.child_index), + 0); + + /* Wait on parent for go */ + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD); + *cs++ = CHILD_GO_BB; + *cs++ = get_children_go_addr(parent); + *cs++ = 0; + + /* Turn off preemption */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + /* Jump to batch */ + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + intel_ring_advance(rq, cs); + + return 0; +} + +static u32 * +__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs) +{ + struct intel_context *ce = rq->context; + u8 i; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + /* Wait on children */ + for (i = 0; i < ce->parallel.number_children; ++i) { + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD); + *cs++ = PARENT_GO_FINI_BREADCRUMB; + *cs++ = get_children_join_addr(ce, i); + *cs++ = 0; + } + + /* Turn on preemption */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + /* Tell children go */ + cs = gen8_emit_ggtt_write(cs, + CHILD_GO_FINI_BREADCRUMB, + get_children_go_addr(ce), + 0); + + return cs; +} + +/* + * If this true, a submission of multi-lrc requests had an error and the + * requests need to be skipped. The front end (execuf IOCTL) should've called + * i915_request_skip which squashes the BB but we still need to emit the fini + * breadrcrumbs seqno write. At this point we don't know how many of the + * requests in the multi-lrc submission were generated so we can't do the + * handshake between the parent and children (e.g. if 4 requests should be + * generated but 2nd hit an error only 1 would be seen by the GuC backend). + * Simply skip the handshake, but still emit the breadcrumbd seqno, if an error + * has occurred on any of the requests in submission / relationship. + */ +static inline bool skip_handshake(struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); +} + +static u32 * +emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs) +{ + struct intel_context *ce = rq->context; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + if (unlikely(skip_handshake(rq))) { + /* + * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, + * the -6 comes from the length of the emits below. + */ + memset(cs, 0, sizeof(u32) * + (ce->engine->emit_fini_breadcrumb_dw - 6)); + cs += ce->engine->emit_fini_breadcrumb_dw - 6; + } else { + cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); + } + + /* Emit fini breadcrumb */ + cs = gen8_emit_ggtt_write(cs, + rq->fence.seqno, + i915_request_active_timeline(rq)->hwsp_offset, + 0); + + /* User interrupt */ + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + + return cs; +} + +static u32 * +__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs) +{ + struct intel_context *ce = rq->context; + struct intel_context *parent = intel_context_to_parent(ce); + + GEM_BUG_ON(!intel_context_is_child(ce)); + + /* Turn on preemption */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + /* Signal parent */ + cs = gen8_emit_ggtt_write(cs, + PARENT_GO_FINI_BREADCRUMB, + get_children_join_addr(parent, + ce->parallel.child_index), + 0); + + /* Wait parent on for go */ + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD); + *cs++ = CHILD_GO_FINI_BREADCRUMB; + *cs++ = get_children_go_addr(parent); + *cs++ = 0; + + return cs; +} + +static u32 * +emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, + u32 *cs) +{ + struct intel_context *ce = rq->context; + + GEM_BUG_ON(!intel_context_is_child(ce)); + + if (unlikely(skip_handshake(rq))) { + /* + * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, + * the -6 comes from the length of the emits below. + */ + memset(cs, 0, sizeof(u32) * + (ce->engine->emit_fini_breadcrumb_dw - 6)); + cs += ce->engine->emit_fini_breadcrumb_dw - 6; + } else { + cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); + } + + /* Emit fini breadcrumb */ + cs = gen8_emit_ggtt_write(cs, + rq->fence.seqno, + i915_request_active_timeline(rq)->hwsp_offset, + 0); + + /* User interrupt */ + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + + return cs; +} + static struct intel_context * -guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, + unsigned long flags) { struct guc_virtual_engine *ve; struct intel_guc *guc; @@ -3201,6 +4271,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) } ve->base.mask |= sibling->mask; + ve->base.logical_mask |= sibling->logical_mask; if (n != 0 && ve->base.class != sibling->class) { DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", @@ -3259,4 +4330,5 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_guc.c" +#include "selftest_guc_multi_lrc.c" #endif diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c new file mode 100644 index 000000000000..50953c8e8b53 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright �� 2019 Intel Corporation + */ + +#include "selftests/igt_spinner.h" +#include "selftests/igt_reset.h" +#include "selftests/intel_scheduler_helpers.h" +#include "gt/intel_engine_heartbeat.h" +#include "gem/selftests/mock_context.h" + +static void logical_sort(struct intel_engine_cs **engines, int num_engines) +{ + struct intel_engine_cs *sorted[MAX_ENGINE_INSTANCE + 1]; + int i, j; + + for (i = 0; i < num_engines; ++i) + for (j = 0; j < MAX_ENGINE_INSTANCE + 1; ++j) { + if (engines[j]->logical_mask & BIT(i)) { + sorted[i] = engines[j]; + break; + } + } + + memcpy(*engines, *sorted, + sizeof(struct intel_engine_cs *) * num_engines); +} + +static struct intel_context * +multi_lrc_create_parent(struct intel_gt *gt, u8 class, + unsigned long flags) +{ + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int i = 0; + + for_each_engine(engine, gt, id) { + if (engine->class != class) + continue; + + siblings[i++] = engine; + } + + if (i <= 1) + return ERR_PTR(0); + + logical_sort(siblings, i); + + return intel_engine_create_parallel(siblings, 1, i); +} + +static void multi_lrc_context_unpin(struct intel_context *ce) +{ + struct intel_context *child; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + for_each_child(ce, child) + intel_context_unpin(child); + intel_context_unpin(ce); +} + +static void multi_lrc_context_put(struct intel_context *ce) +{ + GEM_BUG_ON(!intel_context_is_parent(ce)); + + /* + * Only the parent gets the creation ref put in the uAPI, the parent + * itself is responsible for creation ref put on the children. + */ + intel_context_put(ce); +} + +static struct i915_request * +multi_lrc_nop_request(struct intel_context *ce) +{ + struct intel_context *child; + struct i915_request *rq, *child_rq; + int i = 0; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + for_each_child(ce, child) { + child_rq = intel_context_create_request(child); + if (IS_ERR(child_rq)) + goto child_error; + + if (++i == ce->parallel.number_children) + set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, + &child_rq->fence.flags); + i915_request_add(child_rq); + } + + return rq; + +child_error: + i915_request_put(rq); + + return ERR_PTR(-ENOMEM); +} + +static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class) +{ + struct intel_context *parent; + struct i915_request *rq; + int ret; + + parent = multi_lrc_create_parent(gt, class, 0); + if (IS_ERR(parent)) { + pr_err("Failed creating contexts: %ld", PTR_ERR(parent)); + return PTR_ERR(parent); + } else if (!parent) { + pr_debug("Not enough engines in class: %d", class); + return 0; + } + + rq = multi_lrc_nop_request(parent); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + pr_err("Failed creating requests: %d", ret); + goto out; + } + + ret = intel_selftest_wait_for_rq(rq); + if (ret) + pr_err("Failed waiting on request: %d", ret); + + i915_request_put(rq); + + if (ret >= 0) { + ret = intel_gt_wait_for_idle(gt, HZ * 5); + if (ret < 0) + pr_err("GT failed to idle: %d\n", ret); + } + +out: + multi_lrc_context_unpin(parent); + multi_lrc_context_put(parent); + return ret; +} + +static int intel_guc_multi_lrc_basic(void *arg) +{ + struct intel_gt *gt = arg; + unsigned int class; + int ret; + + for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) { + ret = __intel_guc_multi_lrc_basic(gt, class); + if (ret) + return ret; + } + + return 0; +} + +int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_guc_multi_lrc_basic), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!intel_uc_uses_guc_submission(>->uc)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 0d18e13e3468..6c804102528b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -576,7 +576,7 @@ retry: /* No one is going to touch shadow bb from now on. */ i915_gem_object_flush_map(bb->obj); - i915_gem_object_unlock(bb->obj); + i915_gem_ww_ctx_fini(&ww); } } return 0; @@ -630,7 +630,7 @@ retry: return ret; } - i915_gem_object_unlock(wa_ctx->indirect_ctx.obj); + i915_gem_ww_ctx_fini(&ww); /* FIXME: we are not tracking our pinned VMA leaving it * up to the core to fix up the stray pin_count upon diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 54ae75db121f..fe638b5da7c0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -35,6 +35,7 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_buffer_pool.h" #include "gt/intel_gt_clock_utils.h" +#include "gt/intel_gt_debugfs.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_pm_debugfs.h" #include "gt/intel_gt_requests.h" @@ -553,36 +554,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused) return 0; } -static int -i915_wedged_get(void *data, u64 *val) +static int i915_wedged_get(void *data, u64 *val) { struct drm_i915_private *i915 = data; - int ret = intel_gt_terminally_wedged(&i915->gt); - switch (ret) { - case -EIO: - *val = 1; - return 0; - case 0: - *val = 0; - return 0; - default: - return ret; - } + return intel_gt_debugfs_reset_show(&i915->gt, val); } -static int -i915_wedged_set(void *data, u64 val) +static int i915_wedged_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - /* Flush any previous reset before applying for a new one */ - wait_event(i915->gt.reset.queue, - !test_bit(I915_RESET_BACKOFF, &i915->gt.reset.flags)); - - intel_gt_handle_error(&i915->gt, val, I915_ERROR_CAPTURE, - "Manually set wedged engine mask = %llx", val); - return 0; + return intel_gt_debugfs_reset_store(&i915->gt, val); } DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, @@ -727,27 +710,15 @@ static int i915_sseu_status(struct seq_file *m, void *unused) static int i915_forcewake_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; - struct intel_gt *gt = &i915->gt; - - atomic_inc(>->user_wakeref); - intel_gt_pm_get(gt); - if (GRAPHICS_VER(i915) >= 6) - intel_uncore_forcewake_user_get(gt->uncore); - return 0; + return intel_gt_pm_debugfs_forcewake_user_open(&i915->gt); } static int i915_forcewake_release(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; - struct intel_gt *gt = &i915->gt; - if (GRAPHICS_VER(i915) >= 6) - intel_uncore_forcewake_user_put(&i915->uncore); - intel_gt_pm_put(gt); - atomic_dec(>->user_wakeref); - - return 0; + return intel_gt_pm_debugfs_forcewake_user_release(&i915->gt); } static const struct file_operations i915_forcewake_fops = { diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 5e2b909827f4..51b368be0fc4 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -124,7 +124,9 @@ query_engine_info(struct drm_i915_private *i915, for_each_uabi_engine(engine, i915) { info.engine.engine_class = engine->uabi_class; info.engine.engine_instance = engine->uabi_instance; + info.flags = I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE; info.capabilities = engine->uabi_capabilities; + info.logical_instance = ilog2(engine->logical_mask); if (copy_to_user(info_ptr, &info, sizeof(info))) return -EFAULT; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ed64fa9defdf..2c3cd6e635b5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1335,6 +1335,25 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) return err; } +static inline bool is_parallel_rq(struct i915_request *rq) +{ + return intel_context_is_parallel(rq->context); +} + +static inline struct intel_context *request_to_parent(struct i915_request *rq) +{ + return intel_context_to_parent(rq->context); +} + +static bool is_same_parallel_context(struct i915_request *to, + struct i915_request *from) +{ + if (is_parallel_rq(to)) + return request_to_parent(to) == request_to_parent(from); + + return false; +} + int i915_request_await_execution(struct i915_request *rq, struct dma_fence *fence) @@ -1366,11 +1385,14 @@ i915_request_await_execution(struct i915_request *rq, * want to run our callback in all cases. */ - if (dma_fence_is_i915(fence)) + if (dma_fence_is_i915(fence)) { + if (is_same_parallel_context(rq, to_request(fence))) + continue; ret = __i915_request_await_execution(rq, to_request(fence)); - else + } else { ret = i915_request_await_external(rq, fence); + } if (ret < 0) return ret; } while (--nchild); @@ -1471,10 +1493,13 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) fence)) continue; - if (dma_fence_is_i915(fence)) + if (dma_fence_is_i915(fence)) { + if (is_same_parallel_context(rq, to_request(fence))) + continue; ret = i915_request_await_request(rq, to_request(fence)); - else + } else { ret = i915_request_await_external(rq, fence); + } if (ret < 0) return ret; @@ -1550,35 +1575,51 @@ i915_request_await_object(struct i915_request *to, } static struct i915_request * -__i915_request_add_to_timeline(struct i915_request *rq) +__i915_request_ensure_parallel_ordering(struct i915_request *rq, + struct intel_timeline *timeline) { - struct intel_timeline *timeline = i915_request_timeline(rq); struct i915_request *prev; - /* - * Dependency tracking and request ordering along the timeline - * is special cased so that we can eliminate redundant ordering - * operations while building the request (we know that the timeline - * itself is ordered, and here we guarantee it). - * - * As we know we will need to emit tracking along the timeline, - * we embed the hooks into our request struct -- at the cost of - * having to have specialised no-allocation interfaces (which will - * be beneficial elsewhere). - * - * A second benefit to open-coding i915_request_await_request is - * that we can apply a slight variant of the rules specialised - * for timelines that jump between engines (such as virtual engines). - * If we consider the case of virtual engine, we must emit a dma-fence - * to prevent scheduling of the second request until the first is - * complete (to maximise our greedy late load balancing) and this - * precludes optimising to use semaphores serialisation of a single - * timeline across engines. - */ + GEM_BUG_ON(!is_parallel_rq(rq)); + + prev = request_to_parent(rq)->parallel.last_rq; + if (prev) { + if (!__i915_request_is_complete(prev)) { + i915_sw_fence_await_sw_fence(&rq->submit, + &prev->submit, + &rq->submitq); + + if (rq->engine->sched_engine->schedule) + __i915_sched_node_add_dependency(&rq->sched, + &prev->sched, + &rq->dep, + 0); + } + i915_request_put(prev); + } + + request_to_parent(rq)->parallel.last_rq = i915_request_get(rq); + + return to_request(__i915_active_fence_set(&timeline->last_request, + &rq->fence)); +} + +static struct i915_request * +__i915_request_ensure_ordering(struct i915_request *rq, + struct intel_timeline *timeline) +{ + struct i915_request *prev; + + GEM_BUG_ON(is_parallel_rq(rq)); + prev = to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); + if (prev && !__i915_request_is_complete(prev)) { bool uses_guc = intel_engine_uses_guc(rq->engine); + bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask | + rq->engine->mask); + bool same_context = prev->context == rq->context; /* * The requests are supposed to be kept in order. However, @@ -1586,13 +1627,11 @@ __i915_request_add_to_timeline(struct i915_request *rq) * is used as a barrier for external modification to this * context. */ - GEM_BUG_ON(prev->context == rq->context && + GEM_BUG_ON(same_context && i915_seqno_passed(prev->fence.seqno, rq->fence.seqno)); - if ((!uses_guc && - is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) || - (uses_guc && prev->context == rq->context)) + if ((same_context && uses_guc) || (!uses_guc && pow2)) i915_sw_fence_await_sw_fence(&rq->submit, &prev->submit, &rq->submitq); @@ -1607,6 +1646,50 @@ __i915_request_add_to_timeline(struct i915_request *rq) 0); } + return prev; +} + +static struct i915_request * +__i915_request_add_to_timeline(struct i915_request *rq) +{ + struct intel_timeline *timeline = i915_request_timeline(rq); + struct i915_request *prev; + + /* + * Dependency tracking and request ordering along the timeline + * is special cased so that we can eliminate redundant ordering + * operations while building the request (we know that the timeline + * itself is ordered, and here we guarantee it). + * + * As we know we will need to emit tracking along the timeline, + * we embed the hooks into our request struct -- at the cost of + * having to have specialised no-allocation interfaces (which will + * be beneficial elsewhere). + * + * A second benefit to open-coding i915_request_await_request is + * that we can apply a slight variant of the rules specialised + * for timelines that jump between engines (such as virtual engines). + * If we consider the case of virtual engine, we must emit a dma-fence + * to prevent scheduling of the second request until the first is + * complete (to maximise our greedy late load balancing) and this + * precludes optimising to use semaphores serialisation of a single + * timeline across engines. + * + * We do not order parallel submission requests on the timeline as each + * parallel submission context has its own timeline and the ordering + * rules for parallel requests are that they must be submitted in the + * order received from the execbuf IOCTL. So rather than using the + * timeline we store a pointer to last request submitted in the + * relationship in the gem context and insert a submission fence + * between that request and request passed into this function or + * alternatively we use completion fence if gem context has a single + * timeline and this is the first submission of an execbuf IOCTL. + */ + if (likely(!is_parallel_rq(rq))) + prev = __i915_request_ensure_ordering(rq, timeline); + else + prev = __i915_request_ensure_parallel_ordering(rq, timeline); + /* * Make sure that no request gazumped us - if it was allocated after * our i915_request_alloc() and called __i915_request_add() before diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 7bd9ed20623e..dc359242d1ae 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -139,6 +139,29 @@ enum { * the GPU. Here we track such boost requests on a per-request basis. */ I915_FENCE_FLAG_BOOST, + + /* + * I915_FENCE_FLAG_SUBMIT_PARALLEL - request with a context in a + * parent-child relationship (parallel submission, multi-lrc) should + * trigger a submission to the GuC rather than just moving the context + * tail. + */ + I915_FENCE_FLAG_SUBMIT_PARALLEL, + + /* + * I915_FENCE_FLAG_SKIP_PARALLEL - request with a context in a + * parent-child relationship (parallel submission, multi-lrc) that + * hit an error while generating requests in the execbuf IOCTL. + * Indicates this request should be skipped as another request in + * submission / relationship encoutered an error. + */ + I915_FENCE_FLAG_SKIP_PARALLEL, + + /* + * I915_FENCE_FLAG_COMPOSITE - Indicates fence is part of a composite + * fence (dma_fence_array) and i915 generated for parallel submission. + */ + I915_FENCE_FLAG_COMPOSITE, }; /** diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4b7fc4647e46..90546fa58fc1 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1234,9 +1234,10 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) return i915_active_add_request(&vma->active, rq); } -int i915_vma_move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags) +int _i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq, + struct dma_fence *fence, + unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; int err; @@ -1257,9 +1258,11 @@ int i915_vma_move_to_active(struct i915_vma *vma, intel_frontbuffer_put(front); } - dma_resv_add_excl_fence(vma->resv, &rq->fence); - obj->write_domain = I915_GEM_DOMAIN_RENDER; - obj->read_domains = 0; + if (fence) { + dma_resv_add_excl_fence(vma->resv, fence); + obj->write_domain = I915_GEM_DOMAIN_RENDER; + obj->read_domains = 0; + } } else { if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { err = dma_resv_reserve_shared(vma->resv, 1); @@ -1267,8 +1270,10 @@ int i915_vma_move_to_active(struct i915_vma *vma, return err; } - dma_resv_add_shared_fence(vma->resv, &rq->fence); - obj->write_domain = 0; + if (fence) { + dma_resv_add_shared_fence(vma->resv, fence); + obj->write_domain = 0; + } } if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index ed69f66c7ab0..648dbe744c96 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -57,9 +57,16 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma) int __must_check __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq); -int __must_check i915_vma_move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags); +int __must_check _i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq, + struct dma_fence *fence, + unsigned int flags); +static inline int __must_check +i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, + unsigned int flags) +{ + return _i915_vma_move_to_active(vma, rq, &rq->fence, flags); +} #define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9089569f36a3..cffb3df35a63 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -78,6 +78,8 @@ struct intel_wm_config { static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { + enum pipe pipe; + if (HAS_LLC(dev_priv)) { /* * WaCompressedResourceDisplayNewHashMode:skl,kbl @@ -91,6 +93,16 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) SKL_DE_COMPRESSED_HASH_MODE); } + for_each_pipe(dev_priv, pipe) { + /* + * "Plane N strech max must be programmed to 11b (x1) + * when Async flips are enabled on that plane." + */ + if (!IS_GEMINILAKE(dev_priv) && intel_vtd_active()) + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), + SKL_PLANE1_STRETCH_MAX_MASK, SKL_PLANE1_STRETCH_MAX_X1); + } + /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */ intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1, intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 545c8f277c46..4f4c2e15e736 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -123,6 +123,12 @@ enum { __INTEL_WAKEREF_PUT_LAST_BIT__ }; +static inline void +intel_wakeref_might_get(struct intel_wakeref *wf) +{ + might_lock(&wf->mutex); +} + /** * intel_wakeref_put_flags: Release the wakeref * @wf: the wakeref @@ -170,6 +176,12 @@ intel_wakeref_put_delay(struct intel_wakeref *wf, unsigned long delay) FIELD_PREP(INTEL_WAKEREF_PUT_DELAY, delay)); } +static inline void +intel_wakeref_might_put(struct intel_wakeref *wf) +{ + might_lock(&wf->mutex); +} + /** * intel_wakeref_lock: Lock the wakeref (mutex) * @wf: the wakeref diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 3cf6758931f9..bdd290f2bf3c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -48,5 +48,6 @@ selftest(ring_submission, intel_ring_submission_live_selftests) selftest(perf, i915_perf_live_selftests) selftest(slpc, intel_slpc_live_selftests) selftest(guc, intel_guc_live_selftests) +selftest(guc_multi_lrc, intel_guc_multi_lrc_live_selftests) /* Here be dragons: keep last to run last! */ selftest(late_gt_pm, intel_gt_pm_late_selftests) diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index efa86dffe3c6..75793008c4ef 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -6,8 +6,6 @@ #include <drm/ttm/ttm_placement.h> #include <linux/scatterlist.h> -#include <drm/ttm/ttm_placement.h> - #include "gem/i915_gem_region.h" #include "intel_memory_region.h" #include "intel_region_ttm.h" diff --git a/drivers/gpu/drm/kmb/kmb_crtc.c b/drivers/gpu/drm/kmb/kmb_crtc.c index 44327bc629ca..06613ffeaaf8 100644 --- a/drivers/gpu/drm/kmb/kmb_crtc.c +++ b/drivers/gpu/drm/kmb/kmb_crtc.c @@ -66,7 +66,8 @@ static const struct drm_crtc_funcs kmb_crtc_funcs = { .disable_vblank = kmb_crtc_disable_vblank, }; -static void kmb_crtc_set_mode(struct drm_crtc *crtc) +static void kmb_crtc_set_mode(struct drm_crtc *crtc, + struct drm_atomic_state *old_state) { struct drm_device *dev = crtc->dev; struct drm_display_mode *m = &crtc->state->adjusted_mode; @@ -75,7 +76,7 @@ static void kmb_crtc_set_mode(struct drm_crtc *crtc) unsigned int val = 0; /* Initialize mipi */ - kmb_dsi_mode_set(kmb->kmb_dsi, m, kmb->sys_clk_mhz); + kmb_dsi_mode_set(kmb->kmb_dsi, m, kmb->sys_clk_mhz, old_state); drm_info(dev, "vfp= %d vbp= %d vsync_len=%d hfp=%d hbp=%d hsync_len=%d\n", m->crtc_vsync_start - m->crtc_vdisplay, @@ -138,7 +139,7 @@ static void kmb_crtc_atomic_enable(struct drm_crtc *crtc, struct kmb_drm_private *kmb = crtc_to_kmb_priv(crtc); clk_prepare_enable(kmb->kmb_clk.clk_lcd); - kmb_crtc_set_mode(crtc); + kmb_crtc_set_mode(crtc, state); drm_crtc_vblank_on(crtc); } @@ -185,11 +186,45 @@ static void kmb_crtc_atomic_flush(struct drm_crtc *crtc, spin_unlock_irq(&crtc->dev->event_lock); } +static enum drm_mode_status + kmb_crtc_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + int refresh; + struct drm_device *dev = crtc->dev; + int vfp = mode->vsync_start - mode->vdisplay; + + if (mode->vdisplay < KMB_CRTC_MAX_HEIGHT) { + drm_dbg(dev, "height = %d less than %d", + mode->vdisplay, KMB_CRTC_MAX_HEIGHT); + return MODE_BAD_VVALUE; + } + if (mode->hdisplay < KMB_CRTC_MAX_WIDTH) { + drm_dbg(dev, "width = %d less than %d", + mode->hdisplay, KMB_CRTC_MAX_WIDTH); + return MODE_BAD_HVALUE; + } + refresh = drm_mode_vrefresh(mode); + if (refresh < KMB_MIN_VREFRESH || refresh > KMB_MAX_VREFRESH) { + drm_dbg(dev, "refresh = %d less than %d or greater than %d", + refresh, KMB_MIN_VREFRESH, KMB_MAX_VREFRESH); + return MODE_BAD; + } + + if (vfp < KMB_CRTC_MIN_VFP) { + drm_dbg(dev, "vfp = %d less than %d", vfp, KMB_CRTC_MIN_VFP); + return MODE_BAD; + } + + return MODE_OK; +} + static const struct drm_crtc_helper_funcs kmb_crtc_helper_funcs = { .atomic_begin = kmb_crtc_atomic_begin, .atomic_enable = kmb_crtc_atomic_enable, .atomic_disable = kmb_crtc_atomic_disable, .atomic_flush = kmb_crtc_atomic_flush, + .mode_valid = kmb_crtc_mode_valid, }; int kmb_setup_crtc(struct drm_device *drm) diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index 1c2f4799f421..961ac6fb5fcf 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -172,10 +172,10 @@ static int kmb_setup_mode_config(struct drm_device *drm) ret = drmm_mode_config_init(drm); if (ret) return ret; - drm->mode_config.min_width = KMB_MIN_WIDTH; - drm->mode_config.min_height = KMB_MIN_HEIGHT; - drm->mode_config.max_width = KMB_MAX_WIDTH; - drm->mode_config.max_height = KMB_MAX_HEIGHT; + drm->mode_config.min_width = KMB_FB_MIN_WIDTH; + drm->mode_config.min_height = KMB_FB_MIN_HEIGHT; + drm->mode_config.max_width = KMB_FB_MAX_WIDTH; + drm->mode_config.max_height = KMB_FB_MAX_HEIGHT; drm->mode_config.funcs = &kmb_mode_config_funcs; ret = kmb_setup_crtc(drm); @@ -380,7 +380,7 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev) if (val & LAYER3_DMA_FIFO_UNDERFLOW) drm_dbg(&kmb->drm, "LAYER3:GL1 DMA UNDERFLOW val = 0x%lx", val); - if (val & LAYER3_DMA_FIFO_UNDERFLOW) + if (val & LAYER3_DMA_FIFO_OVERFLOW) drm_dbg(&kmb->drm, "LAYER3:GL1 DMA OVERFLOW val = 0x%lx", val); } diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h index ebbaa5f422d5..bf085e95b28f 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.h +++ b/drivers/gpu/drm/kmb/kmb_drv.h @@ -20,6 +20,18 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 1 +/* Platform definitions */ +#define KMB_CRTC_MIN_VFP 4 +#define KMB_CRTC_MAX_WIDTH 1920 /* max width in pixels */ +#define KMB_CRTC_MAX_HEIGHT 1080 /* max height in pixels */ +#define KMB_CRTC_MIN_WIDTH 1920 +#define KMB_CRTC_MIN_HEIGHT 1080 +#define KMB_FB_MAX_WIDTH 1920 +#define KMB_FB_MAX_HEIGHT 1080 +#define KMB_FB_MIN_WIDTH 1 +#define KMB_FB_MIN_HEIGHT 1 +#define KMB_MIN_VREFRESH 59 /*vertical refresh in Hz */ +#define KMB_MAX_VREFRESH 60 /*vertical refresh in Hz */ #define KMB_LCD_DEFAULT_CLK 200000000 #define KMB_SYS_CLK_MHZ 500 @@ -45,6 +57,7 @@ struct kmb_drm_private { spinlock_t irq_lock; int irq_lcd; int sys_clk_mhz; + struct disp_cfg init_disp_cfg[KMB_MAX_PLANES]; struct layer_status plane_status[KMB_MAX_PLANES]; int kmb_under_flow; int kmb_flush_done; diff --git a/drivers/gpu/drm/kmb/kmb_dsi.c b/drivers/gpu/drm/kmb/kmb_dsi.c index 1793cd31b117..f6071882054c 100644 --- a/drivers/gpu/drm/kmb/kmb_dsi.c +++ b/drivers/gpu/drm/kmb/kmb_dsi.c @@ -482,6 +482,10 @@ static u32 mipi_tx_fg_section_cfg(struct kmb_dsi *kmb_dsi, return 0; } +#define CLK_DIFF_LOW 50 +#define CLK_DIFF_HI 60 +#define SYSCLK_500 500 + static void mipi_tx_fg_cfg_regs(struct kmb_dsi *kmb_dsi, u8 frame_gen, struct mipi_tx_frame_timing_cfg *fg_cfg) { @@ -492,7 +496,12 @@ static void mipi_tx_fg_cfg_regs(struct kmb_dsi *kmb_dsi, u8 frame_gen, /* 500 Mhz system clock minus 50 to account for the difference in * MIPI clock speed in RTL tests */ - sysclk = kmb_dsi->sys_clk_mhz - 50; + if (kmb_dsi->sys_clk_mhz == SYSCLK_500) { + sysclk = kmb_dsi->sys_clk_mhz - CLK_DIFF_LOW; + } else { + /* 700 Mhz clk*/ + sysclk = kmb_dsi->sys_clk_mhz - CLK_DIFF_HI; + } /* PPL-Pixel Packing Layer, LLP-Low Level Protocol * Frame genartor timing parameters are clocked on the system clock, @@ -1322,7 +1331,8 @@ static u32 mipi_tx_init_dphy(struct kmb_dsi *kmb_dsi, return 0; } -static void connect_lcd_to_mipi(struct kmb_dsi *kmb_dsi) +static void connect_lcd_to_mipi(struct kmb_dsi *kmb_dsi, + struct drm_atomic_state *old_state) { struct regmap *msscam; @@ -1331,7 +1341,7 @@ static void connect_lcd_to_mipi(struct kmb_dsi *kmb_dsi) dev_dbg(kmb_dsi->dev, "failed to get msscam syscon"); return; } - + drm_atomic_bridge_chain_enable(adv_bridge, old_state); /* DISABLE MIPI->CIF CONNECTION */ regmap_write(msscam, MSS_MIPI_CIF_CFG, 0); @@ -1342,7 +1352,7 @@ static void connect_lcd_to_mipi(struct kmb_dsi *kmb_dsi) } int kmb_dsi_mode_set(struct kmb_dsi *kmb_dsi, struct drm_display_mode *mode, - int sys_clk_mhz) + int sys_clk_mhz, struct drm_atomic_state *old_state) { u64 data_rate; @@ -1384,18 +1394,13 @@ int kmb_dsi_mode_set(struct kmb_dsi *kmb_dsi, struct drm_display_mode *mode, mipi_tx_init_cfg.lane_rate_mbps = data_rate; } - kmb_write_mipi(kmb_dsi, DPHY_ENABLE, 0); - kmb_write_mipi(kmb_dsi, DPHY_INIT_CTRL0, 0); - kmb_write_mipi(kmb_dsi, DPHY_INIT_CTRL1, 0); - kmb_write_mipi(kmb_dsi, DPHY_INIT_CTRL2, 0); - /* Initialize mipi controller */ mipi_tx_init_cntrl(kmb_dsi, &mipi_tx_init_cfg); /* Dphy initialization */ mipi_tx_init_dphy(kmb_dsi, &mipi_tx_init_cfg); - connect_lcd_to_mipi(kmb_dsi); + connect_lcd_to_mipi(kmb_dsi, old_state); dev_info(kmb_dsi->dev, "mipi hw initialized"); return 0; diff --git a/drivers/gpu/drm/kmb/kmb_dsi.h b/drivers/gpu/drm/kmb/kmb_dsi.h index 66b7c500d9bc..09dc88743d77 100644 --- a/drivers/gpu/drm/kmb/kmb_dsi.h +++ b/drivers/gpu/drm/kmb/kmb_dsi.h @@ -380,7 +380,7 @@ int kmb_dsi_host_bridge_init(struct device *dev); struct kmb_dsi *kmb_dsi_init(struct platform_device *pdev); void kmb_dsi_host_unregister(struct kmb_dsi *kmb_dsi); int kmb_dsi_mode_set(struct kmb_dsi *kmb_dsi, struct drm_display_mode *mode, - int sys_clk_mhz); + int sys_clk_mhz, struct drm_atomic_state *old_state); int kmb_dsi_map_mmio(struct kmb_dsi *kmb_dsi); int kmb_dsi_clk_init(struct kmb_dsi *kmb_dsi); int kmb_dsi_encoder_init(struct drm_device *dev, struct kmb_dsi *kmb_dsi); diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c index ecee6782612d..00404ba4126d 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.c +++ b/drivers/gpu/drm/kmb/kmb_plane.c @@ -67,8 +67,21 @@ static const u32 kmb_formats_v[] = { static unsigned int check_pixel_format(struct drm_plane *plane, u32 format) { + struct kmb_drm_private *kmb; + struct kmb_plane *kmb_plane = to_kmb_plane(plane); int i; + int plane_id = kmb_plane->id; + struct disp_cfg init_disp_cfg; + kmb = to_kmb(plane->dev); + init_disp_cfg = kmb->init_disp_cfg[plane_id]; + /* Due to HW limitations, changing pixel format after initial + * plane configuration is not supported. + */ + if (init_disp_cfg.format && init_disp_cfg.format != format) { + drm_dbg(&kmb->drm, "Cannot change format after initial plane configuration"); + return -EINVAL; + } for (i = 0; i < plane->format_count; i++) { if (plane->format_types[i] == format) return 0; @@ -81,11 +94,17 @@ static int kmb_plane_atomic_check(struct drm_plane *plane, { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct kmb_drm_private *kmb; + struct kmb_plane *kmb_plane = to_kmb_plane(plane); + int plane_id = kmb_plane->id; + struct disp_cfg init_disp_cfg; struct drm_framebuffer *fb; int ret; struct drm_crtc_state *crtc_state; bool can_position; + kmb = to_kmb(plane->dev); + init_disp_cfg = kmb->init_disp_cfg[plane_id]; fb = new_plane_state->fb; if (!fb || !new_plane_state->crtc) return 0; @@ -94,10 +113,21 @@ static int kmb_plane_atomic_check(struct drm_plane *plane, if (ret) return ret; - if (new_plane_state->crtc_w > KMB_MAX_WIDTH || new_plane_state->crtc_h > KMB_MAX_HEIGHT) + if (new_plane_state->crtc_w > KMB_FB_MAX_WIDTH || + new_plane_state->crtc_h > KMB_FB_MAX_HEIGHT || + new_plane_state->crtc_w < KMB_FB_MIN_WIDTH || + new_plane_state->crtc_h < KMB_FB_MIN_HEIGHT) return -EINVAL; - if (new_plane_state->crtc_w < KMB_MIN_WIDTH || new_plane_state->crtc_h < KMB_MIN_HEIGHT) + + /* Due to HW limitations, changing plane height or width after + * initial plane configuration is not supported. + */ + if ((init_disp_cfg.width && init_disp_cfg.height) && + (init_disp_cfg.width != fb->width || + init_disp_cfg.height != fb->height)) { + drm_dbg(&kmb->drm, "Cannot change plane height or width after initial configuration"); return -EINVAL; + } can_position = (plane->type == DRM_PLANE_TYPE_OVERLAY); crtc_state = drm_atomic_get_existing_crtc_state(state, @@ -277,6 +307,44 @@ static void config_csc(struct kmb_drm_private *kmb, int plane_id) kmb_write_lcd(kmb, LCD_LAYERn_CSC_OFF3(plane_id), csc_coef_lcd[11]); } +static void kmb_plane_set_alpha(struct kmb_drm_private *kmb, + const struct drm_plane_state *state, + unsigned char plane_id, + unsigned int *val) +{ + u16 plane_alpha = state->alpha; + u16 pixel_blend_mode = state->pixel_blend_mode; + int has_alpha = state->fb->format->has_alpha; + + if (plane_alpha != DRM_BLEND_ALPHA_OPAQUE) + *val |= LCD_LAYER_ALPHA_STATIC; + + if (has_alpha) { + switch (pixel_blend_mode) { + case DRM_MODE_BLEND_PIXEL_NONE: + break; + case DRM_MODE_BLEND_PREMULTI: + *val |= LCD_LAYER_ALPHA_EMBED | LCD_LAYER_ALPHA_PREMULT; + break; + case DRM_MODE_BLEND_COVERAGE: + *val |= LCD_LAYER_ALPHA_EMBED; + break; + default: + DRM_DEBUG("Missing pixel blend mode case (%s == %ld)\n", + __stringify(pixel_blend_mode), + (long)pixel_blend_mode); + break; + } + } + + if (plane_alpha == DRM_BLEND_ALPHA_OPAQUE && !has_alpha) { + *val &= LCD_LAYER_ALPHA_DISABLED; + return; + } + + kmb_write_lcd(kmb, LCD_LAYERn_ALPHA(plane_id), plane_alpha); +} + static void kmb_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -296,6 +364,7 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, unsigned char plane_id; int num_planes; static dma_addr_t addr[MAX_SUB_PLANES]; + struct disp_cfg *init_disp_cfg; if (!plane || !new_plane_state || !old_plane_state) return; @@ -303,11 +372,12 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, fb = new_plane_state->fb; if (!fb) return; + num_planes = fb->format->num_planes; kmb_plane = to_kmb_plane(plane); - plane_id = kmb_plane->id; kmb = to_kmb(plane->dev); + plane_id = kmb_plane->id; spin_lock_irq(&kmb->irq_lock); if (kmb->kmb_under_flow || kmb->kmb_flush_done) { @@ -317,7 +387,8 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, } spin_unlock_irq(&kmb->irq_lock); - src_w = (new_plane_state->src_w >> 16); + init_disp_cfg = &kmb->init_disp_cfg[plane_id]; + src_w = new_plane_state->src_w >> 16; src_h = new_plane_state->src_h >> 16; crtc_x = new_plane_state->crtc_x; crtc_y = new_plane_state->crtc_y; @@ -400,20 +471,32 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, config_csc(kmb, plane_id); } + kmb_plane_set_alpha(kmb, plane->state, plane_id, &val); + kmb_write_lcd(kmb, LCD_LAYERn_CFG(plane_id), val); + /* Configure LCD_CONTROL */ + ctrl = kmb_read_lcd(kmb, LCD_CONTROL); + + /* Set layer blending config */ + ctrl &= ~LCD_CTRL_ALPHA_ALL; + ctrl |= LCD_CTRL_ALPHA_BOTTOM_VL1 | + LCD_CTRL_ALPHA_BLEND_VL2; + + ctrl &= ~LCD_CTRL_ALPHA_BLEND_BKGND_DISABLE; + switch (plane_id) { case LAYER_0: - ctrl = LCD_CTRL_VL1_ENABLE; + ctrl |= LCD_CTRL_VL1_ENABLE; break; case LAYER_1: - ctrl = LCD_CTRL_VL2_ENABLE; + ctrl |= LCD_CTRL_VL2_ENABLE; break; case LAYER_2: - ctrl = LCD_CTRL_GL1_ENABLE; + ctrl |= LCD_CTRL_GL1_ENABLE; break; case LAYER_3: - ctrl = LCD_CTRL_GL2_ENABLE; + ctrl |= LCD_CTRL_GL2_ENABLE; break; } @@ -425,7 +508,7 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, */ ctrl |= LCD_CTRL_VHSYNC_IDLE_LVL; - kmb_set_bitmask_lcd(kmb, LCD_CONTROL, ctrl); + kmb_write_lcd(kmb, LCD_CONTROL, ctrl); /* Enable pipeline AXI read transactions for the DMA * after setting graphics layers. This must be done @@ -448,6 +531,16 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, /* Enable DMA */ kmb_write_lcd(kmb, LCD_LAYERn_DMA_CFG(plane_id), dma_cfg); + + /* Save initial display config */ + if (!init_disp_cfg->width || + !init_disp_cfg->height || + !init_disp_cfg->format) { + init_disp_cfg->width = width; + init_disp_cfg->height = height; + init_disp_cfg->format = fb->format->format; + } + drm_dbg(&kmb->drm, "dma_cfg=0x%x LCD_DMA_CFG=0x%x\n", dma_cfg, kmb_read_lcd(kmb, LCD_LAYERn_DMA_CFG(plane_id))); @@ -490,6 +583,9 @@ struct kmb_plane *kmb_plane_init(struct drm_device *drm) enum drm_plane_type plane_type; const u32 *plane_formats; int num_plane_formats; + unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | + BIT(DRM_MODE_BLEND_PREMULTI) | + BIT(DRM_MODE_BLEND_COVERAGE); for (i = 0; i < KMB_MAX_PLANES; i++) { plane = drmm_kzalloc(drm, sizeof(*plane), GFP_KERNEL); @@ -521,8 +617,16 @@ struct kmb_plane *kmb_plane_init(struct drm_device *drm) drm_dbg(drm, "%s : %d i=%d type=%d", __func__, __LINE__, i, plane_type); + drm_plane_create_alpha_property(&plane->base_plane); + + drm_plane_create_blend_mode_property(&plane->base_plane, + blend_caps); + + drm_plane_create_zpos_immutable_property(&plane->base_plane, i); + drm_plane_helper_add(&plane->base_plane, &kmb_plane_helper_funcs); + if (plane_type == DRM_PLANE_TYPE_PRIMARY) { primary = plane; kmb->plane = plane; diff --git a/drivers/gpu/drm/kmb/kmb_plane.h b/drivers/gpu/drm/kmb/kmb_plane.h index 486490f7a3ec..b51144044fe8 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.h +++ b/drivers/gpu/drm/kmb/kmb_plane.h @@ -35,6 +35,9 @@ #define POSSIBLE_CRTCS 1 #define to_kmb_plane(x) container_of(x, struct kmb_plane, base_plane) +#define POSSIBLE_CRTCS 1 +#define KMB_MAX_PLANES 2 + enum layer_id { LAYER_0, LAYER_1, @@ -43,8 +46,6 @@ enum layer_id { /* KMB_MAX_PLANES */ }; -#define KMB_MAX_PLANES 1 - enum sub_plane_id { Y_PLANE, U_PLANE, @@ -62,6 +63,12 @@ struct layer_status { u32 ctrl; }; +struct disp_cfg { + unsigned int width; + unsigned int height; + unsigned int format; +}; + struct kmb_plane *kmb_plane_init(struct drm_device *drm); void kmb_plane_destroy(struct drm_plane *plane); #endif /* __KMB_PLANE_H__ */ diff --git a/drivers/gpu/drm/kmb/kmb_regs.h b/drivers/gpu/drm/kmb/kmb_regs.h index 48150569f702..9756101b0d32 100644 --- a/drivers/gpu/drm/kmb/kmb_regs.h +++ b/drivers/gpu/drm/kmb/kmb_regs.h @@ -43,8 +43,10 @@ #define LCD_CTRL_OUTPUT_ENABLED BIT(19) #define LCD_CTRL_BPORCH_ENABLE BIT(21) #define LCD_CTRL_FPORCH_ENABLE BIT(22) +#define LCD_CTRL_ALPHA_BLEND_BKGND_DISABLE BIT(23) #define LCD_CTRL_PIPELINE_DMA BIT(28) #define LCD_CTRL_VHSYNC_IDLE_LVL BIT(31) +#define LCD_CTRL_ALPHA_ALL (0xff << 6) /* interrupts */ #define LCD_INT_STATUS (0x4 * 0x001) @@ -115,6 +117,7 @@ #define LCD_LAYER_ALPHA_EMBED BIT(5) #define LCD_LAYER_ALPHA_COMBI (LCD_LAYER_ALPHA_STATIC | \ LCD_LAYER_ALPHA_EMBED) +#define LCD_LAYER_ALPHA_DISABLED ~(LCD_LAYER_ALPHA_COMBI) /* RGB multiplied with alpha */ #define LCD_LAYER_ALPHA_PREMULT BIT(6) #define LCD_LAYER_INVERT_COL BIT(7) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 5f81489fc60c..a4e80e499674 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -4,8 +4,6 @@ */ #include <linux/clk.h> -#include <linux/dma-mapping.h> -#include <linux/mailbox_controller.h> #include <linux/pm_runtime.h> #include <linux/soc/mediatek/mtk-cmdq.h> #include <linux/soc/mediatek/mtk-mmsys.h> @@ -52,11 +50,8 @@ struct mtk_drm_crtc { bool pending_async_planes; #if IS_REACHABLE(CONFIG_MTK_CMDQ) - struct mbox_client cmdq_cl; - struct mbox_chan *cmdq_chan; - struct cmdq_pkt cmdq_handle; + struct cmdq_client *cmdq_client; u32 cmdq_event; - u32 cmdq_vblank_cnt; #endif struct device *mmsys_dev; @@ -227,79 +222,9 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, } #if IS_REACHABLE(CONFIG_MTK_CMDQ) -static int mtk_drm_cmdq_pkt_create(struct mbox_chan *chan, struct cmdq_pkt *pkt, - size_t size) +static void ddp_cmdq_cb(struct cmdq_cb_data data) { - struct device *dev; - dma_addr_t dma_addr; - - pkt->va_base = kzalloc(size, GFP_KERNEL); - if (!pkt->va_base) { - kfree(pkt); - return -ENOMEM; - } - pkt->buf_size = size; - - dev = chan->mbox->dev; - dma_addr = dma_map_single(dev, pkt->va_base, pkt->buf_size, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_addr)) { - dev_err(dev, "dma map failed, size=%u\n", (u32)(u64)size); - kfree(pkt->va_base); - kfree(pkt); - return -ENOMEM; - } - - pkt->pa_base = dma_addr; - - return 0; -} - -static void mtk_drm_cmdq_pkt_destroy(struct mbox_chan *chan, struct cmdq_pkt *pkt) -{ - dma_unmap_single(chan->mbox->dev, pkt->pa_base, pkt->buf_size, - DMA_TO_DEVICE); - kfree(pkt->va_base); - kfree(pkt); -} - -static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) -{ - struct mtk_drm_crtc *mtk_crtc = container_of(cl, struct mtk_drm_crtc, cmdq_cl); - struct cmdq_cb_data *data = mssg; - struct mtk_crtc_state *state; - unsigned int i; - - state = to_mtk_crtc_state(mtk_crtc->base.state); - - state->pending_config = false; - - if (mtk_crtc->pending_planes) { - for (i = 0; i < mtk_crtc->layer_nr; i++) { - struct drm_plane *plane = &mtk_crtc->planes[i]; - struct mtk_plane_state *plane_state; - - plane_state = to_mtk_plane_state(plane->state); - - plane_state->pending.config = false; - } - mtk_crtc->pending_planes = false; - } - - if (mtk_crtc->pending_async_planes) { - for (i = 0; i < mtk_crtc->layer_nr; i++) { - struct drm_plane *plane = &mtk_crtc->planes[i]; - struct mtk_plane_state *plane_state; - - plane_state = to_mtk_plane_state(plane->state); - - plane_state->pending.async_config = false; - } - mtk_crtc->pending_async_planes = false; - } - - mtk_crtc->cmdq_vblank_cnt = 0; - mtk_drm_cmdq_pkt_destroy(mtk_crtc->cmdq_chan, data->pkt); + cmdq_pkt_destroy(data.data); } #endif @@ -453,8 +378,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc, state->pending_vrefresh, 0, cmdq_handle); - if (!cmdq_handle) - state->pending_config = false; + state->pending_config = false; } if (mtk_crtc->pending_planes) { @@ -474,12 +398,9 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc, mtk_ddp_comp_layer_config(comp, local_layer, plane_state, cmdq_handle); - if (!cmdq_handle) - plane_state->pending.config = false; + plane_state->pending.config = false; } - - if (!cmdq_handle) - mtk_crtc->pending_planes = false; + mtk_crtc->pending_planes = false; } if (mtk_crtc->pending_async_planes) { @@ -499,12 +420,9 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc, mtk_ddp_comp_layer_config(comp, local_layer, plane_state, cmdq_handle); - if (!cmdq_handle) - plane_state->pending.async_config = false; + plane_state->pending.async_config = false; } - - if (!cmdq_handle) - mtk_crtc->pending_async_planes = false; + mtk_crtc->pending_async_planes = false; } } @@ -512,7 +430,7 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, bool needs_vblank) { #if IS_REACHABLE(CONFIG_MTK_CMDQ) - struct cmdq_pkt *cmdq_handle = &mtk_crtc->cmdq_handle; + struct cmdq_pkt *cmdq_handle; #endif struct drm_crtc *crtc = &mtk_crtc->base; struct mtk_drm_private *priv = crtc->dev->dev_private; @@ -550,24 +468,14 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, mtk_mutex_release(mtk_crtc->mutex); } #if IS_REACHABLE(CONFIG_MTK_CMDQ) - if (mtk_crtc->cmdq_chan) { - mbox_flush(mtk_crtc->cmdq_chan, 2000); - cmdq_handle->cmd_buf_size = 0; + if (mtk_crtc->cmdq_client) { + mbox_flush(mtk_crtc->cmdq_client->chan, 2000); + cmdq_handle = cmdq_pkt_create(mtk_crtc->cmdq_client, PAGE_SIZE); cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event); cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event, false); mtk_crtc_ddp_config(crtc, cmdq_handle); cmdq_pkt_finalize(cmdq_handle); - dma_sync_single_for_device(mtk_crtc->cmdq_chan->mbox->dev, - cmdq_handle->pa_base, - cmdq_handle->cmd_buf_size, - DMA_TO_DEVICE); - /* - * CMDQ command should execute in next vblank, - * If it fail to execute in next 2 vblank, timeout happen. - */ - mtk_crtc->cmdq_vblank_cnt = 2; - mbox_send_message(mtk_crtc->cmdq_chan, cmdq_handle); - mbox_client_txdone(mtk_crtc->cmdq_chan, 0); + cmdq_pkt_flush_async(cmdq_handle, ddp_cmdq_cb, cmdq_handle); } #endif mtk_crtc->config_updating = false; @@ -581,15 +489,12 @@ static void mtk_crtc_ddp_irq(void *data) struct mtk_drm_private *priv = crtc->dev->dev_private; #if IS_REACHABLE(CONFIG_MTK_CMDQ) - if (!priv->data->shadow_register && !mtk_crtc->cmdq_chan) - mtk_crtc_ddp_config(crtc, NULL); - else if (mtk_crtc->cmdq_vblank_cnt > 0 && --mtk_crtc->cmdq_vblank_cnt == 0) - DRM_ERROR("mtk_crtc %d CMDQ execute command timeout!\n", - drm_crtc_index(&mtk_crtc->base)); + if (!priv->data->shadow_register && !mtk_crtc->cmdq_client) #else if (!priv->data->shadow_register) - mtk_crtc_ddp_config(crtc, NULL); #endif + mtk_crtc_ddp_config(crtc, NULL); + mtk_drm_finish_page_flip(mtk_crtc); } @@ -924,20 +829,16 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, mutex_init(&mtk_crtc->hw_lock); #if IS_REACHABLE(CONFIG_MTK_CMDQ) - mtk_crtc->cmdq_cl.dev = mtk_crtc->mmsys_dev; - mtk_crtc->cmdq_cl.tx_block = false; - mtk_crtc->cmdq_cl.knows_txdone = true; - mtk_crtc->cmdq_cl.rx_callback = ddp_cmdq_cb; - mtk_crtc->cmdq_chan = - mbox_request_channel(&mtk_crtc->cmdq_cl, - drm_crtc_index(&mtk_crtc->base)); - if (IS_ERR(mtk_crtc->cmdq_chan)) { + mtk_crtc->cmdq_client = + cmdq_mbox_create(mtk_crtc->mmsys_dev, + drm_crtc_index(&mtk_crtc->base)); + if (IS_ERR(mtk_crtc->cmdq_client)) { dev_dbg(dev, "mtk_crtc %d failed to create mailbox client, writing register by CPU now\n", drm_crtc_index(&mtk_crtc->base)); - mtk_crtc->cmdq_chan = NULL; + mtk_crtc->cmdq_client = NULL; } - if (mtk_crtc->cmdq_chan) { + if (mtk_crtc->cmdq_client) { ret = of_property_read_u32_index(priv->mutex_node, "mediatek,gce-events", drm_crtc_index(&mtk_crtc->base), @@ -945,18 +846,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, if (ret) { dev_dbg(dev, "mtk_crtc %d failed to get mediatek,gce-events property\n", drm_crtc_index(&mtk_crtc->base)); - mbox_free_channel(mtk_crtc->cmdq_chan); - mtk_crtc->cmdq_chan = NULL; - } else { - ret = mtk_drm_cmdq_pkt_create(mtk_crtc->cmdq_chan, - &mtk_crtc->cmdq_handle, - PAGE_SIZE); - if (ret) { - dev_dbg(dev, "mtk_crtc %d failed to create cmdq packet\n", - drm_crtc_index(&mtk_crtc->base)); - mbox_free_channel(mtk_crtc->cmdq_chan); - mtk_crtc->cmdq_chan = NULL; - } + cmdq_mbox_destroy(mtk_crtc->cmdq_client); + mtk_crtc->cmdq_client = NULL; } } #endif diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 5879f67bc88c..ae11061727ff 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -14,10 +14,12 @@ config DRM_MSM select REGULATOR select DRM_KMS_HELPER select DRM_PANEL + select DRM_BRIDGE + select DRM_PANEL_BRIDGE select DRM_SCHED select SHMEM select TMPFS - select QCOM_SCM if ARCH_QCOM + select QCOM_SCM select WANT_DEV_COREDUMP select SND_SOC_HDMI_CODEC if SND_SOC select SYNC_FILE @@ -55,7 +57,7 @@ config DRM_MSM_GPU_SUDO config DRM_MSM_HDMI_HDCP bool "Enable HDMI HDCP support in MSM DRM driver" - depends on DRM_MSM && QCOM_SCM + depends on DRM_MSM default y help Choose this option to enable HDCP state machine diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 904535eda0c4..40577f8856d8 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -51,7 +51,6 @@ msm-y := \ disp/mdp5/mdp5_mixer.o \ disp/mdp5/mdp5_plane.o \ disp/mdp5/mdp5_smp.o \ - disp/dpu1/dpu_core_irq.o \ disp/dpu1/dpu_core_perf.o \ disp/dpu1/dpu_crtc.o \ disp/dpu1/dpu_encoder.o \ diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 4534633fe7cd..8fb847c174ff 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -571,13 +571,14 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) } icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); - ret = IS_ERR(icc_path); - if (ret) + if (IS_ERR(icc_path)) { + ret = PTR_ERR(icc_path); goto fail; + } ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem"); - ret = IS_ERR(ocmem_icc_path); - if (ret) { + if (IS_ERR(ocmem_icc_path)) { + ret = PTR_ERR(ocmem_icc_path); /* allow -ENODATA, ocmem icc is optional */ if (ret != -ENODATA) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 82bebb40234d..a96ee79cc5e0 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -699,13 +699,14 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) } icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); - ret = IS_ERR(icc_path); - if (ret) + if (IS_ERR(icc_path)) { + ret = PTR_ERR(icc_path); goto fail; + } ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem"); - ret = IS_ERR(ocmem_icc_path); - if (ret) { + if (IS_ERR(ocmem_icc_path)) { + ret = PTR_ERR(ocmem_icc_path); /* allow -ENODATA, ocmem icc is optional */ if (ret != -ENODATA) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c index c9d11d57aed6..dd593ec2bc56 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c @@ -138,7 +138,7 @@ reset_set(void *data, u64 val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(reset_fops, NULL, reset_set, "%llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(reset_fops, NULL, reset_set, "%llx\n"); void a5xx_debugfs_init(struct msm_gpu *gpu, struct drm_minor *minor) @@ -154,6 +154,6 @@ void a5xx_debugfs_init(struct msm_gpu *gpu, struct drm_minor *minor) ARRAY_SIZE(a5xx_debugfs_list), minor->debugfs_root, minor); - debugfs_create_file("reset", S_IWUGO, minor->debugfs_root, dev, - &reset_fops); + debugfs_create_file_unsafe("reset", S_IWUGO, minor->debugfs_root, dev, + &reset_fops); } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index a7c58018959f..71e52b2b2025 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -296,6 +296,8 @@ int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) u32 val; int request, ack; + WARN_ON_ONCE(!mutex_is_locked(&gmu->lock)); + if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits)) return -EINVAL; @@ -337,6 +339,8 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) { int bit; + WARN_ON_ONCE(!mutex_is_locked(&gmu->lock)); + if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits)) return; @@ -512,11 +516,11 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; struct platform_device *pdev = to_platform_device(gmu->dev); void __iomem *pdcptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc"); - void __iomem *seqptr; + void __iomem *seqptr = NULL; uint32_t pdc_address_offset; bool pdc_in_aop = false; - if (!pdcptr) + if (IS_ERR(pdcptr)) goto err; if (adreno_is_a650(adreno_gpu) || adreno_is_a660_family(adreno_gpu)) @@ -528,7 +532,7 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) if (!pdc_in_aop) { seqptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc_seq"); - if (!seqptr) + if (IS_ERR(seqptr)) goto err; } @@ -887,7 +891,7 @@ static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index]; gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true); - if (IS_ERR_OR_NULL(gpu_opp)) + if (IS_ERR(gpu_opp)) return; gmu->freq = 0; /* so a6xx_gmu_set_freq() doesn't exit early */ @@ -901,7 +905,7 @@ static void a6xx_gmu_set_initial_bw(struct msm_gpu *gpu, struct a6xx_gmu *gmu) unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index]; gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true); - if (IS_ERR_OR_NULL(gpu_opp)) + if (IS_ERR(gpu_opp)) return; dev_pm_opp_set_opp(&gpu->pdev->dev, gpu_opp); @@ -1482,6 +1486,8 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) if (!pdev) return -ENODEV; + mutex_init(&gmu->lock); + gmu->dev = &pdev->dev; of_dma_configure(gmu->dev, node, true); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index 3c74f64e3126..84bd516f01e8 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -44,6 +44,9 @@ struct a6xx_gmu_bo { struct a6xx_gmu { struct device *dev; + /* For serializing communication with the GMU: */ + struct mutex lock; + struct msm_gem_address_space *aspace; void * __iomem mmio; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 40c9fef457a4..267a880811d6 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -106,7 +106,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, u32 asid; u64 memptr = rbmemptr(ring, ttbr0); - if (ctx == a6xx_gpu->cur_ctx) + if (ctx->seqno == a6xx_gpu->cur_ctx_seqno) return; if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid)) @@ -139,7 +139,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, OUT_PKT7(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, 0x31); - a6xx_gpu->cur_ctx = ctx; + a6xx_gpu->cur_ctx_seqno = ctx->seqno; } static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) @@ -881,7 +881,7 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) -static int a6xx_hw_init(struct msm_gpu *gpu) +static int hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); @@ -1081,7 +1081,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu) /* Always come up on rb 0 */ a6xx_gpu->cur_ring = gpu->rb[0]; - a6xx_gpu->cur_ctx = NULL; + a6xx_gpu->cur_ctx_seqno = 0; /* Enable the SQE_to start the CP engine */ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); @@ -1135,6 +1135,19 @@ out: return ret; } +static int a6xx_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int ret; + + mutex_lock(&a6xx_gpu->gmu.lock); + ret = hw_init(gpu); + mutex_unlock(&a6xx_gpu->gmu.lock); + + return ret; +} + static void a6xx_dump(struct msm_gpu *gpu) { DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", @@ -1509,7 +1522,9 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) trace_msm_gpu_resume(0); + mutex_lock(&a6xx_gpu->gmu.lock); ret = a6xx_gmu_resume(a6xx_gpu); + mutex_unlock(&a6xx_gpu->gmu.lock); if (ret) return ret; @@ -1532,7 +1547,9 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) msm_devfreq_suspend(gpu); + mutex_lock(&a6xx_gpu->gmu.lock); ret = a6xx_gmu_stop(a6xx_gpu); + mutex_unlock(&a6xx_gpu->gmu.lock); if (ret) return ret; @@ -1547,18 +1564,19 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - static DEFINE_MUTEX(perfcounter_oob); - mutex_lock(&perfcounter_oob); + mutex_lock(&a6xx_gpu->gmu.lock); /* Force the GPU power on so we can read this register */ a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO, - REG_A6XX_CP_ALWAYS_ON_COUNTER_HI); + REG_A6XX_CP_ALWAYS_ON_COUNTER_HI); a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - mutex_unlock(&perfcounter_oob); + + mutex_unlock(&a6xx_gpu->gmu.lock); + return 0; } @@ -1622,6 +1640,16 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; } +void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + mutex_lock(&a6xx_gpu->gmu.lock); + a6xx_gmu_set_freq(gpu, opp); + mutex_unlock(&a6xx_gpu->gmu.lock); +} + static struct msm_gem_address_space * a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) { @@ -1766,7 +1794,7 @@ static const struct adreno_gpu_funcs funcs = { #endif .gpu_busy = a6xx_gpu_busy, .gpu_get_freq = a6xx_gmu_get_freq, - .gpu_set_freq = a6xx_gmu_set_freq, + .gpu_set_freq = a6xx_gpu_set_freq, #if defined(CONFIG_DRM_MSM_GPU_STATE) .gpu_state_get = a6xx_gpu_state_get, .gpu_state_put = a6xx_gpu_state_put, @@ -1810,6 +1838,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev))) adreno_gpu->base.hw_apriv = true; + /* + * For now only clamp to idle freq for devices where this is known not + * to cause power supply issues: + */ + if (info && (info->revn == 618)) + gpu->clamp_to_idle = true; + a6xx_llc_slices_init(pdev, a6xx_gpu); ret = a6xx_set_supported_hw(&pdev->dev, config->rev); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 0bc2d062f54a..8e5527c881b1 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -19,7 +19,16 @@ struct a6xx_gpu { uint64_t sqe_iova; struct msm_ringbuffer *cur_ring; - struct msm_file_private *cur_ctx; + + /** + * cur_ctx_seqno: + * + * The ctx->seqno value of the context with current pgtables + * installed. Tracked by seqno rather than pointer value to + * avoid dangling pointers, and cases where a ctx can be freed + * and a new one created with the same address. + */ + int cur_ctx_seqno; struct a6xx_gmu gmu; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index e8f65cd8eca6..7501849ed15d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -180,7 +180,7 @@ static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, msm_readl((ptr) + ((offset) << 2)) /* read a value from the CX debug bus */ -static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset, +static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset, u32 *data) { u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c deleted file mode 100644 index d2457490930b..000000000000 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. - */ - -#define pr_fmt(fmt) "[drm:%s:%d] " fmt, __func__, __LINE__ - -#include <linux/debugfs.h> -#include <linux/irqdomain.h> -#include <linux/irq.h> -#include <linux/kthread.h> - -#include "dpu_core_irq.h" -#include "dpu_trace.h" - -/** - * dpu_core_irq_callback_handler - dispatch core interrupts - * @arg: private data of callback handler - * @irq_idx: interrupt index - */ -static void dpu_core_irq_callback_handler(void *arg, int irq_idx) -{ - struct dpu_kms *dpu_kms = arg; - struct dpu_irq *irq_obj = &dpu_kms->irq_obj; - struct dpu_irq_callback *cb; - - VERB("irq_idx=%d\n", irq_idx); - - if (list_empty(&irq_obj->irq_cb_tbl[irq_idx])) - DRM_ERROR("no registered cb, idx:%d\n", irq_idx); - - atomic_inc(&irq_obj->irq_counts[irq_idx]); - - /* - * Perform registered function callback - */ - list_for_each_entry(cb, &irq_obj->irq_cb_tbl[irq_idx], list) - if (cb->func) - cb->func(cb->arg, irq_idx); -} - -u32 dpu_core_irq_read(struct dpu_kms *dpu_kms, int irq_idx, bool clear) -{ - if (!dpu_kms->hw_intr || - !dpu_kms->hw_intr->ops.get_interrupt_status) - return 0; - - if (irq_idx < 0) { - DPU_ERROR("[%pS] invalid irq_idx=%d\n", - __builtin_return_address(0), irq_idx); - return 0; - } - - return dpu_kms->hw_intr->ops.get_interrupt_status(dpu_kms->hw_intr, - irq_idx, clear); -} - -int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, int irq_idx, - struct dpu_irq_callback *register_irq_cb) -{ - unsigned long irq_flags; - - if (!dpu_kms->irq_obj.irq_cb_tbl) { - DPU_ERROR("invalid params\n"); - return -EINVAL; - } - - if (!register_irq_cb || !register_irq_cb->func) { - DPU_ERROR("invalid irq_cb:%d func:%d\n", - register_irq_cb != NULL, - register_irq_cb ? - register_irq_cb->func != NULL : -1); - return -EINVAL; - } - - if (irq_idx < 0 || irq_idx >= dpu_kms->hw_intr->total_irqs) { - DPU_ERROR("invalid IRQ index: [%d]\n", irq_idx); - return -EINVAL; - } - - VERB("[%pS] irq_idx=%d\n", __builtin_return_address(0), irq_idx); - - irq_flags = dpu_kms->hw_intr->ops.lock(dpu_kms->hw_intr); - trace_dpu_core_irq_register_callback(irq_idx, register_irq_cb); - list_del_init(®ister_irq_cb->list); - list_add_tail(®ister_irq_cb->list, - &dpu_kms->irq_obj.irq_cb_tbl[irq_idx]); - if (list_is_first(®ister_irq_cb->list, - &dpu_kms->irq_obj.irq_cb_tbl[irq_idx])) { - int ret = dpu_kms->hw_intr->ops.enable_irq_locked( - dpu_kms->hw_intr, - irq_idx); - if (ret) - DPU_ERROR("Fail to enable IRQ for irq_idx:%d\n", - irq_idx); - } - dpu_kms->hw_intr->ops.unlock(dpu_kms->hw_intr, irq_flags); - - return 0; -} - -int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, - struct dpu_irq_callback *register_irq_cb) -{ - unsigned long irq_flags; - - if (!dpu_kms->irq_obj.irq_cb_tbl) { - DPU_ERROR("invalid params\n"); - return -EINVAL; - } - - if (!register_irq_cb || !register_irq_cb->func) { - DPU_ERROR("invalid irq_cb:%d func:%d\n", - register_irq_cb != NULL, - register_irq_cb ? - register_irq_cb->func != NULL : -1); - return -EINVAL; - } - - if (irq_idx < 0 || irq_idx >= dpu_kms->hw_intr->total_irqs) { - DPU_ERROR("invalid IRQ index: [%d]\n", irq_idx); - return -EINVAL; - } - - VERB("[%pS] irq_idx=%d\n", __builtin_return_address(0), irq_idx); - - irq_flags = dpu_kms->hw_intr->ops.lock(dpu_kms->hw_intr); - trace_dpu_core_irq_unregister_callback(irq_idx, register_irq_cb); - list_del_init(®ister_irq_cb->list); - /* empty callback list but interrupt is still enabled */ - if (list_empty(&dpu_kms->irq_obj.irq_cb_tbl[irq_idx])) { - int ret = dpu_kms->hw_intr->ops.disable_irq_locked( - dpu_kms->hw_intr, - irq_idx); - if (ret) - DPU_ERROR("Fail to disable IRQ for irq_idx:%d\n", - irq_idx); - VERB("irq_idx=%d ret=%d\n", irq_idx, ret); - } - dpu_kms->hw_intr->ops.unlock(dpu_kms->hw_intr, irq_flags); - - return 0; -} - -static void dpu_clear_all_irqs(struct dpu_kms *dpu_kms) -{ - if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.clear_all_irqs) - return; - - dpu_kms->hw_intr->ops.clear_all_irqs(dpu_kms->hw_intr); -} - -static void dpu_disable_all_irqs(struct dpu_kms *dpu_kms) -{ - if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.disable_all_irqs) - return; - - dpu_kms->hw_intr->ops.disable_all_irqs(dpu_kms->hw_intr); -} - -#ifdef CONFIG_DEBUG_FS -static int dpu_debugfs_core_irq_show(struct seq_file *s, void *v) -{ - struct dpu_kms *dpu_kms = s->private; - struct dpu_irq *irq_obj = &dpu_kms->irq_obj; - struct dpu_irq_callback *cb; - unsigned long irq_flags; - int i, irq_count, cb_count; - - if (WARN_ON(!irq_obj->irq_cb_tbl)) - return 0; - - for (i = 0; i < irq_obj->total_irqs; i++) { - irq_flags = dpu_kms->hw_intr->ops.lock(dpu_kms->hw_intr); - cb_count = 0; - irq_count = atomic_read(&irq_obj->irq_counts[i]); - list_for_each_entry(cb, &irq_obj->irq_cb_tbl[i], list) - cb_count++; - dpu_kms->hw_intr->ops.unlock(dpu_kms->hw_intr, irq_flags); - - if (irq_count || cb_count) - seq_printf(s, "idx:%d irq:%d cb:%d\n", - i, irq_count, cb_count); - } - - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(dpu_debugfs_core_irq); - -void dpu_debugfs_core_irq_init(struct dpu_kms *dpu_kms, - struct dentry *parent) -{ - debugfs_create_file("core_irq", 0600, parent, dpu_kms, - &dpu_debugfs_core_irq_fops); -} -#endif - -void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms) -{ - int i; - - pm_runtime_get_sync(&dpu_kms->pdev->dev); - dpu_clear_all_irqs(dpu_kms); - dpu_disable_all_irqs(dpu_kms); - pm_runtime_put_sync(&dpu_kms->pdev->dev); - - /* Create irq callbacks for all possible irq_idx */ - dpu_kms->irq_obj.total_irqs = dpu_kms->hw_intr->total_irqs; - dpu_kms->irq_obj.irq_cb_tbl = kcalloc(dpu_kms->irq_obj.total_irqs, - sizeof(struct list_head), GFP_KERNEL); - dpu_kms->irq_obj.irq_counts = kcalloc(dpu_kms->irq_obj.total_irqs, - sizeof(atomic_t), GFP_KERNEL); - for (i = 0; i < dpu_kms->irq_obj.total_irqs; i++) { - INIT_LIST_HEAD(&dpu_kms->irq_obj.irq_cb_tbl[i]); - atomic_set(&dpu_kms->irq_obj.irq_counts[i], 0); - } -} - -void dpu_core_irq_uninstall(struct dpu_kms *dpu_kms) -{ - int i; - - pm_runtime_get_sync(&dpu_kms->pdev->dev); - for (i = 0; i < dpu_kms->irq_obj.total_irqs; i++) - if (!list_empty(&dpu_kms->irq_obj.irq_cb_tbl[i])) - DPU_ERROR("irq_idx=%d still enabled/registered\n", i); - - dpu_clear_all_irqs(dpu_kms); - dpu_disable_all_irqs(dpu_kms); - pm_runtime_put_sync(&dpu_kms->pdev->dev); - - kfree(dpu_kms->irq_obj.irq_cb_tbl); - kfree(dpu_kms->irq_obj.irq_counts); - dpu_kms->irq_obj.irq_cb_tbl = NULL; - dpu_kms->irq_obj.irq_counts = NULL; - dpu_kms->irq_obj.total_irqs = 0; -} - -irqreturn_t dpu_core_irq(struct dpu_kms *dpu_kms) -{ - /* - * Dispatch to HW driver to handle interrupt lookup that is being - * fired. When matching interrupt is located, HW driver will call to - * dpu_core_irq_callback_handler with the irq_idx from the lookup table. - * dpu_core_irq_callback_handler will perform the registered function - * callback, and do the interrupt status clearing once the registered - * callback is finished. - * Function will also clear the interrupt status after reading. - */ - dpu_kms->hw_intr->ops.dispatch_irqs( - dpu_kms->hw_intr, - dpu_core_irq_callback_handler, - dpu_kms); - - return IRQ_HANDLED; -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 768012243b44..967245b8cc02 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2014-2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2014-2021 The Linux Foundation. All rights reserved. * Copyright (C) 2013 Red Hat * Author: Rob Clark <robdclark@gmail.com> */ @@ -70,17 +70,147 @@ static struct drm_encoder *get_encoder_from_crtc(struct drm_crtc *crtc) return NULL; } -static u32 dpu_crtc_get_vblank_counter(struct drm_crtc *crtc) +static enum dpu_crtc_crc_source dpu_crtc_parse_crc_source(const char *src_name) { - struct drm_encoder *encoder; + if (!src_name || + !strcmp(src_name, "none")) + return DPU_CRTC_CRC_SOURCE_NONE; + if (!strcmp(src_name, "auto") || + !strcmp(src_name, "lm")) + return DPU_CRTC_CRC_SOURCE_LAYER_MIXER; + + return DPU_CRTC_CRC_SOURCE_INVALID; +} - encoder = get_encoder_from_crtc(crtc); +static int dpu_crtc_verify_crc_source(struct drm_crtc *crtc, + const char *src_name, size_t *values_cnt) +{ + enum dpu_crtc_crc_source source = dpu_crtc_parse_crc_source(src_name); + struct dpu_crtc_state *crtc_state = to_dpu_crtc_state(crtc->state); + + if (source < 0) { + DRM_DEBUG_DRIVER("Invalid source %s for CRTC%d\n", src_name, crtc->index); + return -EINVAL; + } + + if (source == DPU_CRTC_CRC_SOURCE_LAYER_MIXER) + *values_cnt = crtc_state->num_mixers; + + return 0; +} + +static int dpu_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) +{ + enum dpu_crtc_crc_source source = dpu_crtc_parse_crc_source(src_name); + enum dpu_crtc_crc_source current_source; + struct dpu_crtc_state *crtc_state; + struct drm_device *drm_dev = crtc->dev; + struct dpu_crtc_mixer *m; + + bool was_enabled; + bool enable = false; + int i, ret = 0; + + if (source < 0) { + DRM_DEBUG_DRIVER("Invalid CRC source %s for CRTC%d\n", src_name, crtc->index); + return -EINVAL; + } + + ret = drm_modeset_lock(&crtc->mutex, NULL); + + if (ret) + return ret; + + enable = (source != DPU_CRTC_CRC_SOURCE_NONE); + crtc_state = to_dpu_crtc_state(crtc->state); + + spin_lock_irq(&drm_dev->event_lock); + current_source = crtc_state->crc_source; + spin_unlock_irq(&drm_dev->event_lock); + + was_enabled = (current_source != DPU_CRTC_CRC_SOURCE_NONE); + + if (!was_enabled && enable) { + ret = drm_crtc_vblank_get(crtc); + + if (ret) + goto cleanup; + + } else if (was_enabled && !enable) { + drm_crtc_vblank_put(crtc); + } + + spin_lock_irq(&drm_dev->event_lock); + crtc_state->crc_source = source; + spin_unlock_irq(&drm_dev->event_lock); + + crtc_state->crc_frame_skip_count = 0; + + for (i = 0; i < crtc_state->num_mixers; ++i) { + m = &crtc_state->mixers[i]; + + if (!m->hw_lm || !m->hw_lm->ops.setup_misr) + continue; + + /* Calculate MISR over 1 frame */ + m->hw_lm->ops.setup_misr(m->hw_lm, true, 1); + } + + +cleanup: + drm_modeset_unlock(&crtc->mutex); + + return ret; +} + +static u32 dpu_crtc_get_vblank_counter(struct drm_crtc *crtc) +{ + struct drm_encoder *encoder = get_encoder_from_crtc(crtc); if (!encoder) { DRM_ERROR("no encoder found for crtc %d\n", crtc->index); - return false; + return 0; + } + + return dpu_encoder_get_vsync_count(encoder); +} + + +static int dpu_crtc_get_crc(struct drm_crtc *crtc) +{ + struct dpu_crtc_state *crtc_state; + struct dpu_crtc_mixer *m; + u32 crcs[CRTC_DUAL_MIXERS]; + + int i = 0; + int rc = 0; + + crtc_state = to_dpu_crtc_state(crtc->state); + + BUILD_BUG_ON(ARRAY_SIZE(crcs) != ARRAY_SIZE(crtc_state->mixers)); + + /* Skip first 2 frames in case of "uncooked" CRCs */ + if (crtc_state->crc_frame_skip_count < 2) { + crtc_state->crc_frame_skip_count++; + return 0; } - return dpu_encoder_get_frame_count(encoder); + for (i = 0; i < crtc_state->num_mixers; ++i) { + + m = &crtc_state->mixers[i]; + + if (!m->hw_lm || !m->hw_lm->ops.collect_misr) + continue; + + rc = m->hw_lm->ops.collect_misr(m->hw_lm, &crcs[i]); + + if (rc) { + DRM_DEBUG_DRIVER("MISR read failed\n"); + return rc; + } + } + + return drm_crtc_add_crc_entry(crtc, true, + drm_crtc_accurate_vblank_count(crtc), crcs); } static bool dpu_crtc_get_scanout_position(struct drm_crtc *crtc, @@ -389,6 +519,9 @@ void dpu_crtc_vblank_callback(struct drm_crtc *crtc) dpu_crtc->vblank_cb_time = ktime_get(); else dpu_crtc->vblank_cb_count++; + + dpu_crtc_get_crc(crtc); + drm_crtc_handle_vblank(crtc); trace_dpu_crtc_vblank_cb(DRMID(crtc)); } @@ -1332,6 +1465,8 @@ static const struct drm_crtc_funcs dpu_crtc_funcs = { .atomic_destroy_state = dpu_crtc_destroy_state, .late_register = dpu_crtc_late_register, .early_unregister = dpu_crtc_early_unregister, + .verify_crc_source = dpu_crtc_verify_crc_source, + .set_crc_source = dpu_crtc_set_crc_source, .enable_vblank = msm_crtc_enable_vblank, .disable_vblank = msm_crtc_disable_vblank, .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h index cec3474340e8..ae9546ca1359 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2015-2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2015-2021 The Linux Foundation. All rights reserved. * Copyright (C) 2013 Red Hat * Author: Rob Clark <robdclark@gmail.com> */ @@ -70,6 +70,19 @@ struct dpu_crtc_smmu_state_data { }; /** + * enum dpu_crtc_crc_source: CRC source + * @DPU_CRTC_CRC_SOURCE_NONE: no source set + * @DPU_CRTC_CRC_SOURCE_LAYER_MIXER: CRC in layer mixer + * @DPU_CRTC_CRC_SOURCE_INVALID: Invalid source + */ +enum dpu_crtc_crc_source { + DPU_CRTC_CRC_SOURCE_NONE = 0, + DPU_CRTC_CRC_SOURCE_LAYER_MIXER, + DPU_CRTC_CRC_SOURCE_MAX, + DPU_CRTC_CRC_SOURCE_INVALID = -1 +}; + +/** * struct dpu_crtc_mixer: stores the map for each virtual pipeline in the CRTC * @hw_lm: LM HW Driver context * @lm_ctl: CTL Path HW driver context @@ -139,6 +152,7 @@ struct dpu_crtc_frame_event { * @event_lock : Spinlock around event handling code * @phandle: Pointer to power handler * @cur_perf : current performance committed to clock/bandwidth driver + * @crc_source : CRC source */ struct dpu_crtc { struct drm_crtc base; @@ -210,6 +224,9 @@ struct dpu_crtc_state { u32 num_ctls; struct dpu_hw_ctl *hw_ctls[CRTC_DUAL_MIXERS]; + + enum dpu_crtc_crc_source crc_source; + int crc_frame_skip_count; }; #define to_dpu_crtc_state(x) \ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 0e9d3fa1544b..e7ee4cfb8461 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -168,6 +168,7 @@ enum dpu_enc_rc_states { * @vsync_event_work: worker to handle vsync event for autorefresh * @topology: topology of the display * @idle_timeout: idle timeout duration in milliseconds + * @dp: msm_dp pointer, for DP encoders */ struct dpu_encoder_virt { struct drm_encoder base; @@ -206,6 +207,8 @@ struct dpu_encoder_virt { struct msm_display_topology topology; u32 idle_timeout; + + struct msm_dp *dp; }; #define to_dpu_encoder_virt(x) container_of(x, struct dpu_encoder_virt, base) @@ -395,19 +398,11 @@ int dpu_encoder_helper_unregister_irq(struct dpu_encoder_phys *phys_enc, return 0; } -int dpu_encoder_get_frame_count(struct drm_encoder *drm_enc) +int dpu_encoder_get_vsync_count(struct drm_encoder *drm_enc) { - struct dpu_encoder_virt *dpu_enc; - struct dpu_encoder_phys *phys; - int framecount = 0; - - dpu_enc = to_dpu_encoder_virt(drm_enc); - phys = dpu_enc ? dpu_enc->cur_master : NULL; - - if (phys && phys->ops.get_frame_count) - framecount = phys->ops.get_frame_count(phys); - - return framecount; + struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); + struct dpu_encoder_phys *phys = dpu_enc ? dpu_enc->cur_master : NULL; + return phys ? atomic_read(&phys->vsync_cnt) : 0; } int dpu_encoder_get_linecount(struct drm_encoder *drm_enc) @@ -1000,8 +995,8 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc, trace_dpu_enc_mode_set(DRMID(drm_enc)); - if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS && priv->dp) - msm_dp_display_mode_set(priv->dp, drm_enc, mode, adj_mode); + if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS) + msm_dp_display_mode_set(dpu_enc->dp, drm_enc, mode, adj_mode); list_for_each_entry(conn_iter, connector_list, head) if (conn_iter->encoder == drm_enc) @@ -1182,9 +1177,8 @@ static void dpu_encoder_virt_enable(struct drm_encoder *drm_enc) _dpu_encoder_virt_enable_helper(drm_enc); - if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS && priv->dp) { - ret = msm_dp_display_enable(priv->dp, - drm_enc); + if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS) { + ret = msm_dp_display_enable(dpu_enc->dp, drm_enc); if (ret) { DPU_ERROR_ENC(dpu_enc, "dp display enable failed: %d\n", ret); @@ -1224,8 +1218,8 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) /* wait for idle */ dpu_encoder_wait_for_event(drm_enc, MSM_ENC_TX_COMPLETE); - if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS && priv->dp) { - if (msm_dp_display_pre_disable(priv->dp, drm_enc)) + if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS) { + if (msm_dp_display_pre_disable(dpu_enc->dp, drm_enc)) DPU_ERROR_ENC(dpu_enc, "dp display push idle failed\n"); } @@ -1253,8 +1247,8 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) DPU_DEBUG_ENC(dpu_enc, "encoder disabled\n"); - if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS && priv->dp) { - if (msm_dp_display_disable(priv->dp, drm_enc)) + if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS) { + if (msm_dp_display_disable(dpu_enc->dp, drm_enc)) DPU_ERROR_ENC(dpu_enc, "dp display disable failed\n"); } @@ -2170,7 +2164,8 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc, timer_setup(&dpu_enc->vsync_event_timer, dpu_encoder_vsync_event_handler, 0); - + else if (disp_info->intf_type == DRM_MODE_ENCODER_TMDS) + dpu_enc->dp = priv->dp[disp_info->h_tile_instance[0]]; INIT_DELAYED_WORK(&dpu_enc->delayed_off_work, dpu_encoder_off_work); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h index 99a5d73c9b88..e241914a9677 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h @@ -163,9 +163,9 @@ void dpu_encoder_set_idle_timeout(struct drm_encoder *drm_enc, int dpu_encoder_get_linecount(struct drm_encoder *drm_enc); /** - * dpu_encoder_get_frame_count - get interface frame count for the encoder. + * dpu_encoder_get_vsync_count - get vsync count for the encoder. * @drm_enc: Pointer to previously created drm encoder structure */ -int dpu_encoder_get_frame_count(struct drm_encoder *drm_enc); +int dpu_encoder_get_vsync_count(struct drm_encoder *drm_enc); #endif /* __DPU_ENCODER_H__ */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index aa01698d6b25..34a6940d12c5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -42,7 +42,7 @@ static bool dpu_encoder_phys_cmd_is_master(struct dpu_encoder_phys *phys_enc) { - return (phys_enc->split_role != ENC_ROLE_SLAVE) ? true : false; + return (phys_enc->split_role != ENC_ROLE_SLAVE); } static bool dpu_encoder_phys_cmd_mode_fixup( diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index b131fd376192..ce6f32a919e5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -794,7 +794,7 @@ static const struct dpu_pingpong_cfg sm8150_pp[] = { DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30), -1), PP_BLK("pingpong_5", PINGPONG_5, 0x72800, MERGE_3D_2, sdm845_pp_sblk, - DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30), + DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), -1), }; @@ -844,7 +844,7 @@ static const struct dpu_intf_cfg sdm845_intf[] = { }; static const struct dpu_intf_cfg sc7180_intf[] = { - INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0, 24, INTF_SC7180_MASK, MDP_SSPP_TOP0_INTR, 24, 25), + INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, MSM_DP_CONTROLLER_0, 24, INTF_SC7180_MASK, MDP_SSPP_TOP0_INTR, 24, 25), INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0, 24, INTF_SC7180_MASK, MDP_SSPP_TOP0_INTR, 26, 27), }; @@ -958,12 +958,6 @@ static const struct dpu_perf_cfg sdm845_perf_data = { .min_core_ib = 2400000, .min_llcc_ib = 800000, .min_dram_ib = 800000, - .core_ib_ff = "6.0", - .core_clk_ff = "1.0", - .comp_ratio_rt = - "NV12/5/1/1.23 AB24/5/1/1.23 XB24/5/1/1.23", - .comp_ratio_nrt = - "NV12/5/1/1.25 AB24/5/1/1.25 XB24/5/1/1.25", .undersized_prefill_lines = 2, .xtra_prefill_lines = 2, .dest_scale_prefill_lines = 3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index d2a945a27cfa..4ade44bbd37e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -676,10 +676,6 @@ struct dpu_perf_cdp_cfg { * @min_core_ib minimum mnoc ib vote in kbps * @min_llcc_ib minimum llcc ib vote in kbps * @min_dram_ib minimum dram ib vote in kbps - * @core_ib_ff core instantaneous bandwidth fudge factor - * @core_clk_ff core clock fudge factor - * @comp_ratio_rt string of 0 or more of <fourcc>/<ven>/<mod>/<comp ratio> - * @comp_ratio_nrt string of 0 or more of <fourcc>/<ven>/<mod>/<comp ratio> * @undersized_prefill_lines undersized prefill in lines * @xtra_prefill_lines extra prefill latency in lines * @dest_scale_prefill_lines destination scaler latency in lines @@ -702,10 +698,6 @@ struct dpu_perf_cfg { u32 min_core_ib; u32 min_llcc_ib; u32 min_dram_ib; - const char *core_ib_ff; - const char *core_clk_ff; - const char *comp_ratio_rt; - const char *comp_ratio_nrt; u32 undersized_prefill_lines; u32 xtra_prefill_lines; u32 dest_scale_prefill_lines; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c index 2e816f232e85..d2b6dca487e3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c @@ -3,12 +3,15 @@ */ #include <linux/bitops.h> +#include <linux/debugfs.h> #include <linux/slab.h> +#include "dpu_core_irq.h" #include "dpu_kms.h" #include "dpu_hw_interrupts.h" #include "dpu_hw_util.h" #include "dpu_hw_mdss.h" +#include "dpu_trace.h" /** * Register offsets in MDSS register file for the interrupt registers @@ -117,25 +120,33 @@ static const struct dpu_intr_reg dpu_intr_set[] = { #define DPU_IRQ_REG(irq_idx) (irq_idx / 32) #define DPU_IRQ_MASK(irq_idx) (BIT(irq_idx % 32)) -static void dpu_hw_intr_clear_intr_status_nolock(struct dpu_hw_intr *intr, - int irq_idx) +/** + * dpu_core_irq_callback_handler - dispatch core interrupts + * @arg: private data of callback handler + * @irq_idx: interrupt index + */ +static void dpu_core_irq_callback_handler(struct dpu_kms *dpu_kms, int irq_idx) { - int reg_idx; + struct dpu_irq_callback *cb; - if (!intr) - return; + VERB("irq_idx=%d\n", irq_idx); - reg_idx = DPU_IRQ_REG(irq_idx); - DPU_REG_WRITE(&intr->hw, dpu_intr_set[reg_idx].clr_off, DPU_IRQ_MASK(irq_idx)); + if (list_empty(&dpu_kms->hw_intr->irq_cb_tbl[irq_idx])) + DRM_ERROR("no registered cb, idx:%d\n", irq_idx); - /* ensure register writes go through */ - wmb(); + atomic_inc(&dpu_kms->hw_intr->irq_counts[irq_idx]); + + /* + * Perform registered function callback + */ + list_for_each_entry(cb, &dpu_kms->hw_intr->irq_cb_tbl[irq_idx], list) + if (cb->func) + cb->func(cb->arg, irq_idx); } -static void dpu_hw_intr_dispatch_irq(struct dpu_hw_intr *intr, - void (*cbfunc)(void *, int), - void *arg) +irqreturn_t dpu_core_irq(struct dpu_kms *dpu_kms) { + struct dpu_hw_intr *intr = dpu_kms->hw_intr; int reg_idx; int irq_idx; u32 irq_status; @@ -144,13 +155,8 @@ static void dpu_hw_intr_dispatch_irq(struct dpu_hw_intr *intr, unsigned long irq_flags; if (!intr) - return; + return IRQ_NONE; - /* - * The dispatcher will save the IRQ status before calling here. - * Now need to go through each IRQ status and find matching - * irq lookup index. - */ spin_lock_irqsave(&intr->irq_lock, irq_flags); for (reg_idx = 0; reg_idx < ARRAY_SIZE(dpu_intr_set); reg_idx++) { if (!test_bit(reg_idx, &intr->irq_mask)) @@ -178,17 +184,8 @@ static void dpu_hw_intr_dispatch_irq(struct dpu_hw_intr *intr, */ while ((bit = ffs(irq_status)) != 0) { irq_idx = DPU_IRQ_IDX(reg_idx, bit - 1); - /* - * Once a match on irq mask, perform a callback - * to the given cbfunc. cbfunc will take care - * the interrupt status clearing. If cbfunc is - * not provided, then the interrupt clearing - * is here. - */ - if (cbfunc) - cbfunc(arg, irq_idx); - dpu_hw_intr_clear_intr_status_nolock(intr, irq_idx); + dpu_core_irq_callback_handler(dpu_kms, irq_idx); /* * When callback finish, clear the irq_status @@ -203,6 +200,8 @@ static void dpu_hw_intr_dispatch_irq(struct dpu_hw_intr *intr, wmb(); spin_unlock_irqrestore(&intr->irq_lock, irq_flags); + + return IRQ_HANDLED; } static int dpu_hw_intr_enable_irq_locked(struct dpu_hw_intr *intr, int irq_idx) @@ -303,12 +302,13 @@ static int dpu_hw_intr_disable_irq_locked(struct dpu_hw_intr *intr, int irq_idx) return 0; } -static int dpu_hw_intr_clear_irqs(struct dpu_hw_intr *intr) +static void dpu_clear_irqs(struct dpu_kms *dpu_kms) { + struct dpu_hw_intr *intr = dpu_kms->hw_intr; int i; if (!intr) - return -EINVAL; + return; for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) { if (test_bit(i, &intr->irq_mask)) @@ -318,16 +318,15 @@ static int dpu_hw_intr_clear_irqs(struct dpu_hw_intr *intr) /* ensure register writes go through */ wmb(); - - return 0; } -static int dpu_hw_intr_disable_irqs(struct dpu_hw_intr *intr) +static void dpu_disable_all_irqs(struct dpu_kms *dpu_kms) { + struct dpu_hw_intr *intr = dpu_kms->hw_intr; int i; if (!intr) - return -EINVAL; + return; for (i = 0; i < ARRAY_SIZE(dpu_intr_set); i++) { if (test_bit(i, &intr->irq_mask)) @@ -337,13 +336,11 @@ static int dpu_hw_intr_disable_irqs(struct dpu_hw_intr *intr) /* ensure register writes go through */ wmb(); - - return 0; } -static u32 dpu_hw_intr_get_interrupt_status(struct dpu_hw_intr *intr, - int irq_idx, bool clear) +u32 dpu_core_irq_read(struct dpu_kms *dpu_kms, int irq_idx, bool clear) { + struct dpu_hw_intr *intr = dpu_kms->hw_intr; int reg_idx; unsigned long irq_flags; u32 intr_status; @@ -351,6 +348,12 @@ static u32 dpu_hw_intr_get_interrupt_status(struct dpu_hw_intr *intr, if (!intr) return 0; + if (irq_idx < 0) { + DPU_ERROR("[%pS] invalid irq_idx=%d\n", + __builtin_return_address(0), irq_idx); + return 0; + } + if (irq_idx < 0 || irq_idx >= intr->total_irqs) { pr_err("invalid IRQ index: [%d]\n", irq_idx); return 0; @@ -374,32 +377,6 @@ static u32 dpu_hw_intr_get_interrupt_status(struct dpu_hw_intr *intr, return intr_status; } -static unsigned long dpu_hw_intr_lock(struct dpu_hw_intr *intr) -{ - unsigned long irq_flags; - - spin_lock_irqsave(&intr->irq_lock, irq_flags); - - return irq_flags; -} - -static void dpu_hw_intr_unlock(struct dpu_hw_intr *intr, unsigned long irq_flags) -{ - spin_unlock_irqrestore(&intr->irq_lock, irq_flags); -} - -static void __setup_intr_ops(struct dpu_hw_intr_ops *ops) -{ - ops->enable_irq_locked = dpu_hw_intr_enable_irq_locked; - ops->disable_irq_locked = dpu_hw_intr_disable_irq_locked; - ops->dispatch_irqs = dpu_hw_intr_dispatch_irq; - ops->clear_all_irqs = dpu_hw_intr_clear_irqs; - ops->disable_all_irqs = dpu_hw_intr_disable_irqs; - ops->get_interrupt_status = dpu_hw_intr_get_interrupt_status; - ops->lock = dpu_hw_intr_lock; - ops->unlock = dpu_hw_intr_unlock; -} - static void __intr_offset(struct dpu_mdss_cfg *m, void __iomem *addr, struct dpu_hw_blk_reg_map *hw) { @@ -421,7 +398,6 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, return ERR_PTR(-ENOMEM); __intr_offset(m, addr, &intr->hw); - __setup_intr_ops(&intr->ops); intr->total_irqs = ARRAY_SIZE(dpu_intr_set) * 32; @@ -443,7 +419,168 @@ void dpu_hw_intr_destroy(struct dpu_hw_intr *intr) { if (intr) { kfree(intr->cache_irq_mask); + + kfree(intr->irq_cb_tbl); + kfree(intr->irq_counts); + kfree(intr); } } +int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, int irq_idx, + struct dpu_irq_callback *register_irq_cb) +{ + unsigned long irq_flags; + + if (!dpu_kms->hw_intr->irq_cb_tbl) { + DPU_ERROR("invalid params\n"); + return -EINVAL; + } + + if (!register_irq_cb || !register_irq_cb->func) { + DPU_ERROR("invalid irq_cb:%d func:%d\n", + register_irq_cb != NULL, + register_irq_cb ? + register_irq_cb->func != NULL : -1); + return -EINVAL; + } + + if (irq_idx < 0 || irq_idx >= dpu_kms->hw_intr->total_irqs) { + DPU_ERROR("invalid IRQ index: [%d]\n", irq_idx); + return -EINVAL; + } + + VERB("[%pS] irq_idx=%d\n", __builtin_return_address(0), irq_idx); + + spin_lock_irqsave(&dpu_kms->hw_intr->irq_lock, irq_flags); + trace_dpu_core_irq_register_callback(irq_idx, register_irq_cb); + list_del_init(®ister_irq_cb->list); + list_add_tail(®ister_irq_cb->list, + &dpu_kms->hw_intr->irq_cb_tbl[irq_idx]); + if (list_is_first(®ister_irq_cb->list, + &dpu_kms->hw_intr->irq_cb_tbl[irq_idx])) { + int ret = dpu_hw_intr_enable_irq_locked( + dpu_kms->hw_intr, + irq_idx); + if (ret) + DPU_ERROR("Fail to enable IRQ for irq_idx:%d\n", + irq_idx); + } + spin_unlock_irqrestore(&dpu_kms->hw_intr->irq_lock, irq_flags); + + return 0; +} + +int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, + struct dpu_irq_callback *register_irq_cb) +{ + unsigned long irq_flags; + + if (!dpu_kms->hw_intr->irq_cb_tbl) { + DPU_ERROR("invalid params\n"); + return -EINVAL; + } + + if (!register_irq_cb || !register_irq_cb->func) { + DPU_ERROR("invalid irq_cb:%d func:%d\n", + register_irq_cb != NULL, + register_irq_cb ? + register_irq_cb->func != NULL : -1); + return -EINVAL; + } + + if (irq_idx < 0 || irq_idx >= dpu_kms->hw_intr->total_irqs) { + DPU_ERROR("invalid IRQ index: [%d]\n", irq_idx); + return -EINVAL; + } + + VERB("[%pS] irq_idx=%d\n", __builtin_return_address(0), irq_idx); + + spin_lock_irqsave(&dpu_kms->hw_intr->irq_lock, irq_flags); + trace_dpu_core_irq_unregister_callback(irq_idx, register_irq_cb); + list_del_init(®ister_irq_cb->list); + /* empty callback list but interrupt is still enabled */ + if (list_empty(&dpu_kms->hw_intr->irq_cb_tbl[irq_idx])) { + int ret = dpu_hw_intr_disable_irq_locked( + dpu_kms->hw_intr, + irq_idx); + if (ret) + DPU_ERROR("Fail to disable IRQ for irq_idx:%d\n", + irq_idx); + VERB("irq_idx=%d ret=%d\n", irq_idx, ret); + } + spin_unlock_irqrestore(&dpu_kms->hw_intr->irq_lock, irq_flags); + + return 0; +} + +#ifdef CONFIG_DEBUG_FS +static int dpu_debugfs_core_irq_show(struct seq_file *s, void *v) +{ + struct dpu_kms *dpu_kms = s->private; + struct dpu_irq_callback *cb; + unsigned long irq_flags; + int i, irq_count, cb_count; + + if (WARN_ON(!dpu_kms->hw_intr->irq_cb_tbl)) + return 0; + + for (i = 0; i < dpu_kms->hw_intr->total_irqs; i++) { + spin_lock_irqsave(&dpu_kms->hw_intr->irq_lock, irq_flags); + cb_count = 0; + irq_count = atomic_read(&dpu_kms->hw_intr->irq_counts[i]); + list_for_each_entry(cb, &dpu_kms->hw_intr->irq_cb_tbl[i], list) + cb_count++; + spin_unlock_irqrestore(&dpu_kms->hw_intr->irq_lock, irq_flags); + + if (irq_count || cb_count) + seq_printf(s, "idx:%d irq:%d cb:%d\n", + i, irq_count, cb_count); + } + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(dpu_debugfs_core_irq); + +void dpu_debugfs_core_irq_init(struct dpu_kms *dpu_kms, + struct dentry *parent) +{ + debugfs_create_file("core_irq", 0600, parent, dpu_kms, + &dpu_debugfs_core_irq_fops); +} +#endif + +void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms) +{ + int i; + + pm_runtime_get_sync(&dpu_kms->pdev->dev); + dpu_clear_irqs(dpu_kms); + dpu_disable_all_irqs(dpu_kms); + pm_runtime_put_sync(&dpu_kms->pdev->dev); + + /* Create irq callbacks for all possible irq_idx */ + dpu_kms->hw_intr->irq_cb_tbl = kcalloc(dpu_kms->hw_intr->total_irqs, + sizeof(struct list_head), GFP_KERNEL); + dpu_kms->hw_intr->irq_counts = kcalloc(dpu_kms->hw_intr->total_irqs, + sizeof(atomic_t), GFP_KERNEL); + for (i = 0; i < dpu_kms->hw_intr->total_irqs; i++) { + INIT_LIST_HEAD(&dpu_kms->hw_intr->irq_cb_tbl[i]); + atomic_set(&dpu_kms->hw_intr->irq_counts[i], 0); + } +} + +void dpu_core_irq_uninstall(struct dpu_kms *dpu_kms) +{ + int i; + + pm_runtime_get_sync(&dpu_kms->pdev->dev); + for (i = 0; i < dpu_kms->hw_intr->total_irqs; i++) + if (!list_empty(&dpu_kms->hw_intr->irq_cb_tbl[i])) + DPU_ERROR("irq_idx=%d still enabled/registered\n", i); + + dpu_clear_irqs(dpu_kms); + dpu_disable_all_irqs(dpu_kms); + pm_runtime_put_sync(&dpu_kms->pdev->dev); +} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h index ac83c1159815..d50e78c9f148 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h @@ -32,92 +32,6 @@ enum dpu_hw_intr_reg { #define DPU_IRQ_IDX(reg_idx, offset) (reg_idx * 32 + offset) -struct dpu_hw_intr; - -/** - * Interrupt operations. - */ -struct dpu_hw_intr_ops { - - /** - * enable_irq - Enable IRQ based on lookup IRQ index - * @intr: HW interrupt handle - * @irq_idx: Lookup irq index return from irq_idx_lookup - * @return: 0 for success, otherwise failure - */ - int (*enable_irq_locked)( - struct dpu_hw_intr *intr, - int irq_idx); - - /** - * disable_irq - Disable IRQ based on lookup IRQ index - * @intr: HW interrupt handle - * @irq_idx: Lookup irq index return from irq_idx_lookup - * @return: 0 for success, otherwise failure - */ - int (*disable_irq_locked)( - struct dpu_hw_intr *intr, - int irq_idx); - - /** - * clear_all_irqs - Clears all the interrupts (i.e. acknowledges - * any asserted IRQs). Useful during reset. - * @intr: HW interrupt handle - * @return: 0 for success, otherwise failure - */ - int (*clear_all_irqs)( - struct dpu_hw_intr *intr); - - /** - * disable_all_irqs - Disables all the interrupts. Useful during reset. - * @intr: HW interrupt handle - * @return: 0 for success, otherwise failure - */ - int (*disable_all_irqs)( - struct dpu_hw_intr *intr); - - /** - * dispatch_irqs - IRQ dispatcher will call the given callback - * function when a matching interrupt status bit is - * found in the irq mapping table. - * @intr: HW interrupt handle - * @cbfunc: Callback function pointer - * @arg: Argument to pass back during callback - */ - void (*dispatch_irqs)( - struct dpu_hw_intr *intr, - void (*cbfunc)(void *arg, int irq_idx), - void *arg); - - /** - * get_interrupt_status - Gets HW interrupt status, and clear if set, - * based on given lookup IRQ index. - * @intr: HW interrupt handle - * @irq_idx: Lookup irq index return from irq_idx_lookup - * @clear: True to clear irq after read - */ - u32 (*get_interrupt_status)( - struct dpu_hw_intr *intr, - int irq_idx, - bool clear); - - /** - * lock - take the IRQ lock - * @intr: HW interrupt handle - * @return: irq_flags for the taken spinlock - */ - unsigned long (*lock)( - struct dpu_hw_intr *intr); - - /** - * unlock - take the IRQ lock - * @intr: HW interrupt handle - * @irq_flags: the irq_flags returned from lock - */ - void (*unlock)( - struct dpu_hw_intr *intr, unsigned long irq_flags); -}; - /** * struct dpu_hw_intr: hw interrupts handling data structure * @hw: virtual address mapping @@ -126,15 +40,19 @@ struct dpu_hw_intr_ops { * @save_irq_status: array of IRQ status reg storage created during init * @total_irqs: total number of irq_idx mapped in the hw_interrupts * @irq_lock: spinlock for accessing IRQ resources + * @irq_cb_tbl: array of IRQ callbacks lists + * @irq_counts: array of IRQ counts */ struct dpu_hw_intr { struct dpu_hw_blk_reg_map hw; - struct dpu_hw_intr_ops ops; u32 *cache_irq_mask; u32 *save_irq_status; u32 total_irqs; spinlock_t irq_lock; unsigned long irq_mask; + + struct list_head *irq_cb_tbl; + atomic_t *irq_counts; }; /** diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c index cb6bb7a22c15..86363c0ec834 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only -/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. +/* + * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. */ #include "dpu_kms.h" @@ -24,6 +25,15 @@ #define LM_BLEND0_FG_ALPHA 0x04 #define LM_BLEND0_BG_ALPHA 0x08 +#define LM_MISR_CTRL 0x310 +#define LM_MISR_SIGNATURE 0x314 +#define LM_MISR_FRAME_COUNT_MASK 0xFF +#define LM_MISR_CTRL_ENABLE BIT(8) +#define LM_MISR_CTRL_STATUS BIT(9) +#define LM_MISR_CTRL_STATUS_CLEAR BIT(10) +#define LM_MISR_CTRL_FREE_RUN_MASK BIT(31) + + static const struct dpu_lm_cfg *_lm_offset(enum dpu_lm mixer, const struct dpu_mdss_cfg *m, void __iomem *addr, @@ -96,6 +106,48 @@ static void dpu_hw_lm_setup_border_color(struct dpu_hw_mixer *ctx, } } +static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count) +{ + struct dpu_hw_blk_reg_map *c = &ctx->hw; + u32 config = 0; + + DPU_REG_WRITE(c, LM_MISR_CTRL, LM_MISR_CTRL_STATUS_CLEAR); + + /* Clear old MISR value (in case it's read before a new value is calculated)*/ + wmb(); + + if (enable) { + config = (frame_count & LM_MISR_FRAME_COUNT_MASK) | + LM_MISR_CTRL_ENABLE | LM_MISR_CTRL_FREE_RUN_MASK; + + DPU_REG_WRITE(c, LM_MISR_CTRL, config); + } else { + DPU_REG_WRITE(c, LM_MISR_CTRL, 0); + } + +} + +static int dpu_hw_lm_collect_misr(struct dpu_hw_mixer *ctx, u32 *misr_value) +{ + struct dpu_hw_blk_reg_map *c = &ctx->hw; + u32 ctrl = 0; + + if (!misr_value) + return -EINVAL; + + ctrl = DPU_REG_READ(c, LM_MISR_CTRL); + + if (!(ctrl & LM_MISR_CTRL_ENABLE)) + return -EINVAL; + + if (!(ctrl & LM_MISR_CTRL_STATUS)) + return -EINVAL; + + *misr_value = DPU_REG_READ(c, LM_MISR_SIGNATURE); + + return 0; +} + static void dpu_hw_lm_setup_blend_config_sdm845(struct dpu_hw_mixer *ctx, u32 stage, u32 fg_alpha, u32 bg_alpha, u32 blend_op) { @@ -158,6 +210,8 @@ static void _setup_mixer_ops(const struct dpu_mdss_cfg *m, ops->setup_blend_config = dpu_hw_lm_setup_blend_config; ops->setup_alpha_out = dpu_hw_lm_setup_color3; ops->setup_border_color = dpu_hw_lm_setup_border_color; + ops->setup_misr = dpu_hw_lm_setup_misr; + ops->collect_misr = dpu_hw_lm_collect_misr; } struct dpu_hw_mixer *dpu_hw_lm_init(enum dpu_lm idx, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h index 4a6b2de19ef6..d8052fb2d5da 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. +/* + * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. */ #ifndef _DPU_HW_LM_H @@ -53,6 +54,16 @@ struct dpu_hw_lm_ops { void (*setup_border_color)(struct dpu_hw_mixer *ctx, struct dpu_mdss_color *color, u8 border_en); + + /** + * setup_misr: Enable/disable MISR + */ + void (*setup_misr)(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count); + + /** + * collect_misr: Read MISR signature + */ + int (*collect_misr)(struct dpu_hw_mixer *ctx, u32 *misr_value); }; struct dpu_hw_mixer { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c index 69eed7932486..f9460672176a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c @@ -138,11 +138,13 @@ static int _sspp_subblk_offset(struct dpu_hw_pipe *ctx, u32 *idx) { int rc = 0; - const struct dpu_sspp_sub_blks *sblk = ctx->cap->sblk; + const struct dpu_sspp_sub_blks *sblk; - if (!ctx) + if (!ctx || !ctx->cap || !ctx->cap->sblk) return -EINVAL; + sblk = ctx->cap->sblk; + switch (s_id) { case DPU_SSPP_SRC: *idx = sblk->src_blk.base; @@ -419,7 +421,7 @@ static void _dpu_hw_sspp_setup_scaler3(struct dpu_hw_pipe *ctx, (void)pe; if (_sspp_subblk_offset(ctx, DPU_SSPP_SCALER_QSEED3, &idx) || !sspp - || !scaler3_cfg || !ctx || !ctx->cap || !ctx->cap->sblk) + || !scaler3_cfg) return; dpu_hw_setup_scaler3(&ctx->hw, scaler3_cfg, idx, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h index ff3cffde84cd..6d4911957e33 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. +/* + * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. */ #ifndef _DPU_HW_UTIL_H diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index ae48f41821cf..a15b26428280 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -188,6 +188,7 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor) struct dentry *entry; struct drm_device *dev; struct msm_drm_private *priv; + int i; if (!p) return -EINVAL; @@ -203,8 +204,10 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor) dpu_debugfs_vbif_init(dpu_kms, entry); dpu_debugfs_core_irq_init(dpu_kms, entry); - if (priv->dp) - msm_dp_debugfs_init(priv->dp, minor); + for (i = 0; i < ARRAY_SIZE(priv->dp); i++) { + if (priv->dp[i]) + msm_dp_debugfs_init(priv->dp[i], minor); + } return dpu_core_perf_debugfs_init(dpu_kms, entry); } @@ -544,35 +547,42 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev, { struct drm_encoder *encoder = NULL; struct msm_display_info info; - int rc = 0; + int rc; + int i; - if (!priv->dp) - return rc; + for (i = 0; i < ARRAY_SIZE(priv->dp); i++) { + if (!priv->dp[i]) + continue; - encoder = dpu_encoder_init(dev, DRM_MODE_ENCODER_TMDS); - if (IS_ERR(encoder)) { - DPU_ERROR("encoder init failed for dsi display\n"); - return PTR_ERR(encoder); - } + encoder = dpu_encoder_init(dev, DRM_MODE_ENCODER_TMDS); + if (IS_ERR(encoder)) { + DPU_ERROR("encoder init failed for dsi display\n"); + return PTR_ERR(encoder); + } - memset(&info, 0, sizeof(info)); - rc = msm_dp_modeset_init(priv->dp, dev, encoder); - if (rc) { - DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc); - drm_encoder_cleanup(encoder); - return rc; - } + memset(&info, 0, sizeof(info)); + rc = msm_dp_modeset_init(priv->dp[i], dev, encoder); + if (rc) { + DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc); + drm_encoder_cleanup(encoder); + return rc; + } - priv->encoders[priv->num_encoders++] = encoder; + priv->encoders[priv->num_encoders++] = encoder; - info.num_of_h_tiles = 1; - info.capabilities = MSM_DISPLAY_CAP_VID_MODE; - info.intf_type = encoder->encoder_type; - rc = dpu_encoder_setup(dev, encoder, &info); - if (rc) - DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n", - encoder->base.id, rc); - return rc; + info.num_of_h_tiles = 1; + info.h_tile_instance[0] = i; + info.capabilities = MSM_DISPLAY_CAP_VID_MODE; + info.intf_type = encoder->encoder_type; + rc = dpu_encoder_setup(dev, encoder, &info); + if (rc) { + DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n", + encoder->base.id, rc); + return rc; + } + } + + return 0; } /** @@ -792,6 +802,7 @@ static int dpu_irq_postinstall(struct msm_kms *kms) { struct msm_drm_private *priv; struct dpu_kms *dpu_kms = to_dpu_kms(kms); + int i; if (!dpu_kms || !dpu_kms->dev) return -EINVAL; @@ -800,7 +811,8 @@ static int dpu_irq_postinstall(struct msm_kms *kms) if (!priv) return -EINVAL; - msm_dp_irq_postinstall(priv->dp); + for (i = 0; i < ARRAY_SIZE(priv->dp); i++) + msm_dp_irq_postinstall(priv->dp[i]); return 0; } @@ -908,6 +920,10 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) return 0; mmu = msm_iommu_new(dpu_kms->dev->dev, domain); + if (IS_ERR(mmu)) { + iommu_domain_free(domain); + return PTR_ERR(mmu); + } aspace = msm_gem_address_space_create(mmu, "dpu1", 0x1000, 0x100000000 - 0x1000); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index 323a6bce9e64..775bcbda860f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -78,18 +78,6 @@ struct dpu_irq_callback { void *arg; }; -/** - * struct dpu_irq: IRQ structure contains callback registration info - * @total_irq: total number of irq_idx obtained from HW interrupts mapping - * @irq_cb_tbl: array of IRQ callbacks setting - * @debugfs_file: debugfs file for irq statistics - */ -struct dpu_irq { - u32 total_irqs; - struct list_head *irq_cb_tbl; - atomic_t *irq_counts; -}; - struct dpu_kms { struct msm_kms base; struct drm_device *dev; @@ -104,7 +92,6 @@ struct dpu_kms { struct regulator *venus; struct dpu_hw_intr *hw_intr; - struct dpu_irq irq_obj; struct dpu_core_perf perf; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index c989621209aa..a3e3b9d1b82e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1193,7 +1193,7 @@ static void dpu_plane_sspp_atomic_update(struct drm_plane *plane) if (DPU_FORMAT_IS_YUV(fmt)) _dpu_plane_setup_csc(pdpu); else - pdpu->csc_ptr = 0; + pdpu->csc_ptr = NULL; } _dpu_plane_set_qos_lut(plane, fb); @@ -1330,7 +1330,7 @@ static void dpu_plane_reset(struct drm_plane *plane) /* remove previous state, if present */ if (plane->state) { dpu_plane_destroy_state(plane, plane->state); - plane->state = 0; + plane->state = NULL; } pstate = kzalloc(sizeof(*pstate), GFP_KERNEL); diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index cdcaf470f148..5a33bb148e9e 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -173,12 +173,9 @@ int mdp4_disable(struct mdp4_kms *mdp4_kms) DBG(""); clk_disable_unprepare(mdp4_kms->clk); - if (mdp4_kms->pclk) - clk_disable_unprepare(mdp4_kms->pclk); - if (mdp4_kms->lut_clk) - clk_disable_unprepare(mdp4_kms->lut_clk); - if (mdp4_kms->axi_clk) - clk_disable_unprepare(mdp4_kms->axi_clk); + clk_disable_unprepare(mdp4_kms->pclk); + clk_disable_unprepare(mdp4_kms->lut_clk); + clk_disable_unprepare(mdp4_kms->axi_clk); return 0; } @@ -188,12 +185,9 @@ int mdp4_enable(struct mdp4_kms *mdp4_kms) DBG(""); clk_prepare_enable(mdp4_kms->clk); - if (mdp4_kms->pclk) - clk_prepare_enable(mdp4_kms->pclk); - if (mdp4_kms->lut_clk) - clk_prepare_enable(mdp4_kms->lut_clk); - if (mdp4_kms->axi_clk) - clk_prepare_enable(mdp4_kms->axi_clk); + clk_prepare_enable(mdp4_kms->pclk); + clk_prepare_enable(mdp4_kms->lut_clk); + clk_prepare_enable(mdp4_kms->axi_clk); return 0; } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c index 9741544ffc35..1bf9ff5dbabc 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c @@ -752,6 +752,94 @@ const struct mdp5_cfg_hw msm8x76_config = { .max_clk = 360000000, }; +static const struct mdp5_cfg_hw msm8x53_config = { + .name = "msm8x53", + .mdp = { + .count = 1, + .caps = MDP_CAP_CDM | + MDP_CAP_SRC_SPLIT, + }, + .ctl = { + .count = 3, + .base = { 0x01000, 0x01200, 0x01400 }, + .flush_hw_mask = 0xffffffff, + }, + .pipe_vig = { + .count = 1, + .base = { 0x04000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SCALE | + MDP_PIPE_CAP_CSC | + MDP_PIPE_CAP_DECIMATION | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_rgb = { + .count = 2, + .base = { 0x14000, 0x16000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_DECIMATION | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_dma = { + .count = 1, + .base = { 0x24000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_cursor = { + .count = 1, + .base = { 0x34000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SW_PIX_EXT | + MDP_PIPE_CAP_CURSOR | + 0, + }, + + .lm = { + .count = 3, + .base = { 0x44000, 0x45000 }, + .instances = { + { .id = 0, .pp = 0, .dspp = 0, + .caps = MDP_LM_CAP_DISPLAY | + MDP_LM_CAP_PAIR }, + { .id = 1, .pp = 1, .dspp = -1, + .caps = MDP_LM_CAP_DISPLAY }, + }, + .nb_stages = 5, + .max_width = 2048, + .max_height = 0xFFFF, + }, + .dspp = { + .count = 1, + .base = { 0x54000 }, + + }, + .pp = { + .count = 2, + .base = { 0x70000, 0x70800 }, + }, + .cdm = { + .count = 1, + .base = { 0x79200 }, + }, + .intf = { + .base = { 0x6a000, 0x6a800, 0x6b000 }, + .connect = { + [0] = INTF_DISABLED, + [1] = INTF_DSI, + [2] = INTF_DSI, + }, + }, + .max_clk = 400000000, +}; + static const struct mdp5_cfg_hw msm8917_config = { .name = "msm8917", .mdp = { @@ -1151,6 +1239,7 @@ static const struct mdp5_cfg_handler cfg_handlers_v1[] = { { .revision = 7, .config = { .hw = &msm8x96_config } }, { .revision = 11, .config = { .hw = &msm8x76_config } }, { .revision = 15, .config = { .hw = &msm8917_config } }, + { .revision = 16, .config = { .hw = &msm8x53_config } }, }; static const struct mdp5_cfg_handler cfg_handlers_v3[] = { diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index f482e0911d03..bb7d066618e6 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -1125,6 +1125,20 @@ static void mdp5_crtc_reset(struct drm_crtc *crtc) __drm_atomic_helper_crtc_reset(crtc, &mdp5_cstate->base); } +static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = { + .set_config = drm_atomic_helper_set_config, + .destroy = mdp5_crtc_destroy, + .page_flip = drm_atomic_helper_page_flip, + .reset = mdp5_crtc_reset, + .atomic_duplicate_state = mdp5_crtc_duplicate_state, + .atomic_destroy_state = mdp5_crtc_destroy_state, + .atomic_print_state = mdp5_crtc_atomic_print_state, + .get_vblank_counter = mdp5_crtc_get_vblank_counter, + .enable_vblank = msm_crtc_enable_vblank, + .disable_vblank = msm_crtc_disable_vblank, + .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +}; + static const struct drm_crtc_funcs mdp5_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = mdp5_crtc_destroy, @@ -1313,6 +1327,8 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, mdp5_crtc->lm_cursor_enabled = cursor_plane ? false : true; drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, + cursor_plane ? + &mdp5_crtc_no_lm_cursor_funcs : &mdp5_crtc_funcs, NULL); drm_flip_work_init(&mdp5_crtc->unref_cursor_work, diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index b3b42672b2d4..7b242246d4e7 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -295,15 +295,12 @@ static int mdp5_disable(struct mdp5_kms *mdp5_kms) mdp5_kms->enable_count--; WARN_ON(mdp5_kms->enable_count < 0); - if (mdp5_kms->tbu_rt_clk) - clk_disable_unprepare(mdp5_kms->tbu_rt_clk); - if (mdp5_kms->tbu_clk) - clk_disable_unprepare(mdp5_kms->tbu_clk); + clk_disable_unprepare(mdp5_kms->tbu_rt_clk); + clk_disable_unprepare(mdp5_kms->tbu_clk); clk_disable_unprepare(mdp5_kms->ahb_clk); clk_disable_unprepare(mdp5_kms->axi_clk); clk_disable_unprepare(mdp5_kms->core_clk); - if (mdp5_kms->lut_clk) - clk_disable_unprepare(mdp5_kms->lut_clk); + clk_disable_unprepare(mdp5_kms->lut_clk); return 0; } @@ -317,12 +314,9 @@ static int mdp5_enable(struct mdp5_kms *mdp5_kms) clk_prepare_enable(mdp5_kms->ahb_clk); clk_prepare_enable(mdp5_kms->axi_clk); clk_prepare_enable(mdp5_kms->core_clk); - if (mdp5_kms->lut_clk) - clk_prepare_enable(mdp5_kms->lut_clk); - if (mdp5_kms->tbu_clk) - clk_prepare_enable(mdp5_kms->tbu_clk); - if (mdp5_kms->tbu_rt_clk) - clk_prepare_enable(mdp5_kms->tbu_rt_clk); + clk_prepare_enable(mdp5_kms->lut_clk); + clk_prepare_enable(mdp5_kms->tbu_clk); + clk_prepare_enable(mdp5_kms->tbu_rt_clk); return 0; } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c index 2f4895bcb0b0..0ea53420bc40 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c @@ -136,10 +136,8 @@ static int mdp5_mdss_enable(struct msm_mdss *mdss) DBG(""); clk_prepare_enable(mdp5_mdss->ahb_clk); - if (mdp5_mdss->axi_clk) - clk_prepare_enable(mdp5_mdss->axi_clk); - if (mdp5_mdss->vsync_clk) - clk_prepare_enable(mdp5_mdss->vsync_clk); + clk_prepare_enable(mdp5_mdss->axi_clk); + clk_prepare_enable(mdp5_mdss->vsync_clk); return 0; } @@ -149,10 +147,8 @@ static int mdp5_mdss_disable(struct msm_mdss *mdss) struct mdp5_mdss *mdp5_mdss = to_mdp5_mdss(mdss); DBG(""); - if (mdp5_mdss->vsync_clk) - clk_disable_unprepare(mdp5_mdss->vsync_clk); - if (mdp5_mdss->axi_clk) - clk_disable_unprepare(mdp5_mdss->axi_clk); + clk_disable_unprepare(mdp5_mdss->vsync_clk); + clk_disable_unprepare(mdp5_mdss->axi_clk); clk_disable_unprepare(mdp5_mdss->ahb_clk); return 0; diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c index cabe15190ec1..2e1acb1bc390 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c @@ -126,8 +126,12 @@ void msm_disp_snapshot_capture_state(struct msm_disp_state *disp_state) priv = drm_dev->dev_private; kms = priv->kms; - if (priv->dp) - msm_dp_snapshot(disp_state, priv->dp); + for (i = 0; i < ARRAY_SIZE(priv->dp); i++) { + if (!priv->dp[i]) + continue; + + msm_dp_snapshot(disp_state, priv->dp[i]); + } for (i = 0; i < ARRAY_SIZE(priv->dsi); i++) { if (!priv->dsi[i]) diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index cc2bb8295329..6ae9b29044b6 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -24,15 +24,6 @@ #define DP_INTERRUPT_STATUS_ACK_SHIFT 1 #define DP_INTERRUPT_STATUS_MASK_SHIFT 2 -#define MSM_DP_CONTROLLER_AHB_OFFSET 0x0000 -#define MSM_DP_CONTROLLER_AHB_SIZE 0x0200 -#define MSM_DP_CONTROLLER_AUX_OFFSET 0x0200 -#define MSM_DP_CONTROLLER_AUX_SIZE 0x0200 -#define MSM_DP_CONTROLLER_LINK_OFFSET 0x0400 -#define MSM_DP_CONTROLLER_LINK_SIZE 0x0C00 -#define MSM_DP_CONTROLLER_P0_OFFSET 0x1000 -#define MSM_DP_CONTROLLER_P0_SIZE 0x0400 - #define DP_INTERRUPT_STATUS1 \ (DP_INTR_AUX_I2C_DONE| \ DP_INTR_WRONG_ADDR | DP_INTR_TIMEOUT | \ @@ -66,82 +57,77 @@ void dp_catalog_snapshot(struct dp_catalog *dp_catalog, struct msm_disp_state *d { struct dp_catalog_private *catalog = container_of(dp_catalog, struct dp_catalog_private, dp_catalog); + struct dss_io_data *dss = &catalog->io->dp_controller; - msm_disp_snapshot_add_block(disp_state, catalog->io->dp_controller.len, - catalog->io->dp_controller.base, "dp_ctrl"); + msm_disp_snapshot_add_block(disp_state, dss->ahb.len, dss->ahb.base, "dp_ahb"); + msm_disp_snapshot_add_block(disp_state, dss->aux.len, dss->aux.base, "dp_aux"); + msm_disp_snapshot_add_block(disp_state, dss->link.len, dss->link.base, "dp_link"); + msm_disp_snapshot_add_block(disp_state, dss->p0.len, dss->p0.base, "dp_p0"); } static inline u32 dp_read_aux(struct dp_catalog_private *catalog, u32 offset) { - offset += MSM_DP_CONTROLLER_AUX_OFFSET; - return readl_relaxed(catalog->io->dp_controller.base + offset); + return readl_relaxed(catalog->io->dp_controller.aux.base + offset); } static inline void dp_write_aux(struct dp_catalog_private *catalog, u32 offset, u32 data) { - offset += MSM_DP_CONTROLLER_AUX_OFFSET; /* * To make sure aux reg writes happens before any other operation, * this function uses writel() instread of writel_relaxed() */ - writel(data, catalog->io->dp_controller.base + offset); + writel(data, catalog->io->dp_controller.aux.base + offset); } static inline u32 dp_read_ahb(struct dp_catalog_private *catalog, u32 offset) { - offset += MSM_DP_CONTROLLER_AHB_OFFSET; - return readl_relaxed(catalog->io->dp_controller.base + offset); + return readl_relaxed(catalog->io->dp_controller.ahb.base + offset); } static inline void dp_write_ahb(struct dp_catalog_private *catalog, u32 offset, u32 data) { - offset += MSM_DP_CONTROLLER_AHB_OFFSET; /* * To make sure phy reg writes happens before any other operation, * this function uses writel() instread of writel_relaxed() */ - writel(data, catalog->io->dp_controller.base + offset); + writel(data, catalog->io->dp_controller.ahb.base + offset); } static inline void dp_write_p0(struct dp_catalog_private *catalog, u32 offset, u32 data) { - offset += MSM_DP_CONTROLLER_P0_OFFSET; /* * To make sure interface reg writes happens before any other operation, * this function uses writel() instread of writel_relaxed() */ - writel(data, catalog->io->dp_controller.base + offset); + writel(data, catalog->io->dp_controller.p0.base + offset); } static inline u32 dp_read_p0(struct dp_catalog_private *catalog, u32 offset) { - offset += MSM_DP_CONTROLLER_P0_OFFSET; /* * To make sure interface reg writes happens before any other operation, * this function uses writel() instread of writel_relaxed() */ - return readl_relaxed(catalog->io->dp_controller.base + offset); + return readl_relaxed(catalog->io->dp_controller.p0.base + offset); } static inline u32 dp_read_link(struct dp_catalog_private *catalog, u32 offset) { - offset += MSM_DP_CONTROLLER_LINK_OFFSET; - return readl_relaxed(catalog->io->dp_controller.base + offset); + return readl_relaxed(catalog->io->dp_controller.link.base + offset); } static inline void dp_write_link(struct dp_catalog_private *catalog, u32 offset, u32 data) { - offset += MSM_DP_CONTROLLER_LINK_OFFSET; /* * To make sure link reg writes happens before any other operation, * this function uses writel() instread of writel_relaxed() */ - writel(data, catalog->io->dp_controller.base + offset); + writel(data, catalog->io->dp_controller.link.base + offset); } /* aux related catalog functions */ @@ -276,29 +262,21 @@ static void dump_regs(void __iomem *base, int len) void dp_catalog_dump_regs(struct dp_catalog *dp_catalog) { - u32 offset, len; struct dp_catalog_private *catalog = container_of(dp_catalog, struct dp_catalog_private, dp_catalog); + struct dss_io_data *io = &catalog->io->dp_controller; pr_info("AHB regs\n"); - offset = MSM_DP_CONTROLLER_AHB_OFFSET; - len = MSM_DP_CONTROLLER_AHB_SIZE; - dump_regs(catalog->io->dp_controller.base + offset, len); + dump_regs(io->ahb.base, io->ahb.len); pr_info("AUXCLK regs\n"); - offset = MSM_DP_CONTROLLER_AUX_OFFSET; - len = MSM_DP_CONTROLLER_AUX_SIZE; - dump_regs(catalog->io->dp_controller.base + offset, len); + dump_regs(io->aux.base, io->aux.len); pr_info("LCLK regs\n"); - offset = MSM_DP_CONTROLLER_LINK_OFFSET; - len = MSM_DP_CONTROLLER_LINK_SIZE; - dump_regs(catalog->io->dp_controller.base + offset, len); + dump_regs(io->link.base, io->link.len); pr_info("P0CLK regs\n"); - offset = MSM_DP_CONTROLLER_P0_OFFSET; - len = MSM_DP_CONTROLLER_P0_SIZE; - dump_regs(catalog->io->dp_controller.base + offset, len); + dump_regs(io->p0.base, io->p0.len); } u32 dp_catalog_aux_get_irq(struct dp_catalog *dp_catalog) @@ -493,8 +471,7 @@ int dp_catalog_ctrl_set_pattern(struct dp_catalog *dp_catalog, bit = BIT(pattern - 1) << DP_MAINLINK_READY_LINK_TRAINING_SHIFT; /* Poll for mainlink ready status */ - ret = readx_poll_timeout(readl, catalog->io->dp_controller.base + - MSM_DP_CONTROLLER_LINK_OFFSET + + ret = readx_poll_timeout(readl, catalog->io->dp_controller.link.base + REG_DP_MAINLINK_READY, data, data & bit, POLLING_SLEEP_US, POLLING_TIMEOUT_US); @@ -541,8 +518,7 @@ bool dp_catalog_ctrl_mainlink_ready(struct dp_catalog *dp_catalog) struct dp_catalog_private, dp_catalog); /* Poll for mainlink ready status */ - ret = readl_poll_timeout(catalog->io->dp_controller.base + - MSM_DP_CONTROLLER_LINK_OFFSET + + ret = readl_poll_timeout(catalog->io->dp_controller.link.base + REG_DP_MAINLINK_READY, data, data & DP_MAINLINK_READY_FOR_VIDEO, POLLING_SLEEP_US, POLLING_TIMEOUT_US); diff --git a/drivers/gpu/drm/msm/dp/dp_debug.c b/drivers/gpu/drm/msm/dp/dp_debug.c index 2f6247e80e9d..da4323556ef3 100644 --- a/drivers/gpu/drm/msm/dp/dp_debug.c +++ b/drivers/gpu/drm/msm/dp/dp_debug.c @@ -24,240 +24,108 @@ struct dp_debug_private { struct dp_usbpd *usbpd; struct dp_link *link; struct dp_panel *panel; - struct drm_connector **connector; + struct drm_connector *connector; struct device *dev; struct drm_device *drm_dev; struct dp_debug dp_debug; }; -static int dp_debug_check_buffer_overflow(int rc, int *max_size, int *len) -{ - if (rc >= *max_size) { - DRM_ERROR("buffer overflow\n"); - return -EINVAL; - } - *len += rc; - *max_size = SZ_4K - *len; - - return 0; -} - -static ssize_t dp_debug_read_info(struct file *file, char __user *user_buff, - size_t count, loff_t *ppos) +static int dp_debug_show(struct seq_file *seq, void *p) { - struct dp_debug_private *debug = file->private_data; - char *buf; - u32 len = 0, rc = 0; + struct dp_debug_private *debug = seq->private; u64 lclk = 0; - u32 max_size = SZ_4K; u32 link_params_rate; - struct drm_display_mode *drm_mode; + const struct drm_display_mode *drm_mode; if (!debug) return -ENODEV; - if (*ppos) - return 0; - - buf = kzalloc(SZ_4K, GFP_KERNEL); - if (!buf) - return -ENOMEM; - drm_mode = &debug->panel->dp_mode.drm_mode; - rc = snprintf(buf + len, max_size, "\tname = %s\n", DEBUG_NAME); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\tdp_panel\n\t\tmax_pclk_khz = %d\n", + seq_printf(seq, "\tname = %s\n", DEBUG_NAME); + seq_printf(seq, "\tdp_panel\n\t\tmax_pclk_khz = %d\n", debug->panel->max_pclk_khz); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\tdrm_dp_link\n\t\trate = %u\n", + seq_printf(seq, "\tdrm_dp_link\n\t\trate = %u\n", debug->panel->link_info.rate); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tnum_lanes = %u\n", + seq_printf(seq, "\t\tnum_lanes = %u\n", debug->panel->link_info.num_lanes); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tcapabilities = %lu\n", + seq_printf(seq, "\t\tcapabilities = %lu\n", debug->panel->link_info.capabilities); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\tdp_panel_info:\n\t\tactive = %dx%d\n", + seq_printf(seq, "\tdp_panel_info:\n\t\tactive = %dx%d\n", drm_mode->hdisplay, drm_mode->vdisplay); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tback_porch = %dx%d\n", + seq_printf(seq, "\t\tback_porch = %dx%d\n", drm_mode->htotal - drm_mode->hsync_end, drm_mode->vtotal - drm_mode->vsync_end); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tfront_porch = %dx%d\n", + seq_printf(seq, "\t\tfront_porch = %dx%d\n", drm_mode->hsync_start - drm_mode->hdisplay, drm_mode->vsync_start - drm_mode->vdisplay); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tsync_width = %dx%d\n", + seq_printf(seq, "\t\tsync_width = %dx%d\n", drm_mode->hsync_end - drm_mode->hsync_start, drm_mode->vsync_end - drm_mode->vsync_start); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tactive_low = %dx%d\n", + seq_printf(seq, "\t\tactive_low = %dx%d\n", debug->panel->dp_mode.h_active_low, debug->panel->dp_mode.v_active_low); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\th_skew = %d\n", + seq_printf(seq, "\t\th_skew = %d\n", drm_mode->hskew); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\trefresh rate = %d\n", + seq_printf(seq, "\t\trefresh rate = %d\n", drm_mode_vrefresh(drm_mode)); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tpixel clock khz = %d\n", + seq_printf(seq, "\t\tpixel clock khz = %d\n", drm_mode->clock); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tbpp = %d\n", + seq_printf(seq, "\t\tbpp = %d\n", debug->panel->dp_mode.bpp); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; /* Link Information */ - rc = snprintf(buf + len, max_size, - "\tdp_link:\n\t\ttest_requested = %d\n", + seq_printf(seq, "\tdp_link:\n\t\ttest_requested = %d\n", debug->link->sink_request); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tnum_lanes = %d\n", + seq_printf(seq, "\t\tnum_lanes = %d\n", debug->link->link_params.num_lanes); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - link_params_rate = debug->link->link_params.rate; - rc = snprintf(buf + len, max_size, - "\t\tbw_code = %d\n", + seq_printf(seq, "\t\tbw_code = %d\n", drm_dp_link_rate_to_bw_code(link_params_rate)); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - lclk = debug->link->link_params.rate * 1000; - rc = snprintf(buf + len, max_size, - "\t\tlclk = %lld\n", lclk); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tv_level = %d\n", + seq_printf(seq, "\t\tlclk = %lld\n", lclk); + seq_printf(seq, "\t\tv_level = %d\n", debug->link->phy_params.v_level); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - rc = snprintf(buf + len, max_size, - "\t\tp_level = %d\n", + seq_printf(seq, "\t\tp_level = %d\n", debug->link->phy_params.p_level); - if (dp_debug_check_buffer_overflow(rc, &max_size, &len)) - goto error; - - if (copy_to_user(user_buff, buf, len)) - goto error; - - *ppos += len; - kfree(buf); - return len; - error: - kfree(buf); - return -EINVAL; + return 0; } +DEFINE_SHOW_ATTRIBUTE(dp_debug); static int dp_test_data_show(struct seq_file *m, void *data) { - struct drm_device *dev; - struct dp_debug_private *debug; - struct drm_connector *connector; - struct drm_connector_list_iter conn_iter; + const struct dp_debug_private *debug = m->private; + const struct drm_connector *connector = debug->connector; u32 bpc; - debug = m->private; - dev = debug->drm_dev; - drm_connector_list_iter_begin(dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { - - if (connector->connector_type != - DRM_MODE_CONNECTOR_DisplayPort) - continue; - - if (connector->status == connector_status_connected) { - bpc = debug->link->test_video.test_bit_depth; - seq_printf(m, "hdisplay: %d\n", - debug->link->test_video.test_h_width); - seq_printf(m, "vdisplay: %d\n", - debug->link->test_video.test_v_height); - seq_printf(m, "bpc: %u\n", - dp_link_bit_depth_to_bpc(bpc)); - } else - seq_puts(m, "0"); + if (connector->status == connector_status_connected) { + bpc = debug->link->test_video.test_bit_depth; + seq_printf(m, "hdisplay: %d\n", + debug->link->test_video.test_h_width); + seq_printf(m, "vdisplay: %d\n", + debug->link->test_video.test_v_height); + seq_printf(m, "bpc: %u\n", + dp_link_bit_depth_to_bpc(bpc)); + } else { + seq_puts(m, "0"); } - drm_connector_list_iter_end(&conn_iter); - return 0; } DEFINE_SHOW_ATTRIBUTE(dp_test_data); static int dp_test_type_show(struct seq_file *m, void *data) { - struct dp_debug_private *debug = m->private; - struct drm_device *dev = debug->drm_dev; - struct drm_connector *connector; - struct drm_connector_list_iter conn_iter; + const struct dp_debug_private *debug = m->private; + const struct drm_connector *connector = debug->connector; - drm_connector_list_iter_begin(dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { - - if (connector->connector_type != - DRM_MODE_CONNECTOR_DisplayPort) - continue; - - if (connector->status == connector_status_connected) - seq_printf(m, "%02x", DP_TEST_LINK_VIDEO_PATTERN); - else - seq_puts(m, "0"); - } - drm_connector_list_iter_end(&conn_iter); + if (connector->status == connector_status_connected) + seq_printf(m, "%02x", DP_TEST_LINK_VIDEO_PATTERN); + else + seq_puts(m, "0"); return 0; } @@ -269,14 +137,12 @@ static ssize_t dp_test_active_write(struct file *file, { char *input_buffer; int status = 0; - struct dp_debug_private *debug; - struct drm_device *dev; - struct drm_connector *connector; - struct drm_connector_list_iter conn_iter; + const struct dp_debug_private *debug; + const struct drm_connector *connector; int val = 0; debug = ((struct seq_file *)file->private_data)->private; - dev = debug->drm_dev; + connector = debug->connector; if (len == 0) return 0; @@ -287,30 +153,22 @@ static ssize_t dp_test_active_write(struct file *file, DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len); - drm_connector_list_iter_begin(dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { - if (connector->connector_type != - DRM_MODE_CONNECTOR_DisplayPort) - continue; - - if (connector->status == connector_status_connected) { - status = kstrtoint(input_buffer, 10, &val); - if (status < 0) - break; - DRM_DEBUG_DRIVER("Got %d for test active\n", val); - /* To prevent erroneous activation of the compliance - * testing code, only accept an actual value of 1 here - */ - if (val == 1) - debug->panel->video_test = true; - else - debug->panel->video_test = false; + if (connector->status == connector_status_connected) { + status = kstrtoint(input_buffer, 10, &val); + if (status < 0) { + kfree(input_buffer); + return status; } + DRM_DEBUG_DRIVER("Got %d for test active\n", val); + /* To prevent erroneous activation of the compliance + * testing code, only accept an actual value of 1 here + */ + if (val == 1) + debug->panel->video_test = true; + else + debug->panel->video_test = false; } - drm_connector_list_iter_end(&conn_iter); kfree(input_buffer); - if (status < 0) - return status; *offp += len; return len; @@ -319,25 +177,16 @@ static ssize_t dp_test_active_write(struct file *file, static int dp_test_active_show(struct seq_file *m, void *data) { struct dp_debug_private *debug = m->private; - struct drm_device *dev = debug->drm_dev; - struct drm_connector *connector; - struct drm_connector_list_iter conn_iter; - - drm_connector_list_iter_begin(dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { - if (connector->connector_type != - DRM_MODE_CONNECTOR_DisplayPort) - continue; - - if (connector->status == connector_status_connected) { - if (debug->panel->video_test) - seq_puts(m, "1"); - else - seq_puts(m, "0"); - } else + struct drm_connector *connector = debug->connector; + + if (connector->status == connector_status_connected) { + if (debug->panel->video_test) + seq_puts(m, "1"); + else seq_puts(m, "0"); + } else { + seq_puts(m, "0"); } - drm_connector_list_iter_end(&conn_iter); return 0; } @@ -349,11 +198,6 @@ static int dp_test_active_open(struct inode *inode, inode->i_private); } -static const struct file_operations dp_debug_fops = { - .open = simple_open, - .read = dp_debug_read_info, -}; - static const struct file_operations test_active_fops = { .owner = THIS_MODULE, .open = dp_test_active_open, @@ -391,7 +235,7 @@ static int dp_debug_init(struct dp_debug *dp_debug, struct drm_minor *minor) struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, struct dp_usbpd *usbpd, struct dp_link *link, - struct drm_connector **connector, struct drm_minor *minor) + struct drm_connector *connector, struct drm_minor *minor) { int rc = 0; struct dp_debug_private *debug; diff --git a/drivers/gpu/drm/msm/dp/dp_debug.h b/drivers/gpu/drm/msm/dp/dp_debug.h index 7eaedfbb149c..8c0d0b5178fd 100644 --- a/drivers/gpu/drm/msm/dp/dp_debug.h +++ b/drivers/gpu/drm/msm/dp/dp_debug.h @@ -43,7 +43,7 @@ struct dp_debug { */ struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, struct dp_usbpd *usbpd, struct dp_link *link, - struct drm_connector **connector, + struct drm_connector *connector, struct drm_minor *minor); /** @@ -60,7 +60,7 @@ void dp_debug_put(struct dp_debug *dp_debug); static inline struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, struct dp_usbpd *usbpd, struct dp_link *link, - struct drm_connector **connector, struct drm_minor *minor) + struct drm_connector *connector, struct drm_minor *minor) { return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index fbe4c2cd52a3..aba8aa47ed76 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -10,6 +10,7 @@ #include <linux/component.h> #include <linux/of_irq.h> #include <linux/delay.h> +#include <drm/drm_panel.h> #include "msm_drv.h" #include "msm_kms.h" @@ -27,7 +28,6 @@ #include "dp_audio.h" #include "dp_debug.h" -static struct msm_dp *g_dp_display; #define HPD_STRING_SIZE 30 enum { @@ -79,6 +79,8 @@ struct dp_display_private { char *name; int irq; + unsigned int id; + /* state variables */ bool core_initialized; bool hpd_irq_on; @@ -116,11 +118,35 @@ struct dp_display_private { struct dp_audio *audio; }; +struct msm_dp_desc { + phys_addr_t io_start; + unsigned int connector_type; +}; + +struct msm_dp_config { + const struct msm_dp_desc *descs; + size_t num_descs; +}; + +static const struct msm_dp_config sc7180_dp_cfg = { + .descs = (const struct msm_dp_desc[]) { + [MSM_DP_CONTROLLER_0] = { .io_start = 0x0ae90000, .connector_type = DRM_MODE_CONNECTOR_DisplayPort }, + }, + .num_descs = 1, +}; + static const struct of_device_id dp_dt_match[] = { - {.compatible = "qcom,sc7180-dp"}, + { .compatible = "qcom,sc7180-dp", .data = &sc7180_dp_cfg }, {} }; +static struct dp_display_private *dev_get_dp_display_private(struct device *dev) +{ + struct msm_dp *dp = dev_get_drvdata(dev); + + return container_of(dp, struct dp_display_private, dp_display); +} + static int dp_add_event(struct dp_display_private *dp_priv, u32 event, u32 data, u32 delay) { @@ -197,25 +223,24 @@ static int dp_display_bind(struct device *dev, struct device *master, void *data) { int rc = 0; - struct dp_display_private *dp; - struct drm_device *drm; + struct dp_display_private *dp = dev_get_dp_display_private(dev); struct msm_drm_private *priv; + struct drm_device *drm; drm = dev_get_drvdata(master); - dp = container_of(g_dp_display, - struct dp_display_private, dp_display); - dp->dp_display.drm_dev = drm; priv = drm->dev_private; - priv->dp = &(dp->dp_display); + priv->dp[dp->id] = &dp->dp_display; - rc = dp->parser->parse(dp->parser); + rc = dp->parser->parse(dp->parser, dp->dp_display.connector_type); if (rc) { DRM_ERROR("device tree parsing failed\n"); goto end; } + dp->dp_display.panel_bridge = dp->parser->panel_bridge; + dp->aux->drm_dev = drm; rc = dp_aux_register(dp->aux); if (rc) { @@ -240,16 +265,13 @@ end: static void dp_display_unbind(struct device *dev, struct device *master, void *data) { - struct dp_display_private *dp; + struct dp_display_private *dp = dev_get_dp_display_private(dev); struct drm_device *drm = dev_get_drvdata(master); struct msm_drm_private *priv = drm->dev_private; - dp = container_of(g_dp_display, - struct dp_display_private, dp_display); - dp_power_client_deinit(dp->power); dp_aux_unregister(dp->aux); - priv->dp = NULL; + priv->dp[dp->id] = NULL; } static const struct component_ops dp_display_comp_ops = { @@ -379,38 +401,17 @@ static void dp_display_host_deinit(struct dp_display_private *dp) static int dp_display_usbpd_configure_cb(struct device *dev) { - int rc = 0; - struct dp_display_private *dp; - - if (!dev) { - DRM_ERROR("invalid dev\n"); - rc = -EINVAL; - goto end; - } - - dp = container_of(g_dp_display, - struct dp_display_private, dp_display); + struct dp_display_private *dp = dev_get_dp_display_private(dev); dp_display_host_init(dp, false); - rc = dp_display_process_hpd_high(dp); -end: - return rc; + return dp_display_process_hpd_high(dp); } static int dp_display_usbpd_disconnect_cb(struct device *dev) { int rc = 0; - struct dp_display_private *dp; - - if (!dev) { - DRM_ERROR("invalid dev\n"); - rc = -EINVAL; - return rc; - } - - dp = container_of(g_dp_display, - struct dp_display_private, dp_display); + struct dp_display_private *dp = dev_get_dp_display_private(dev); dp_add_event(dp, EV_USER_NOTIFICATION, false, 0); @@ -472,15 +473,7 @@ static int dp_display_usbpd_attention_cb(struct device *dev) { int rc = 0; u32 sink_request; - struct dp_display_private *dp; - - if (!dev) { - DRM_ERROR("invalid dev\n"); - return -EINVAL; - } - - dp = container_of(g_dp_display, - struct dp_display_private, dp_display); + struct dp_display_private *dp = dev_get_dp_display_private(dev); /* check for any test request issued by sink */ rc = dp_link_process_request(dp->link); @@ -647,7 +640,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) DRM_DEBUG_DP("hpd_state=%d\n", state); /* signal the disconnect event early to ensure proper teardown */ - dp_display_handle_plugged_change(g_dp_display, false); + dp_display_handle_plugged_change(&dp->dp_display, false); /* enable HDP plug interrupt to prepare for next plugin */ dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK, true); @@ -834,7 +827,7 @@ static int dp_display_set_mode(struct msm_dp *dp_display, return 0; } -static int dp_display_prepare(struct msm_dp *dp) +static int dp_display_prepare(struct msm_dp *dp_display) { return 0; } @@ -842,9 +835,7 @@ static int dp_display_prepare(struct msm_dp *dp) static int dp_display_enable(struct dp_display_private *dp, u32 data) { int rc = 0; - struct msm_dp *dp_display; - - dp_display = g_dp_display; + struct msm_dp *dp_display = &dp->dp_display; DRM_DEBUG_DP("sink_count=%d\n", dp->link->sink_count); if (dp_display->power_on) { @@ -880,9 +871,7 @@ static int dp_display_post_enable(struct msm_dp *dp_display) static int dp_display_disable(struct dp_display_private *dp, u32 data) { - struct msm_dp *dp_display; - - dp_display = g_dp_display; + struct msm_dp *dp_display = &dp->dp_display; if (!dp_display->power_on) return 0; @@ -912,7 +901,7 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) return 0; } -static int dp_display_unprepare(struct msm_dp *dp) +static int dp_display_unprepare(struct msm_dp *dp_display) { return 0; } @@ -1213,10 +1202,33 @@ int dp_display_request_irq(struct msm_dp *dp_display) return 0; } +static const struct msm_dp_desc *dp_display_get_desc(struct platform_device *pdev, + unsigned int *id) +{ + const struct msm_dp_config *cfg = of_device_get_match_data(&pdev->dev); + struct resource *res; + int i; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return NULL; + + for (i = 0; i < cfg->num_descs; i++) { + if (cfg->descs[i].io_start == res->start) { + *id = i; + return &cfg->descs[i]; + } + } + + dev_err(&pdev->dev, "unknown displayport instance\n"); + return NULL; +} + static int dp_display_probe(struct platform_device *pdev) { int rc = 0; struct dp_display_private *dp; + const struct msm_dp_desc *desc; if (!pdev || !pdev->dev.of_node) { DRM_ERROR("pdev not found\n"); @@ -1227,8 +1239,13 @@ static int dp_display_probe(struct platform_device *pdev) if (!dp) return -ENOMEM; + desc = dp_display_get_desc(pdev, &dp->id); + if (!desc) + return -EINVAL; + dp->pdev = pdev; dp->name = "drm_dp"; + dp->dp_display.connector_type = desc->connector_type; rc = dp_init_sub_modules(dp); if (rc) { @@ -1237,14 +1254,13 @@ static int dp_display_probe(struct platform_device *pdev) } mutex_init(&dp->event_mutex); - g_dp_display = &dp->dp_display; /* Store DP audio handle inside DP display */ - g_dp_display->dp_audio = dp->audio; + dp->dp_display.dp_audio = dp->audio; init_completion(&dp->audio_comp); - platform_set_drvdata(pdev, g_dp_display); + platform_set_drvdata(pdev, &dp->dp_display); rc = component_add(&pdev->dev, &dp_display_comp_ops); if (rc) { @@ -1257,10 +1273,7 @@ static int dp_display_probe(struct platform_device *pdev) static int dp_display_remove(struct platform_device *pdev) { - struct dp_display_private *dp; - - dp = container_of(g_dp_display, - struct dp_display_private, dp_display); + struct dp_display_private *dp = dev_get_dp_display_private(&pdev->dev); dp_display_deinit_sub_modules(dp); @@ -1309,14 +1322,14 @@ static int dp_pm_resume(struct device *dev) * can not declared display is connected unless * HDMI cable is plugged in and sink_count of * dongle become 1 + * also only signal audio when disconnected */ - if (dp->link->sink_count) + if (dp->link->sink_count) { dp->dp_display.is_connected = true; - else + } else { dp->dp_display.is_connected = false; - - dp_display_handle_plugged_change(g_dp_display, - dp->dp_display.is_connected); + dp_display_handle_plugged_change(dp_display, false); + } DRM_DEBUG_DP("After, sink_count=%d is_connected=%d core_inited=%d power_on=%d\n", dp->link->sink_count, dp->dp_display.is_connected, @@ -1429,7 +1442,7 @@ void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor) dev = &dp->pdev->dev; dp->debug = dp_debug_get(dev, dp->panel, dp->usbpd, - dp->link, &dp->dp_display.connector, + dp->link, dp->dp_display.connector, minor); if (IS_ERR(dp->debug)) { rc = PTR_ERR(dp->debug); diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h index 8b47cdabb67e..8e80e3bac394 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.h +++ b/drivers/gpu/drm/msm/dp/dp_display.h @@ -15,9 +15,11 @@ struct msm_dp { struct device *codec_dev; struct drm_connector *connector; struct drm_encoder *encoder; + struct drm_bridge *panel_bridge; bool is_connected; bool audio_enabled; bool power_on; + unsigned int connector_type; hdmi_codec_plugged_cb plugged_cb; diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index 764f4b81017e..76856c4ee1d6 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -5,6 +5,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic.h> +#include <drm/drm_bridge.h> #include <drm/drm_crtc.h> #include "msm_drv.h" @@ -147,7 +148,7 @@ struct drm_connector *dp_drm_connector_init(struct msm_dp *dp_display) ret = drm_connector_init(dp_display->drm_dev, connector, &dp_connector_funcs, - DRM_MODE_CONNECTOR_DisplayPort); + dp_display->connector_type); if (ret) return ERR_PTR(ret); @@ -160,5 +161,15 @@ struct drm_connector *dp_drm_connector_init(struct msm_dp *dp_display) drm_connector_attach_encoder(connector, dp_display->encoder); + if (dp_display->panel_bridge) { + ret = drm_bridge_attach(dp_display->encoder, + dp_display->panel_bridge, NULL, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret < 0) { + DRM_ERROR("failed to attach panel bridge: %d\n", ret); + return ERR_PTR(ret); + } + } + return connector; } diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 2181b60e1d1d..71db10c0f262 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -234,7 +234,7 @@ u32 dp_panel_get_mode_bpp(struct dp_panel *dp_panel, u32 mode_edid_bpp, u32 mode_pclk_khz) { struct dp_panel_private *panel; - u32 bpp = mode_edid_bpp; + u32 bpp; if (!dp_panel || !mode_edid_bpp || !mode_pclk_khz) { DRM_ERROR("invalid input\n"); diff --git a/drivers/gpu/drm/msm/dp/dp_parser.c b/drivers/gpu/drm/msm/dp/dp_parser.c index 0519dd3ac3c3..a7acc23f742b 100644 --- a/drivers/gpu/drm/msm/dp/dp_parser.c +++ b/drivers/gpu/drm/msm/dp/dp_parser.c @@ -6,11 +6,22 @@ #include <linux/of_gpio.h> #include <linux/phy/phy.h> +#include <drm/drm_of.h> #include <drm/drm_print.h> +#include <drm/drm_bridge.h> #include "dp_parser.h" #include "dp_reg.h" +#define DP_DEFAULT_AHB_OFFSET 0x0000 +#define DP_DEFAULT_AHB_SIZE 0x0200 +#define DP_DEFAULT_AUX_OFFSET 0x0200 +#define DP_DEFAULT_AUX_SIZE 0x0200 +#define DP_DEFAULT_LINK_OFFSET 0x0400 +#define DP_DEFAULT_LINK_SIZE 0x0C00 +#define DP_DEFAULT_P0_OFFSET 0x1000 +#define DP_DEFAULT_P0_SIZE 0x0400 + static const struct dp_regulator_cfg sdm845_dp_reg_cfg = { .num = 2, .regs = { @@ -19,67 +30,73 @@ static const struct dp_regulator_cfg sdm845_dp_reg_cfg = { }, }; -static int msm_dss_ioremap(struct platform_device *pdev, - struct dss_io_data *io_data) -{ - struct resource *res = NULL; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - DRM_ERROR("%pS->%s: msm_dss_get_res failed\n", - __builtin_return_address(0), __func__); - return -ENODEV; - } - - io_data->len = (u32)resource_size(res); - io_data->base = ioremap(res->start, io_data->len); - if (!io_data->base) { - DRM_ERROR("%pS->%s: ioremap failed\n", - __builtin_return_address(0), __func__); - return -EIO; - } - - return 0; -} - -static void msm_dss_iounmap(struct dss_io_data *io_data) +static void __iomem *dp_ioremap(struct platform_device *pdev, int idx, size_t *len) { - if (io_data->base) { - iounmap(io_data->base); - io_data->base = NULL; - } - io_data->len = 0; -} + struct resource *res; + void __iomem *base; -static void dp_parser_unmap_io_resources(struct dp_parser *parser) -{ - struct dp_io *io = &parser->io; + base = devm_platform_get_and_ioremap_resource(pdev, idx, &res); + if (!IS_ERR(base)) + *len = resource_size(res); - msm_dss_iounmap(&io->dp_controller); + return base; } static int dp_parser_ctrl_res(struct dp_parser *parser) { - int rc = 0; struct platform_device *pdev = parser->pdev; struct dp_io *io = &parser->io; + struct dss_io_data *dss = &io->dp_controller; + + dss->ahb.base = dp_ioremap(pdev, 0, &dss->ahb.len); + if (IS_ERR(dss->ahb.base)) + return PTR_ERR(dss->ahb.base); + + dss->aux.base = dp_ioremap(pdev, 1, &dss->aux.len); + if (IS_ERR(dss->aux.base)) { + /* + * The initial binding had a single reg, but in order to + * support variation in the sub-region sizes this was split. + * dp_ioremap() will fail with -EINVAL here if only a single + * reg is specified, so fill in the sub-region offsets and + * lengths based on this single region. + */ + if (PTR_ERR(dss->aux.base) == -EINVAL) { + if (dss->ahb.len < DP_DEFAULT_P0_OFFSET + DP_DEFAULT_P0_SIZE) { + DRM_ERROR("legacy memory region not large enough\n"); + return -EINVAL; + } + + dss->ahb.len = DP_DEFAULT_AHB_SIZE; + dss->aux.base = dss->ahb.base + DP_DEFAULT_AUX_OFFSET; + dss->aux.len = DP_DEFAULT_AUX_SIZE; + dss->link.base = dss->ahb.base + DP_DEFAULT_LINK_OFFSET; + dss->link.len = DP_DEFAULT_LINK_SIZE; + dss->p0.base = dss->ahb.base + DP_DEFAULT_P0_OFFSET; + dss->p0.len = DP_DEFAULT_P0_SIZE; + } else { + DRM_ERROR("unable to remap aux region: %pe\n", dss->aux.base); + return PTR_ERR(dss->aux.base); + } + } else { + dss->link.base = dp_ioremap(pdev, 2, &dss->link.len); + if (IS_ERR(dss->link.base)) { + DRM_ERROR("unable to remap link region: %pe\n", dss->link.base); + return PTR_ERR(dss->link.base); + } - rc = msm_dss_ioremap(pdev, &io->dp_controller); - if (rc) { - DRM_ERROR("unable to remap dp io resources, rc=%d\n", rc); - goto err; + dss->p0.base = dp_ioremap(pdev, 3, &dss->p0.len); + if (IS_ERR(dss->p0.base)) { + DRM_ERROR("unable to remap p0 region: %pe\n", dss->p0.base); + return PTR_ERR(dss->p0.base); + } } io->phy = devm_phy_get(&pdev->dev, "dp"); - if (IS_ERR(io->phy)) { - rc = PTR_ERR(io->phy); - goto err; - } + if (IS_ERR(io->phy)) + return PTR_ERR(io->phy); return 0; -err: - dp_parser_unmap_io_resources(parser); - return rc; } static int dp_parser_misc(struct dp_parser *parser) @@ -248,7 +265,28 @@ static int dp_parser_clock(struct dp_parser *parser) return 0; } -static int dp_parser_parse(struct dp_parser *parser) +static int dp_parser_find_panel(struct dp_parser *parser) +{ + struct device *dev = &parser->pdev->dev; + struct drm_panel *panel; + int rc; + + rc = drm_of_find_panel_or_bridge(dev->of_node, 1, 0, &panel, NULL); + if (rc) { + DRM_ERROR("failed to acquire DRM panel: %d\n", rc); + return rc; + } + + parser->panel_bridge = devm_drm_panel_bridge_add(dev, panel); + if (IS_ERR(parser->panel_bridge)) { + DRM_ERROR("failed to create panel bridge\n"); + return PTR_ERR(parser->panel_bridge); + } + + return 0; +} + +static int dp_parser_parse(struct dp_parser *parser, int connector_type) { int rc = 0; @@ -269,6 +307,12 @@ static int dp_parser_parse(struct dp_parser *parser) if (rc) return rc; + if (connector_type == DRM_MODE_CONNECTOR_eDP) { + rc = dp_parser_find_panel(parser); + if (rc) + return rc; + } + /* Map the corresponding regulator information according to * version. Currently, since we only have one supported platform, * mapping the regulator directly. diff --git a/drivers/gpu/drm/msm/dp/dp_parser.h b/drivers/gpu/drm/msm/dp/dp_parser.h index 34b49628bbaf..3172da089421 100644 --- a/drivers/gpu/drm/msm/dp/dp_parser.h +++ b/drivers/gpu/drm/msm/dp/dp_parser.h @@ -25,11 +25,18 @@ enum dp_pm_type { DP_MAX_PM }; -struct dss_io_data { - u32 len; +struct dss_io_region { + size_t len; void __iomem *base; }; +struct dss_io_data { + struct dss_io_region ahb; + struct dss_io_region aux; + struct dss_io_region link; + struct dss_io_region p0; +}; + static inline const char *dp_parser_pm_name(enum dp_pm_type module) { switch (module) { @@ -116,8 +123,9 @@ struct dp_parser { struct dp_display_data disp_data; const struct dp_regulator_cfg *regulator_cfg; u32 max_dp_lanes; + struct drm_bridge *panel_bridge; - int (*parse)(struct dp_parser *parser); + int (*parse)(struct dp_parser *parser, int connector_type); }; /** diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 614dc7f26f2c..75ae3008b68f 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -215,8 +215,10 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, goto fail; } - if (!msm_dsi_manager_validate_current_config(msm_dsi->id)) + if (!msm_dsi_manager_validate_current_config(msm_dsi->id)) { + ret = -EINVAL; goto fail; + } msm_dsi->encoder = encoder; diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h index b50db91cb8a7..569c8ff062ba 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.h +++ b/drivers/gpu/drm/msm/dsi/dsi.h @@ -107,6 +107,8 @@ void msm_dsi_host_cmd_xfer_commit(struct mipi_dsi_host *host, u32 dma_base, u32 len); int msm_dsi_host_enable(struct mipi_dsi_host *host); int msm_dsi_host_disable(struct mipi_dsi_host *host); +void msm_dsi_host_enable_irq(struct mipi_dsi_host *host); +void msm_dsi_host_disable_irq(struct mipi_dsi_host *host); int msm_dsi_host_power_on(struct mipi_dsi_host *host, struct msm_dsi_phy_shared_timings *phy_shared_timings, bool is_bonded_dsi, struct msm_dsi_phy *phy); diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index e269df285136..f69a125f9559 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -106,7 +106,8 @@ struct msm_dsi_host { phys_addr_t ctrl_size; struct regulator_bulk_data supplies[DSI_DEV_REGULATOR_MAX]; - struct clk *bus_clks[DSI_BUS_CLK_MAX]; + int num_bus_clks; + struct clk_bulk_data bus_clks[DSI_BUS_CLK_MAX]; struct clk *byte_clk; struct clk *esc_clk; @@ -115,16 +116,16 @@ struct msm_dsi_host { struct clk *pixel_clk_src; struct clk *byte_intf_clk; - u32 byte_clk_rate; - u32 pixel_clk_rate; - u32 esc_clk_rate; + unsigned long byte_clk_rate; + unsigned long pixel_clk_rate; + unsigned long esc_clk_rate; /* DSI v2 specific clocks */ struct clk *src_clk; struct clk *esc_clk_src; struct clk *dsi_clk_src; - u32 src_clk_rate; + unsigned long src_clk_rate; struct gpio_desc *disp_en_gpio; struct gpio_desc *te_gpio; @@ -374,15 +375,14 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) int i, ret = 0; /* get bus clocks */ - for (i = 0; i < cfg->num_bus_clks; i++) { - msm_host->bus_clks[i] = msm_clk_get(pdev, - cfg->bus_clk_names[i]); - if (IS_ERR(msm_host->bus_clks[i])) { - ret = PTR_ERR(msm_host->bus_clks[i]); - pr_err("%s: Unable to get %s clock, ret = %d\n", - __func__, cfg->bus_clk_names[i], ret); - goto exit; - } + for (i = 0; i < cfg->num_bus_clks; i++) + msm_host->bus_clks[i].id = cfg->bus_clk_names[i]; + msm_host->num_bus_clks = cfg->num_bus_clks; + + ret = devm_clk_bulk_get(&pdev->dev, msm_host->num_bus_clks, msm_host->bus_clks); + if (ret < 0) { + dev_err(&pdev->dev, "Unable to get clocks, ret = %d\n", ret); + goto exit; } /* get link and source clocks */ @@ -433,41 +433,6 @@ exit: return ret; } -static int dsi_bus_clk_enable(struct msm_dsi_host *msm_host) -{ - const struct msm_dsi_config *cfg = msm_host->cfg_hnd->cfg; - int i, ret; - - DBG("id=%d", msm_host->id); - - for (i = 0; i < cfg->num_bus_clks; i++) { - ret = clk_prepare_enable(msm_host->bus_clks[i]); - if (ret) { - pr_err("%s: failed to enable bus clock %d ret %d\n", - __func__, i, ret); - goto err; - } - } - - return 0; -err: - for (; i > 0; i--) - clk_disable_unprepare(msm_host->bus_clks[i]); - - return ret; -} - -static void dsi_bus_clk_disable(struct msm_dsi_host *msm_host) -{ - const struct msm_dsi_config *cfg = msm_host->cfg_hnd->cfg; - int i; - - DBG(""); - - for (i = cfg->num_bus_clks - 1; i >= 0; i--) - clk_disable_unprepare(msm_host->bus_clks[i]); -} - int msm_dsi_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -478,7 +443,7 @@ int msm_dsi_runtime_suspend(struct device *dev) if (!msm_host->cfg_hnd) return 0; - dsi_bus_clk_disable(msm_host); + clk_bulk_disable_unprepare(msm_host->num_bus_clks, msm_host->bus_clks); return 0; } @@ -493,15 +458,15 @@ int msm_dsi_runtime_resume(struct device *dev) if (!msm_host->cfg_hnd) return 0; - return dsi_bus_clk_enable(msm_host); + return clk_bulk_prepare_enable(msm_host->num_bus_clks, msm_host->bus_clks); } int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host) { - u32 byte_intf_rate; + unsigned long byte_intf_rate; int ret; - DBG("Set clk rates: pclk=%d, byteclk=%d", + DBG("Set clk rates: pclk=%d, byteclk=%lu", msm_host->mode->clock, msm_host->byte_clk_rate); ret = dev_pm_opp_set_rate(&msm_host->pdev->dev, @@ -558,13 +523,11 @@ int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host) goto pixel_clk_err; } - if (msm_host->byte_intf_clk) { - ret = clk_prepare_enable(msm_host->byte_intf_clk); - if (ret) { - pr_err("%s: Failed to enable byte intf clk\n", - __func__); - goto byte_intf_clk_err; - } + ret = clk_prepare_enable(msm_host->byte_intf_clk); + if (ret) { + pr_err("%s: Failed to enable byte intf clk\n", + __func__); + goto byte_intf_clk_err; } return 0; @@ -583,7 +546,7 @@ int dsi_link_clk_set_rate_v2(struct msm_dsi_host *msm_host) { int ret; - DBG("Set clk rates: pclk=%d, byteclk=%d, esc_clk=%d, dsi_src_clk=%d", + DBG("Set clk rates: pclk=%d, byteclk=%lu, esc_clk=%lu, dsi_src_clk=%lu", msm_host->mode->clock, msm_host->byte_clk_rate, msm_host->esc_clk_rate, msm_host->src_clk_rate); @@ -660,8 +623,7 @@ void dsi_link_clk_disable_6g(struct msm_dsi_host *msm_host) dev_pm_opp_set_rate(&msm_host->pdev->dev, 0); clk_disable_unprepare(msm_host->esc_clk); clk_disable_unprepare(msm_host->pixel_clk); - if (msm_host->byte_intf_clk) - clk_disable_unprepare(msm_host->byte_intf_clk); + clk_disable_unprepare(msm_host->byte_intf_clk); clk_disable_unprepare(msm_host->byte_clk); } @@ -673,10 +635,10 @@ void dsi_link_clk_disable_v2(struct msm_dsi_host *msm_host) clk_disable_unprepare(msm_host->byte_clk); } -static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_bonded_dsi) +static unsigned long dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { struct drm_display_mode *mode = msm_host->mode; - u32 pclk_rate; + unsigned long pclk_rate; pclk_rate = mode->clock * 1000; @@ -696,7 +658,7 @@ static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { u8 lanes = msm_host->lanes; u32 bpp = dsi_get_bpp(msm_host->format); - u32 pclk_rate = dsi_get_pclk_rate(msm_host, is_bonded_dsi); + unsigned long pclk_rate = dsi_get_pclk_rate(msm_host, is_bonded_dsi); u64 pclk_bpp = (u64)pclk_rate * bpp; if (lanes == 0) { @@ -713,7 +675,7 @@ static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_bonded_dsi) msm_host->pixel_clk_rate = pclk_rate; msm_host->byte_clk_rate = pclk_bpp; - DBG("pclk=%d, bclk=%d", msm_host->pixel_clk_rate, + DBG("pclk=%lu, bclk=%lu", msm_host->pixel_clk_rate, msm_host->byte_clk_rate); } @@ -772,7 +734,7 @@ int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_bonded_dsi) msm_host->esc_clk_rate = msm_host->byte_clk_rate / esc_div; - DBG("esc=%d, src=%d", msm_host->esc_clk_rate, + DBG("esc=%lu, src=%lu", msm_host->esc_clk_rate, msm_host->src_clk_rate); return 0; @@ -1898,6 +1860,23 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) return ret; } + msm_host->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (msm_host->irq < 0) { + ret = msm_host->irq; + dev_err(&pdev->dev, "failed to get irq: %d\n", ret); + return ret; + } + + /* do not autoenable, will be enabled later */ + ret = devm_request_irq(&pdev->dev, msm_host->irq, dsi_host_irq, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT | IRQF_NO_AUTOEN, + "dsi_isr", msm_host); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request IRQ%u: %d\n", + msm_host->irq, ret); + return ret; + } + init_completion(&msm_host->dma_comp); init_completion(&msm_host->video_comp); mutex_init(&msm_host->dev_mutex); @@ -1925,7 +1904,6 @@ void msm_dsi_host_destroy(struct mipi_dsi_host *host) DBG(""); dsi_tx_buf_free(msm_host); if (msm_host->workqueue) { - flush_workqueue(msm_host->workqueue); destroy_workqueue(msm_host->workqueue); msm_host->workqueue = NULL; } @@ -1941,25 +1919,8 @@ int msm_dsi_host_modeset_init(struct mipi_dsi_host *host, { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; - struct platform_device *pdev = msm_host->pdev; int ret; - msm_host->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (msm_host->irq < 0) { - ret = msm_host->irq; - DRM_DEV_ERROR(dev->dev, "failed to get irq: %d\n", ret); - return ret; - } - - ret = devm_request_irq(&pdev->dev, msm_host->irq, - dsi_host_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, - "dsi_isr", msm_host); - if (ret < 0) { - DRM_DEV_ERROR(&pdev->dev, "failed to request IRQ%u: %d\n", - msm_host->irq, ret); - return ret; - } - msm_host->dev = dev; ret = cfg_hnd->ops->tx_buf_alloc(msm_host, SZ_4K); if (ret) { @@ -2315,6 +2276,20 @@ void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host, clk_req->escclk_rate = msm_host->esc_clk_rate; } +void msm_dsi_host_enable_irq(struct mipi_dsi_host *host) +{ + struct msm_dsi_host *msm_host = to_msm_dsi_host(host); + + enable_irq(msm_host->irq); +} + +void msm_dsi_host_disable_irq(struct mipi_dsi_host *host) +{ + struct msm_dsi_host *msm_host = to_msm_dsi_host(host); + + disable_irq(msm_host->irq); +} + int msm_dsi_host_enable(struct mipi_dsi_host *host) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index c41d39f5b7cf..20c4d650fd80 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -3,6 +3,8 @@ * Copyright (c) 2015, The Linux Foundation. All rights reserved. */ +#include "drm/drm_bridge_connector.h" + #include "msm_kms.h" #include "dsi.h" @@ -377,6 +379,14 @@ static void dsi_mgr_bridge_pre_enable(struct drm_bridge *bridge) } } + /* + * Enable before preparing the panel, disable after unpreparing, so + * that the panel can communicate over the DSI link. + */ + msm_dsi_host_enable_irq(host); + if (is_bonded_dsi && msm_dsi1) + msm_dsi_host_enable_irq(msm_dsi1->host); + /* Always call panel functions once, because even for dual panels, * there is only one drm_panel instance. */ @@ -411,6 +421,10 @@ host_en_fail: if (panel) drm_panel_unprepare(panel); panel_prep_fail: + msm_dsi_host_disable_irq(host); + if (is_bonded_dsi && msm_dsi1) + msm_dsi_host_disable_irq(msm_dsi1->host); + if (is_bonded_dsi && msm_dsi1) msm_dsi_host_power_off(msm_dsi1->host); host1_on_fail: @@ -523,6 +537,10 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) id, ret); } + msm_dsi_host_disable_irq(host); + if (is_bonded_dsi && msm_dsi1) + msm_dsi_host_disable_irq(msm_dsi1->host); + /* Save PHY status if it is a clock source */ msm_dsi_phy_pll_save_state(msm_dsi->phy); @@ -688,10 +706,10 @@ struct drm_connector *msm_dsi_manager_ext_bridge_init(u8 id) { struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); struct drm_device *dev = msm_dsi->dev; + struct drm_connector *connector; struct drm_encoder *encoder; struct drm_bridge *int_bridge, *ext_bridge; - struct drm_connector *connector; - struct list_head *connector_list; + int ret; int_bridge = msm_dsi->bridge; ext_bridge = msm_dsi->external_bridge = @@ -699,22 +717,44 @@ struct drm_connector *msm_dsi_manager_ext_bridge_init(u8 id) encoder = msm_dsi->encoder; - /* link the internal dsi bridge to the external bridge */ - drm_bridge_attach(encoder, ext_bridge, int_bridge, 0); - /* - * we need the drm_connector created by the external bridge - * driver (or someone else) to feed it to our driver's - * priv->connector[] list, mainly for msm_fbdev_init() + * Try first to create the bridge without it creating its own + * connector.. currently some bridges support this, and others + * do not (and some support both modes) */ - connector_list = &dev->mode_config.connector_list; + ret = drm_bridge_attach(encoder, ext_bridge, int_bridge, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret == -EINVAL) { + struct drm_connector *connector; + struct list_head *connector_list; + + /* link the internal dsi bridge to the external bridge */ + drm_bridge_attach(encoder, ext_bridge, int_bridge, 0); + + /* + * we need the drm_connector created by the external bridge + * driver (or someone else) to feed it to our driver's + * priv->connector[] list, mainly for msm_fbdev_init() + */ + connector_list = &dev->mode_config.connector_list; + + list_for_each_entry(connector, connector_list, head) { + if (drm_connector_has_possible_encoder(connector, encoder)) + return connector; + } + + return ERR_PTR(-ENODEV); + } - list_for_each_entry(connector, connector_list, head) { - if (drm_connector_has_possible_encoder(connector, encoder)) - return connector; + connector = drm_bridge_connector_init(dev, encoder); + if (IS_ERR(connector)) { + DRM_ERROR("Unable to create bridge connector\n"); + return ERR_CAST(connector); } - return ERR_PTR(-ENODEV); + drm_connector_attach_encoder(connector, encoder); + + return connector; } void msm_dsi_manager_bridge_destroy(struct drm_bridge *bridge) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 8c65ef6968ca..9842e04b5858 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -627,6 +627,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { .data = &dsi_phy_14nm_cfgs }, { .compatible = "qcom,dsi-phy-14nm-660", .data = &dsi_phy_14nm_660_cfgs }, + { .compatible = "qcom,dsi-phy-14nm-8953", + .data = &dsi_phy_14nm_8953_cfgs }, #endif #ifdef CONFIG_DRM_MSM_DSI_10NM_PHY { .compatible = "qcom,dsi-phy-10nm", diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index b91303ada74f..4c8257581bfc 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -48,6 +48,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_660_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_10nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_10nm_8998_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_7nm_cfgs; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index d13552b2213b..7414966f198e 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -110,14 +110,13 @@ static struct dsi_pll_14nm *pll_14nm_list[DSI_MAX]; static bool pll_14nm_poll_for_ready(struct dsi_pll_14nm *pll_14nm, u32 nb_tries, u32 timeout_us) { - bool pll_locked = false; + bool pll_locked = false, pll_ready = false; void __iomem *base = pll_14nm->phy->pll_base; u32 tries, val; tries = nb_tries; while (tries--) { - val = dsi_phy_read(base + - REG_DSI_14nm_PHY_PLL_RESET_SM_READY_STATUS); + val = dsi_phy_read(base + REG_DSI_14nm_PHY_PLL_RESET_SM_READY_STATUS); pll_locked = !!(val & BIT(5)); if (pll_locked) @@ -126,23 +125,24 @@ static bool pll_14nm_poll_for_ready(struct dsi_pll_14nm *pll_14nm, udelay(timeout_us); } - if (!pll_locked) { - tries = nb_tries; - while (tries--) { - val = dsi_phy_read(base + - REG_DSI_14nm_PHY_PLL_RESET_SM_READY_STATUS); - pll_locked = !!(val & BIT(0)); + if (!pll_locked) + goto out; - if (pll_locked) - break; + tries = nb_tries; + while (tries--) { + val = dsi_phy_read(base + REG_DSI_14nm_PHY_PLL_RESET_SM_READY_STATUS); + pll_ready = !!(val & BIT(0)); + + if (pll_ready) + break; - udelay(timeout_us); - } + udelay(timeout_us); } - DBG("DSI PLL is %slocked", pll_locked ? "" : "*not* "); +out: + DBG("DSI PLL is %slocked, %sready", pll_locked ? "" : "*not* ", pll_ready ? "" : "*not* "); - return pll_locked; + return pll_locked && pll_ready; } static void dsi_pll_14nm_config_init(struct dsi_pll_config *pconf) @@ -213,9 +213,7 @@ static void pll_14nm_dec_frac_calc(struct dsi_pll_14nm *pll, struct dsi_pll_conf DBG("vco_clk_rate=%lld ref_clk_rate=%lld", vco_clk_rate, fref); dec_start_multiple = div_u64(vco_clk_rate * multiplier, fref); - div_u64_rem(dec_start_multiple, multiplier, &div_frac_start); - - dec_start = div_u64(dec_start_multiple, multiplier); + dec_start = div_u64_rem(dec_start_multiple, multiplier, &div_frac_start); pconf->dec_start = (u32)dec_start; pconf->div_frac_start = div_frac_start; @@ -1065,3 +1063,24 @@ const struct msm_dsi_phy_cfg dsi_phy_14nm_660_cfgs = { .io_start = { 0xc994400, 0xc996000 }, .num_dsi_phy = 2, }; + +const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs = { + .has_phy_lane = true, + .reg_cfg = { + .num = 1, + .regs = { + {"vcca", 17000, 32}, + }, + }, + .ops = { + .enable = dsi_14nm_phy_enable, + .disable = dsi_14nm_phy_disable, + .pll_init = dsi_pll_14nm_init, + .save_pll_state = dsi_14nm_pll_save_state, + .restore_pll_state = dsi_14nm_pll_restore_state, + }, + .min_pll_rate = VCO_MIN_RATE, + .max_pll_rate = VCO_MAX_RATE, + .io_start = { 0x1a94400, 0x1a96400 }, + .num_dsi_phy = 2, +}; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c index aaa37456f4ee..71ed4aa0dc67 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -428,7 +428,7 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov bytediv->reg = pll_28nm->phy->pll_base + REG_DSI_28nm_8960_PHY_PLL_CTRL_9; snprintf(parent_name, 32, "dsi%dvco_clk", pll_28nm->phy->id); - snprintf(clk_name, 32, "dsi%dpllbyte", pll_28nm->phy->id); + snprintf(clk_name, 32, "dsi%dpllbyte", pll_28nm->phy->id + 1); bytediv_init.name = clk_name; bytediv_init.ops = &clk_bytediv_ops; @@ -442,7 +442,7 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov return ret; provided_clocks[DSI_BYTE_PLL_CLK] = &bytediv->hw; - snprintf(clk_name, 32, "dsi%dpll", pll_28nm->phy->id); + snprintf(clk_name, 32, "dsi%dpll", pll_28nm->phy->id + 1); /* DIV3 */ hw = devm_clk_hw_register_divider(dev, clk_name, parent_name, 0, pll_28nm->phy->pll_base + diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index cb297b08458e..079613d2aaa9 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -114,9 +114,7 @@ static void dsi_pll_calc_dec_frac(struct dsi_pll_7nm *pll, struct dsi_pll_config multiplier = 1 << FRAC_BITS; dec_multiple = div_u64(pll_freq * multiplier, divider); - div_u64_rem(dec_multiple, multiplier, &frac); - - dec = div_u64(dec_multiple, multiplier); + dec = div_u64_rem(dec_multiple, multiplier, &frac); if (!(pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_1)) config->pll_clock_inverters = 0x28; diff --git a/drivers/gpu/drm/msm/edp/edp_ctrl.c b/drivers/gpu/drm/msm/edp/edp_ctrl.c index 4fb397ee7c84..a68a4a1867c1 100644 --- a/drivers/gpu/drm/msm/edp/edp_ctrl.c +++ b/drivers/gpu/drm/msm/edp/edp_ctrl.c @@ -1116,7 +1116,7 @@ void msm_edp_ctrl_power(struct edp_ctrl *ctrl, bool on) int msm_edp_ctrl_init(struct msm_edp *edp) { struct edp_ctrl *ctrl = NULL; - struct device *dev = &edp->pdev->dev; + struct device *dev; int ret; if (!edp) { @@ -1124,6 +1124,7 @@ int msm_edp_ctrl_init(struct msm_edp *edp) return -EINVAL; } + dev = &edp->pdev->dev; ctrl = devm_kzalloc(dev, sizeof(*ctrl), GFP_KERNEL); if (!ctrl) return -ENOMEM; @@ -1189,7 +1190,6 @@ void msm_edp_ctrl_destroy(struct edp_ctrl *ctrl) return; if (ctrl->workqueue) { - flush_workqueue(ctrl->workqueue); destroy_workqueue(ctrl->workqueue); ctrl->workqueue = NULL; } @@ -1242,8 +1242,6 @@ bool msm_edp_ctrl_panel_connected(struct edp_ctrl *ctrl) int msm_edp_ctrl_get_panel_info(struct edp_ctrl *ctrl, struct drm_connector *connector, struct edid **edid) { - int ret = 0; - mutex_lock(&ctrl->dev_mutex); if (ctrl->edid) { @@ -1278,7 +1276,7 @@ disable_ret: } unlock_ret: mutex_unlock(&ctrl->dev_mutex); - return ret; + return 0; } int msm_edp_ctrl_timing_cfg(struct edp_ctrl *ctrl, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 737453b6e596..75b64e6ae035 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -61,10 +61,8 @@ static void msm_hdmi_destroy(struct hdmi *hdmi) * at this point, hpd has been disabled, * after flush workq, it's safe to deinit hdcp */ - if (hdmi->workq) { - flush_workqueue(hdmi->workq); + if (hdmi->workq) destroy_workqueue(hdmi->workq); - } msm_hdmi_hdcp_destroy(hdmi); if (hdmi->phy_dev) { @@ -154,19 +152,13 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) ret = -ENOMEM; goto fail; } - for (i = 0; i < config->hpd_reg_cnt; i++) { - struct regulator *reg; - - reg = devm_regulator_get(&pdev->dev, - config->hpd_reg_names[i]); - if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - DRM_DEV_ERROR(&pdev->dev, "failed to get hpd regulator: %s (%d)\n", - config->hpd_reg_names[i], ret); - goto fail; - } + for (i = 0; i < config->hpd_reg_cnt; i++) + hdmi->hpd_regs[i].supply = config->hpd_reg_names[i]; - hdmi->hpd_regs[i] = reg; + ret = devm_regulator_bulk_get(&pdev->dev, config->hpd_reg_cnt, hdmi->hpd_regs); + if (ret) { + DRM_DEV_ERROR(&pdev->dev, "failed to get hpd regulator: %d\n", ret); + goto fail; } hdmi->pwr_regs = devm_kcalloc(&pdev->dev, @@ -177,19 +169,11 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) ret = -ENOMEM; goto fail; } - for (i = 0; i < config->pwr_reg_cnt; i++) { - struct regulator *reg; - - reg = devm_regulator_get(&pdev->dev, - config->pwr_reg_names[i]); - if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - DRM_DEV_ERROR(&pdev->dev, "failed to get pwr regulator: %s (%d)\n", - config->pwr_reg_names[i], ret); - goto fail; - } - hdmi->pwr_regs[i] = reg; + ret = devm_regulator_bulk_get(&pdev->dev, config->pwr_reg_cnt, hdmi->pwr_regs); + if (ret) { + DRM_DEV_ERROR(&pdev->dev, "failed to get pwr regulator: %d\n", ret); + goto fail; } hdmi->hpd_clks = devm_kcalloc(&pdev->dev, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h index d0b84f0abee1..82261078c6b1 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.h +++ b/drivers/gpu/drm/msm/hdmi/hdmi.h @@ -56,8 +56,8 @@ struct hdmi { void __iomem *qfprom_mmio; phys_addr_t mmio_phy_addr; - struct regulator **hpd_regs; - struct regulator **pwr_regs; + struct regulator_bulk_data *hpd_regs; + struct regulator_bulk_data *pwr_regs; struct clk **hpd_clks; struct clk **pwr_clks; @@ -163,7 +163,7 @@ struct hdmi_phy { void __iomem *mmio; struct hdmi_phy_cfg *cfg; const struct hdmi_phy_funcs *funcs; - struct regulator **regs; + struct regulator_bulk_data *regs; struct clk **clks; }; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c index 6e380db9287b..f04eb4a70f0d 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c @@ -28,13 +28,9 @@ static void msm_hdmi_power_on(struct drm_bridge *bridge) pm_runtime_get_sync(&hdmi->pdev->dev); - for (i = 0; i < config->pwr_reg_cnt; i++) { - ret = regulator_enable(hdmi->pwr_regs[i]); - if (ret) { - DRM_DEV_ERROR(dev->dev, "failed to enable pwr regulator: %s (%d)\n", - config->pwr_reg_names[i], ret); - } - } + ret = regulator_bulk_enable(config->pwr_reg_cnt, hdmi->pwr_regs); + if (ret) + DRM_DEV_ERROR(dev->dev, "failed to enable pwr regulator: %d\n", ret); if (config->pwr_clk_cnt > 0) { DBG("pixclock: %lu", hdmi->pixclock); @@ -70,13 +66,9 @@ static void power_off(struct drm_bridge *bridge) for (i = 0; i < config->pwr_clk_cnt; i++) clk_disable_unprepare(hdmi->pwr_clks[i]); - for (i = 0; i < config->pwr_reg_cnt; i++) { - ret = regulator_disable(hdmi->pwr_regs[i]); - if (ret) { - DRM_DEV_ERROR(dev->dev, "failed to disable pwr regulator: %s (%d)\n", - config->pwr_reg_names[i], ret); - } - } + ret = regulator_bulk_disable(config->pwr_reg_cnt, hdmi->pwr_regs); + if (ret) + DRM_DEV_ERROR(dev->dev, "failed to disable pwr regulator: %d\n", ret); pm_runtime_put_autosuspend(&hdmi->pdev->dev); } diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c index 58707a1f3878..a7f729cdec7b 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c @@ -146,16 +146,13 @@ int msm_hdmi_hpd_enable(struct drm_connector *connector) const struct hdmi_platform_config *config = hdmi->config; struct device *dev = &hdmi->pdev->dev; uint32_t hpd_ctrl; - int i, ret; + int ret; unsigned long flags; - for (i = 0; i < config->hpd_reg_cnt; i++) { - ret = regulator_enable(hdmi->hpd_regs[i]); - if (ret) { - DRM_DEV_ERROR(dev, "failed to enable hpd regulator: %s (%d)\n", - config->hpd_reg_names[i], ret); - goto fail; - } + ret = regulator_bulk_enable(config->hpd_reg_cnt, hdmi->hpd_regs); + if (ret) { + DRM_DEV_ERROR(dev, "failed to enable hpd regulators: %d\n", ret); + goto fail; } ret = pinctrl_pm_select_default_state(dev); @@ -207,7 +204,7 @@ static void hdp_disable(struct hdmi_connector *hdmi_connector) struct hdmi *hdmi = hdmi_connector->hdmi; const struct hdmi_platform_config *config = hdmi->config; struct device *dev = &hdmi->pdev->dev; - int i, ret = 0; + int ret; /* Disable HPD interrupt */ hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, 0); @@ -225,12 +222,9 @@ static void hdp_disable(struct hdmi_connector *hdmi_connector) if (ret) dev_warn(dev, "pinctrl state chg failed: %d\n", ret); - for (i = 0; i < config->hpd_reg_cnt; i++) { - ret = regulator_disable(hdmi->hpd_regs[i]); - if (ret) - dev_warn(dev, "failed to disable hpd regulator: %s (%d)\n", - config->hpd_reg_names[i], ret); - } + ret = regulator_bulk_disable(config->hpd_reg_cnt, hdmi->hpd_regs); + if (ret) + dev_warn(dev, "failed to disable hpd regulator: %d\n", ret); } static void diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c index 8a38d4b95102..16b0e8836d27 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c @@ -23,22 +23,15 @@ static int msm_hdmi_phy_resource_init(struct hdmi_phy *phy) if (!phy->clks) return -ENOMEM; - for (i = 0; i < cfg->num_regs; i++) { - struct regulator *reg; - - reg = devm_regulator_get(dev, cfg->reg_names[i]); - if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - if (ret != -EPROBE_DEFER) { - DRM_DEV_ERROR(dev, - "failed to get phy regulator: %s (%d)\n", - cfg->reg_names[i], ret); - } + for (i = 0; i < cfg->num_regs; i++) + phy->regs[i].supply = cfg->reg_names[i]; - return ret; - } + ret = devm_regulator_bulk_get(dev, cfg->num_regs, phy->regs); + if (ret) { + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(dev, "failed to get phy regulators: %d\n", ret); - phy->regs[i] = reg; + return ret; } for (i = 0; i < cfg->num_clks; i++) { @@ -66,11 +59,10 @@ int msm_hdmi_phy_resource_enable(struct hdmi_phy *phy) pm_runtime_get_sync(dev); - for (i = 0; i < cfg->num_regs; i++) { - ret = regulator_enable(phy->regs[i]); - if (ret) - DRM_DEV_ERROR(dev, "failed to enable regulator: %s (%d)\n", - cfg->reg_names[i], ret); + ret = regulator_bulk_enable(cfg->num_regs, phy->regs); + if (ret) { + DRM_DEV_ERROR(dev, "failed to enable regulators: (%d)\n", ret); + return ret; } for (i = 0; i < cfg->num_clks; i++) { @@ -92,8 +84,7 @@ void msm_hdmi_phy_resource_disable(struct hdmi_phy *phy) for (i = cfg->num_clks - 1; i >= 0; i--) clk_disable_unprepare(phy->clks[i]); - for (i = cfg->num_regs - 1; i >= 0; i--) - regulator_disable(phy->regs[i]); + regulator_bulk_disable(cfg->num_regs, phy->regs); pm_runtime_put_sync(dev); } diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c index a8f3b2cbfdc5..99c7853353fd 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c @@ -682,7 +682,7 @@ static int hdmi_8996_pll_is_enabled(struct clk_hw *hw) return pll_locked; } -static struct clk_ops hdmi_8996_pll_ops = { +static const struct clk_ops hdmi_8996_pll_ops = { .set_rate = hdmi_8996_pll_set_clk_rate, .round_rate = hdmi_8996_pll_round_rate, .recalc_rate = hdmi_8996_pll_recalc_rate, @@ -695,7 +695,7 @@ static const char * const hdmi_pll_parents[] = { "xo", }; -static struct clk_init_data pll_init = { +static const struct clk_init_data pll_init = { .name = "hdmipll", .ops = &hdmi_8996_pll_ops, .parent_names = hdmi_pll_parents, diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index fab09e7c6efc..27c9ae563f2f 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -116,20 +116,10 @@ out: trace_msm_atomic_async_commit_finish(crtc_mask); } -static enum hrtimer_restart msm_atomic_pending_timer(struct hrtimer *t) -{ - struct msm_pending_timer *timer = container_of(t, - struct msm_pending_timer, timer); - - kthread_queue_work(timer->worker, &timer->work); - - return HRTIMER_NORESTART; -} - static void msm_atomic_pending_work(struct kthread_work *work) { struct msm_pending_timer *timer = container_of(work, - struct msm_pending_timer, work); + struct msm_pending_timer, work.work); msm_atomic_async_commit(timer->kms, timer->crtc_idx); } @@ -139,8 +129,6 @@ int msm_atomic_init_pending_timer(struct msm_pending_timer *timer, { timer->kms = kms; timer->crtc_idx = crtc_idx; - hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - timer->timer.function = msm_atomic_pending_timer; timer->worker = kthread_create_worker(0, "atomic-worker-%d", crtc_idx); if (IS_ERR(timer->worker)) { @@ -149,7 +137,10 @@ int msm_atomic_init_pending_timer(struct msm_pending_timer *timer, return ret; } sched_set_fifo(timer->worker->task); - kthread_init_work(&timer->work, msm_atomic_pending_work); + + msm_hrtimer_work_init(&timer->work, timer->worker, + msm_atomic_pending_work, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); return 0; } @@ -258,7 +249,7 @@ void msm_atomic_commit_tail(struct drm_atomic_state *state) vsync_time = kms->funcs->vsync_time(kms, async_crtc); wakeup_time = ktime_sub(vsync_time, ms_to_ktime(1)); - hrtimer_start(&timer->timer, wakeup_time, + msm_hrtimer_queue_work(&timer->work, wakeup_time, HRTIMER_MODE_ABS); } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 2e6fc185e54d..7936e8d498dd 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -58,7 +58,7 @@ static const struct drm_mode_config_helper_funcs mode_config_helper_funcs = { }; #ifdef CONFIG_DRM_MSM_REGISTER_LOGGING -static bool reglog = false; +static bool reglog; MODULE_PARM_DESC(reglog, "Enable register read/write logging"); module_param(reglog, bool, 0600); #else @@ -75,7 +75,7 @@ static char *vram = "16m"; MODULE_PARM_DESC(vram, "Configure VRAM size (for devices without IOMMU/GPUMMU)"); module_param(vram, charp, 0); -bool dumpstate = false; +bool dumpstate; MODULE_PARM_DESC(dumpstate, "Dump KMS state on errors"); module_param(dumpstate, bool, 0600); @@ -200,6 +200,35 @@ void msm_rmw(void __iomem *addr, u32 mask, u32 or) msm_writel(val | or, addr); } +static enum hrtimer_restart msm_hrtimer_worktimer(struct hrtimer *t) +{ + struct msm_hrtimer_work *work = container_of(t, + struct msm_hrtimer_work, timer); + + kthread_queue_work(work->worker, &work->work); + + return HRTIMER_NORESTART; +} + +void msm_hrtimer_queue_work(struct msm_hrtimer_work *work, + ktime_t wakeup_time, + enum hrtimer_mode mode) +{ + hrtimer_start(&work->timer, wakeup_time, mode); +} + +void msm_hrtimer_work_init(struct msm_hrtimer_work *work, + struct kthread_worker *worker, + kthread_work_func_t fn, + clockid_t clock_id, + enum hrtimer_mode mode) +{ + hrtimer_init(&work->timer, clock_id, mode); + work->timer.function = msm_hrtimer_worktimer; + work->worker = worker; + kthread_init_work(&work->work, fn); +} + static irqreturn_t msm_irq(int irq, void *arg) { struct drm_device *dev = arg; @@ -630,10 +659,11 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (ret) goto err_msm_uninit; - ret = msm_disp_snapshot_init(ddev); - if (ret) - DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); - + if (kms) { + ret = msm_disp_snapshot_init(ddev); + if (ret) + DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); + } drm_mode_config_reset(ddev); #ifdef CONFIG_DRM_FBDEV_EMULATION @@ -682,6 +712,7 @@ static void load_gpu(struct drm_device *dev) static int context_init(struct drm_device *dev, struct drm_file *file) { + static atomic_t ident = ATOMIC_INIT(0); struct msm_drm_private *priv = dev->dev_private; struct msm_file_private *ctx; @@ -689,12 +720,17 @@ static int context_init(struct drm_device *dev, struct drm_file *file) if (!ctx) return -ENOMEM; + INIT_LIST_HEAD(&ctx->submitqueues); + rwlock_init(&ctx->queuelock); + kref_init(&ctx->ref); msm_submitqueue_init(dev, ctx); ctx->aspace = msm_gpu_create_private_address_space(priv->gpu, current); file->driver_priv = ctx; + ctx->seqno = atomic_inc_return(&ident); + return 0; } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 8b005d1ac899..69952b239384 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -53,14 +53,6 @@ struct msm_disp_state; #define FRAC_16_16(mult, div) (((mult) << 16) / (div)) -struct msm_file_private { - rwlock_t queuelock; - struct list_head submitqueues; - int queueid; - struct msm_gem_address_space *aspace; - struct kref ref; -}; - enum msm_mdp_plane_property { PLANE_PROP_ZPOS, PLANE_PROP_ALPHA, @@ -68,6 +60,13 @@ enum msm_mdp_plane_property { PLANE_PROP_MAX_NUM }; +enum msm_dp_controller { + MSM_DP_CONTROLLER_0, + MSM_DP_CONTROLLER_1, + MSM_DP_CONTROLLER_2, + MSM_DP_CONTROLLER_COUNT, +}; + #define MSM_GPU_MAX_RINGS 4 #define MAX_H_TILES_PER_DISPLAY 2 @@ -161,7 +160,7 @@ struct msm_drm_private { /* DSI is shared by mdp4 and mdp5 */ struct msm_dsi *dsi[2]; - struct msm_dp *dp; + struct msm_dp *dp[MSM_DP_CONTROLLER_COUNT]; /* when we have more than one 'msm_gpu' these need to be an array: */ struct msm_gpu *gpu; @@ -488,40 +487,27 @@ void msm_writel(u32 data, void __iomem *addr); u32 msm_readl(const void __iomem *addr); void msm_rmw(void __iomem *addr, u32 mask, u32 or); -struct msm_gpu_submitqueue; -int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); -struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, - u32 id); -int msm_submitqueue_create(struct drm_device *drm, - struct msm_file_private *ctx, - u32 prio, u32 flags, u32 *id); -int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx, - struct drm_msm_submitqueue_query *args); -int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id); -void msm_submitqueue_close(struct msm_file_private *ctx); - -void msm_submitqueue_destroy(struct kref *kref); - -static inline void __msm_file_private_destroy(struct kref *kref) -{ - struct msm_file_private *ctx = container_of(kref, - struct msm_file_private, ref); - - msm_gem_address_space_put(ctx->aspace); - kfree(ctx); -} - -static inline void msm_file_private_put(struct msm_file_private *ctx) -{ - kref_put(&ctx->ref, __msm_file_private_destroy); -} +/** + * struct msm_hrtimer_work - a helper to combine an hrtimer with kthread_work + * + * @timer: hrtimer to control when the kthread work is triggered + * @work: the kthread work + * @worker: the kthread worker the work will be scheduled on + */ +struct msm_hrtimer_work { + struct hrtimer timer; + struct kthread_work work; + struct kthread_worker *worker; +}; -static inline struct msm_file_private *msm_file_private_get( - struct msm_file_private *ctx) -{ - kref_get(&ctx->ref); - return ctx; -} +void msm_hrtimer_queue_work(struct msm_hrtimer_work *work, + ktime_t wakeup_time, + enum hrtimer_mode mode); +void msm_hrtimer_work_init(struct msm_hrtimer_work *work, + struct kthread_worker *worker, + kthread_work_func_t fn, + clockid_t clock_id, + enum hrtimer_mode mode); #define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) #define VERB(fmt, ...) if (0) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) @@ -547,7 +533,7 @@ static inline int align_pitch(int width, int bpp) static inline unsigned long timeout_to_jiffies(const ktime_t *timeout) { ktime_t now = ktime_get(); - unsigned long remaining_jiffies; + s64 remaining_jiffies; if (ktime_compare(*timeout, now) < 0) { remaining_jiffies = 0; @@ -556,7 +542,7 @@ static inline unsigned long timeout_to_jiffies(const ktime_t *timeout) remaining_jiffies = ktime_divns(rem, NSEC_PER_SEC / HZ); } - return remaining_jiffies; + return clamp(remaining_jiffies, 0LL, (s64)INT_MAX); } #endif /* __MSM_DRV_H__ */ diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 40a9863f5951..104fdfc14027 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -5,6 +5,7 @@ */ #include <linux/dma-map-ops.h> +#include <linux/vmalloc.h> #include <linux/spinlock.h> #include <linux/shmem_fs.h> #include <linux/dma-buf.h> @@ -1132,6 +1133,7 @@ static int msm_gem_new_impl(struct drm_device *dev, msm_obj->flags = flags; msm_obj->madv = MSM_MADV_WILLNEED; + INIT_LIST_HEAD(&msm_obj->node); INIT_LIST_HEAD(&msm_obj->vmas); *obj = &msm_obj->base; @@ -1166,7 +1168,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32 ret = msm_gem_new_impl(dev, size, flags, &obj); if (ret) - goto fail; + return ERR_PTR(ret); msm_obj = to_msm_bo(obj); @@ -1250,7 +1252,7 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj); if (ret) - goto fail; + return ERR_PTR(ret); drm_gem_private_object_init(dev, obj, size); diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 0f1b29ee04a9..4a1420b05e97 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -4,6 +4,8 @@ * Author: Rob Clark <robdclark@gmail.com> */ +#include <linux/vmalloc.h> + #include "msm_drv.h" #include "msm_gem.h" #include "msm_gpu.h" diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 924b01b9c105..3cb029f10925 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -46,7 +46,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, if (!submit) return ERR_PTR(-ENOMEM); - ret = drm_sched_job_init(&submit->base, &queue->entity, queue); + ret = drm_sched_job_init(&submit->base, queue->entity, queue); if (ret) { kfree(submit); return ERR_PTR(ret); @@ -161,7 +161,8 @@ out: static int submit_lookup_cmds(struct msm_gem_submit *submit, struct drm_msm_gem_submit *args, struct drm_file *file) { - unsigned i, sz; + unsigned i; + size_t sz; int ret = 0; for (i = 0; i < args->nr_cmds; i++) { diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 8a3a592da3a4..2c46cd968ac4 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -296,7 +296,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, state->bos = kcalloc(nr, sizeof(struct msm_gpu_state_bo), GFP_KERNEL); - for (i = 0; i < submit->nr_bos; i++) { + for (i = 0; state->bos && i < submit->nr_bos; i++) { if (should_dump(submit, i)) { msm_gpu_crashstate_get_bo(state, submit->bos[i].obj, submit->bos[i].iova, submit->bos[i].flags); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 0e4b45bff2e6..59cdd00b69d0 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -112,6 +112,13 @@ struct msm_gpu_devfreq { * it is inactive. */ unsigned long idle_freq; + + /** + * idle_work: + * + * Used to delay clamping to idle freq on active->idle transition. + */ + struct msm_hrtimer_work idle_work; }; struct msm_gpu { @@ -203,6 +210,10 @@ struct msm_gpu { uint32_t suspend_count; struct msm_gpu_state *crashstate; + + /* Enable clamping to idle freq when inactive: */ + bool clamp_to_idle; + /* True if the hardware supports expanded apriv (a650 and newer) */ bool hw_apriv; @@ -258,6 +269,39 @@ struct msm_gpu_perfcntr { #define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN) /** + * struct msm_file_private - per-drm_file context + * + * @queuelock: synchronizes access to submitqueues list + * @submitqueues: list of &msm_gpu_submitqueue created by userspace + * @queueid: counter incremented each time a submitqueue is created, + * used to assign &msm_gpu_submitqueue.id + * @aspace: the per-process GPU address-space + * @ref: reference count + * @seqno: unique per process seqno + */ +struct msm_file_private { + rwlock_t queuelock; + struct list_head submitqueues; + int queueid; + struct msm_gem_address_space *aspace; + struct kref ref; + int seqno; + + /** + * entities: + * + * Table of per-priority-level sched entities used by submitqueues + * associated with this &drm_file. Because some userspace apps + * make assumptions about rendering from multiple gl contexts + * (of the same priority) within the process happening in FIFO + * order without requiring any fencing beyond MakeCurrent(), we + * create at most one &drm_sched_entity per-process per-priority- + * level. + */ + struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS]; +}; + +/** * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority * * @gpu: the gpu instance @@ -304,6 +348,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio, } /** + * struct msm_gpu_submitqueues - Userspace created context. + * * A submitqueue is associated with a gl context or vk queue (or equiv) * in userspace. * @@ -321,7 +367,7 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio, * seqno, protected by submitqueue lock * @lock: submitqueue lock * @ref: reference count - * @entity: the submit job-queue + * @entity: the submit job-queue */ struct msm_gpu_submitqueue { int id; @@ -333,7 +379,7 @@ struct msm_gpu_submitqueue { struct idr fence_idr; struct mutex lock; struct kref ref; - struct drm_sched_entity entity; + struct drm_sched_entity *entity; }; struct msm_gpu_state_bo { @@ -421,6 +467,33 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) int msm_gpu_pm_suspend(struct msm_gpu *gpu); int msm_gpu_pm_resume(struct msm_gpu *gpu); +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); +struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, + u32 id); +int msm_submitqueue_create(struct drm_device *drm, + struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id); +int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx, + struct drm_msm_submitqueue_query *args); +int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id); +void msm_submitqueue_close(struct msm_file_private *ctx); + +void msm_submitqueue_destroy(struct kref *kref); + +void __msm_file_private_destroy(struct kref *kref); + +static inline void msm_file_private_put(struct msm_file_private *ctx) +{ + kref_put(&ctx->ref, __msm_file_private_destroy); +} + +static inline struct msm_file_private *msm_file_private_get( + struct msm_file_private *ctx) +{ + kref_get(&ctx->ref); + return ctx; +} + void msm_devfreq_init(struct msm_gpu *gpu); void msm_devfreq_cleanup(struct msm_gpu *gpu); void msm_devfreq_resume(struct msm_gpu *gpu); diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 0a1ee20296a2..8b7473f69cb8 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -88,8 +88,12 @@ static struct devfreq_dev_profile msm_devfreq_profile = { .get_cur_freq = msm_devfreq_get_cur_freq, }; +static void msm_devfreq_idle_work(struct kthread_work *work); + void msm_devfreq_init(struct msm_gpu *gpu) { + struct msm_gpu_devfreq *df = &gpu->devfreq; + /* We need target support to do devfreq */ if (!gpu->funcs->gpu_busy) return; @@ -105,25 +109,27 @@ void msm_devfreq_init(struct msm_gpu *gpu) msm_devfreq_profile.freq_table = NULL; msm_devfreq_profile.max_state = 0; - gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev, + df->devfreq = devm_devfreq_add_device(&gpu->pdev->dev, &msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND, NULL); - if (IS_ERR(gpu->devfreq.devfreq)) { + if (IS_ERR(df->devfreq)) { DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); - gpu->devfreq.devfreq = NULL; + df->devfreq = NULL; return; } - devfreq_suspend_device(gpu->devfreq.devfreq); + devfreq_suspend_device(df->devfreq); - gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node, - gpu->devfreq.devfreq); + gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node, df->devfreq); if (IS_ERR(gpu->cooling)) { DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't register GPU cooling device\n"); gpu->cooling = NULL; } + + msm_hrtimer_work_init(&df->idle_work, gpu->worker, msm_devfreq_idle_work, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); } void msm_devfreq_cleanup(struct msm_gpu *gpu) @@ -151,6 +157,14 @@ void msm_devfreq_active(struct msm_gpu *gpu) unsigned int idle_time; unsigned long target_freq = df->idle_freq; + if (!df->devfreq) + return; + + /* + * Cancel any pending transition to idle frequency: + */ + hrtimer_cancel(&df->idle_work.timer); + /* * Hold devfreq lock to synchronize with get_dev_status()/ * target() callbacks @@ -181,11 +195,17 @@ void msm_devfreq_active(struct msm_gpu *gpu) mutex_unlock(&df->devfreq->lock); } -void msm_devfreq_idle(struct msm_gpu *gpu) + +static void msm_devfreq_idle_work(struct kthread_work *work) { - struct msm_gpu_devfreq *df = &gpu->devfreq; + struct msm_gpu_devfreq *df = container_of(work, + struct msm_gpu_devfreq, idle_work.work); + struct msm_gpu *gpu = container_of(df, struct msm_gpu, devfreq); unsigned long idle_freq, target_freq = 0; + if (!df->devfreq) + return; + /* * Hold devfreq lock to synchronize with get_dev_status()/ * target() callbacks @@ -194,10 +214,19 @@ void msm_devfreq_idle(struct msm_gpu *gpu) idle_freq = get_freq(gpu); - msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0); + if (gpu->clamp_to_idle) + msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0); df->idle_time = ktime_get(); df->idle_freq = idle_freq; mutex_unlock(&df->devfreq->lock); } + +void msm_devfreq_idle(struct msm_gpu *gpu) +{ + struct msm_gpu_devfreq *df = &gpu->devfreq; + + msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1), + HRTIMER_MODE_ABS); +} diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index de2bc3467bb5..6a42b819abc4 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -136,8 +136,7 @@ struct msm_kms; * shortly before vblank to flush pending async updates. */ struct msm_pending_timer { - struct hrtimer timer; - struct kthread_work work; + struct msm_hrtimer_work work; struct kthread_worker *worker; struct msm_kms *kms; unsigned crtc_idx; diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 32a55d81b58b..7cb158bcbcf6 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -7,6 +7,24 @@ #include "msm_gpu.h" +void __msm_file_private_destroy(struct kref *kref) +{ + struct msm_file_private *ctx = container_of(kref, + struct msm_file_private, ref); + int i; + + for (i = 0; i < ARRAY_SIZE(ctx->entities); i++) { + if (!ctx->entities[i]) + continue; + + drm_sched_entity_destroy(ctx->entities[i]); + kfree(ctx->entities[i]); + } + + msm_gem_address_space_put(ctx->aspace); + kfree(ctx); +} + void msm_submitqueue_destroy(struct kref *kref) { struct msm_gpu_submitqueue *queue = container_of(kref, @@ -14,8 +32,6 @@ void msm_submitqueue_destroy(struct kref *kref) idr_destroy(&queue->fence_idr); - drm_sched_entity_destroy(&queue->entity); - msm_file_private_put(queue->ctx); kfree(queue); @@ -61,13 +77,48 @@ void msm_submitqueue_close(struct msm_file_private *ctx) } } +static struct drm_sched_entity * +get_sched_entity(struct msm_file_private *ctx, struct msm_ringbuffer *ring, + unsigned ring_nr, enum drm_sched_priority sched_prio) +{ + static DEFINE_MUTEX(entity_lock); + unsigned idx = (ring_nr * NR_SCHED_PRIORITIES) + sched_prio; + + /* We should have already validated that the requested priority is + * valid by the time we get here. + */ + if (WARN_ON(idx >= ARRAY_SIZE(ctx->entities))) + return ERR_PTR(-EINVAL); + + mutex_lock(&entity_lock); + + if (!ctx->entities[idx]) { + struct drm_sched_entity *entity; + struct drm_gpu_scheduler *sched = &ring->sched; + int ret; + + entity = kzalloc(sizeof(*ctx->entities[idx]), GFP_KERNEL); + + ret = drm_sched_entity_init(entity, sched_prio, &sched, 1, NULL); + if (ret) { + mutex_unlock(&entity_lock); + kfree(entity); + return ERR_PTR(ret); + } + + ctx->entities[idx] = entity; + } + + mutex_unlock(&entity_lock); + + return ctx->entities[idx]; +} + int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, u32 prio, u32 flags, u32 *id) { struct msm_drm_private *priv = drm->dev_private; struct msm_gpu_submitqueue *queue; - struct msm_ringbuffer *ring; - struct drm_gpu_scheduler *sched; enum drm_sched_priority sched_prio; unsigned ring_nr; int ret; @@ -91,12 +142,10 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, queue->flags = flags; queue->ring_nr = ring_nr; - ring = priv->gpu->rb[ring_nr]; - sched = &ring->sched; - - ret = drm_sched_entity_init(&queue->entity, - sched_prio, &sched, 1, NULL); - if (ret) { + queue->entity = get_sched_entity(ctx, priv->gpu->rb[ring_nr], + ring_nr, sched_prio); + if (IS_ERR(queue->entity)) { + ret = PTR_ERR(queue->entity); kfree(queue); return ret; } @@ -140,10 +189,6 @@ int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx) */ default_prio = DIV_ROUND_UP(max_priority, 2); - INIT_LIST_HEAD(&ctx->submitqueues); - - rwlock_init(&ctx->queuelock); - return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL); } diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index ec0432fe1bdf..86d78634a979 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -173,7 +173,11 @@ static void mxsfb_irq_disable(struct drm_device *drm) struct mxsfb_drm_private *mxsfb = drm->dev_private; mxsfb_enable_axi_clk(mxsfb); - mxsfb->crtc.funcs->disable_vblank(&mxsfb->crtc); + + /* Disable and clear VBLANK IRQ */ + writel(CTRL1_CUR_FRAME_DONE_IRQ_EN, mxsfb->base + LCDC_CTRL1 + REG_CLR); + writel(CTRL1_CUR_FRAME_DONE_IRQ, mxsfb->base + LCDC_CTRL1 + REG_CLR); + mxsfb_disable_axi_clk(mxsfb); } diff --git a/drivers/gpu/drm/nouveau/dispnv50/crc.c b/drivers/gpu/drm/nouveau/dispnv50/crc.c index b8c31b697797..66f32d965c72 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/crc.c +++ b/drivers/gpu/drm/nouveau/dispnv50/crc.c @@ -704,6 +704,7 @@ static const struct file_operations nv50_crc_flip_threshold_fops = { .open = nv50_crc_debugfs_flip_threshold_open, .read = seq_read, .write = nv50_crc_debugfs_flip_threshold_set, + .release = single_release, }; int nv50_head_crc_late_register(struct nv50_head *head) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index d7b9f7f8c9e3..8e28403ea9b1 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -1414,7 +1414,7 @@ nv50_mstm_prepare(struct nv50_mstm *mstm) int ret; NV_ATOMIC(drm, "%s: mstm prepare\n", mstm->outp->base.base.name); - ret = drm_dp_update_payload_part1(&mstm->mgr); + ret = drm_dp_update_payload_part1(&mstm->mgr, 1); drm_for_each_encoder(encoder, mstm->outp->base.base.dev) { if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) { diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c index d66f97280282..72099d1e4816 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head.c @@ -52,6 +52,7 @@ nv50_head_flush_clr(struct nv50_head *head, void nv50_head_flush_set_wndw(struct nv50_head *head, struct nv50_head_atom *asyh) { + if (asyh->set.curs ) head->func->curs_set(head, asyh); if (asyh->set.olut ) { asyh->olut.offset = nv50_lut_load(&head->olut, asyh->olut.buffer, @@ -67,7 +68,6 @@ nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) if (asyh->set.view ) head->func->view (head, asyh); if (asyh->set.mode ) head->func->mode (head, asyh); if (asyh->set.core ) head->func->core_set(head, asyh); - if (asyh->set.curs ) head->func->curs_set(head, asyh); if (asyh->set.base ) head->func->base (head, asyh); if (asyh->set.ovly ) head->func->ovly (head, asyh); if (asyh->set.dither ) head->func->dither (head, asyh); diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index c68cc957248e..a582c0cb0cb0 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -71,6 +71,7 @@ #define PASCAL_CHANNEL_GPFIFO_A /* cla06f.h */ 0x0000c06f #define VOLTA_CHANNEL_GPFIFO_A /* clc36f.h */ 0x0000c36f #define TURING_CHANNEL_GPFIFO_A /* clc36f.h */ 0x0000c46f +#define AMPERE_CHANNEL_GPFIFO_B /* clc36f.h */ 0x0000c76f #define NV50_DISP /* cl5070.h */ 0x00005070 #define G82_DISP /* cl5070.h */ 0x00008270 @@ -200,6 +201,7 @@ #define PASCAL_DMA_COPY_B 0x0000c1b5 #define VOLTA_DMA_COPY_A 0x0000c3b5 #define TURING_DMA_COPY_A 0x0000c5b5 +#define AMPERE_DMA_COPY_B 0x0000c7b5 #define FERMI_DECOMPRESS 0x000090b8 diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h index 54fab7cc36c1..64ee82c7c1be 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h @@ -77,4 +77,5 @@ int gp100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct int gp10b_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **); int gv100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **); int tu102_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **); +int ga102_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index b2c7e0802ac3..12b107acb6ee 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -844,6 +844,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm) struct ttm_resource *, struct ttm_resource *); int (*init)(struct nouveau_channel *, u32 handle); } _methods[] = { + { "COPY", 4, 0xc7b5, nve0_bo_move_copy, nve0_bo_move_init }, { "COPY", 4, 0xc5b5, nve0_bo_move_copy, nve0_bo_move_init }, { "GRCE", 0, 0xc5b5, nve0_bo_move_copy, nvc0_bo_move_init }, { "COPY", 4, 0xc3b5, nve0_bo_move_copy, nve0_bo_move_init }, diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 80099ef75702..ea7769135b0d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -250,7 +250,8 @@ static int nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, u64 runlist, bool priv, struct nouveau_channel **pchan) { - static const u16 oclasses[] = { TURING_CHANNEL_GPFIFO_A, + static const u16 oclasses[] = { AMPERE_CHANNEL_GPFIFO_B, + TURING_CHANNEL_GPFIFO_A, VOLTA_CHANNEL_GPFIFO_A, PASCAL_CHANNEL_GPFIFO_A, MAXWELL_CHANNEL_GPFIFO_A, @@ -386,7 +387,8 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) nvif_object_map(&chan->user, NULL, 0); - if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) { + if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO && + chan->user.oclass < AMPERE_CHANNEL_GPFIFO_B) { ret = nvif_notify_ctor(&chan->user, "abi16ChanKilled", nouveau_channel_killed, true, NV906F_V0_NTFY_KILLED, diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index c2bc05eb2e54..1cbe01048b93 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -207,6 +207,7 @@ static const struct file_operations nouveau_pstate_fops = { .open = nouveau_debugfs_pstate_open, .read = seq_read, .write = nouveau_debugfs_pstate_set, + .release = single_release, }; static struct drm_info_list nouveau_debugfs_list[] = { diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 1f828c9f691c..6109cd9e3399 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -345,6 +345,9 @@ nouveau_accel_gr_init(struct nouveau_drm *drm) u32 arg0, arg1; int ret; + if (device->info.family >= NV_DEVICE_INFO_V0_AMPERE) + return; + /* Allocate channel that has access to the graphics engine. */ if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { arg0 = nvif_fifo_runlist(device, NV_DEVICE_HOST_RUNLIST_ENGINES_GR); @@ -469,6 +472,7 @@ nouveau_accel_init(struct nouveau_drm *drm) case PASCAL_CHANNEL_GPFIFO_A: case VOLTA_CHANNEL_GPFIFO_A: case TURING_CHANNEL_GPFIFO_A: + case AMPERE_CHANNEL_GPFIFO_B: ret = nvc0_fence_create(drm); break; default: diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 5b27845075a1..8c2ecc282723 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -247,10 +247,8 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, } ret = nouveau_bo_init(nvbo, size, align, domain, NULL, NULL); - if (ret) { - nouveau_bo_ref(NULL, &nvbo); + if (ret) return ret; - } /* we restrict allowed domains on nv50+ to only the types * that were requested at creation time. not possibly on diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index 7c9c928c3196..c3526a8622e3 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -204,7 +204,7 @@ nv84_fence_create(struct nouveau_drm *drm) priv->base.context_new = nv84_fence_context_new; priv->base.context_del = nv84_fence_context_del; - priv->base.uevent = true; + priv->base.uevent = drm->client.device.info.family < NV_DEVICE_INFO_V0_AMPERE; mutex_init(&priv->mutex); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 93ddf63d1114..ca75c5f6ecaf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2602,6 +2602,7 @@ nv172_chipset = { .top = { 0x00000001, ga100_top_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, + .fifo = { 0x00000001, ga102_fifo_new }, }; static const struct nvkm_device_chip @@ -2622,6 +2623,7 @@ nv174_chipset = { .top = { 0x00000001, ga100_top_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, + .fifo = { 0x00000001, ga102_fifo_new }, }; static const struct nvkm_device_chip @@ -2642,6 +2644,7 @@ nv177_chipset = { .top = { 0x00000001, ga100_top_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, + .fifo = { 0x00000001, ga102_fifo_new }, }; static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c index b0ece71aefde..ce774579c89d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c @@ -57,7 +57,7 @@ nvkm_control_mthd_pstate_info(struct nvkm_control *ctrl, void *data, u32 size) args->v0.count = 0; args->v0.ustate_ac = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; args->v0.ustate_dc = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; - args->v0.pwrsrc = -ENOSYS; + args->v0.pwrsrc = -ENODEV; args->v0.pstate = NVIF_CONTROL_PSTATE_INFO_V0_PSTATE_UNKNOWN; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild index 3209eb7af65f..5e831d347a95 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild @@ -18,6 +18,7 @@ nvkm-y += nvkm/engine/fifo/gp100.o nvkm-y += nvkm/engine/fifo/gp10b.o nvkm-y += nvkm/engine/fifo/gv100.o nvkm-y += nvkm/engine/fifo/tu102.o +nvkm-y += nvkm/engine/fifo/ga102.o nvkm-y += nvkm/engine/fifo/chan.o nvkm-y += nvkm/engine/fifo/channv50.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c index 353b77d9b3dc..3492c561f2cf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c @@ -82,7 +82,7 @@ g84_fifo_chan_engine_fini(struct nvkm_fifo_chan *base, if (offset < 0) return 0; - engn = fifo->base.func->engine_id(&fifo->base, engine); + engn = fifo->base.func->engine_id(&fifo->base, engine) - 1; save = nvkm_mask(device, 0x002520, 0x0000003f, 1 << engn); nvkm_wr32(device, 0x0032fc, chan->base.inst->addr >> 12); done = nvkm_msec(device, 2000, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c new file mode 100644 index 000000000000..c630dbd2911a --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c @@ -0,0 +1,311 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#define ga102_fifo(p) container_of((p), struct ga102_fifo, base.engine) +#define ga102_chan(p) container_of((p), struct ga102_chan, object) +#include <engine/fifo.h> +#include "user.h" + +#include <core/memory.h> +#include <subdev/mmu.h> +#include <subdev/timer.h> +#include <subdev/top.h> + +#include <nvif/cl0080.h> +#include <nvif/clc36f.h> +#include <nvif/class.h> + +struct ga102_fifo { + struct nvkm_fifo base; +}; + +struct ga102_chan { + struct nvkm_object object; + + struct { + u32 runl; + u32 chan; + } ctrl; + + struct nvkm_memory *mthd; + struct nvkm_memory *inst; + struct nvkm_memory *user; + struct nvkm_memory *runl; + + struct nvkm_vmm *vmm; +}; + +static int +ga102_chan_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *oclass) +{ + if (index == 0) { + oclass->ctor = nvkm_object_new; + oclass->base = (struct nvkm_sclass) { -1, -1, AMPERE_DMA_COPY_B }; + return 0; + } + + return -EINVAL; +} + +static int +ga102_chan_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) +{ + struct ga102_chan *chan = ga102_chan(object); + struct nvkm_device *device = chan->object.engine->subdev.device; + u64 bar2 = nvkm_memory_bar2(chan->user); + + if (bar2 == ~0ULL) + return -EFAULT; + + *type = NVKM_OBJECT_MAP_IO; + *addr = device->func->resource_addr(device, 3) + bar2; + *size = 0x1000; + return 0; +} + +static int +ga102_chan_fini(struct nvkm_object *object, bool suspend) +{ + struct ga102_chan *chan = ga102_chan(object); + struct nvkm_device *device = chan->object.engine->subdev.device; + + nvkm_wr32(device, chan->ctrl.chan, 0x00000003); + + nvkm_wr32(device, chan->ctrl.runl + 0x098, 0x01000000); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, chan->ctrl.runl + 0x098) & 0x00100000)) + break; + ); + + nvkm_wr32(device, chan->ctrl.runl + 0x088, 0); + + nvkm_wr32(device, chan->ctrl.chan, 0xffffffff); + return 0; +} + +static int +ga102_chan_init(struct nvkm_object *object) +{ + struct ga102_chan *chan = ga102_chan(object); + struct nvkm_device *device = chan->object.engine->subdev.device; + + nvkm_mask(device, chan->ctrl.runl + 0x300, 0x80000000, 0x80000000); + + nvkm_wr32(device, chan->ctrl.runl + 0x080, lower_32_bits(nvkm_memory_addr(chan->runl))); + nvkm_wr32(device, chan->ctrl.runl + 0x084, upper_32_bits(nvkm_memory_addr(chan->runl))); + nvkm_wr32(device, chan->ctrl.runl + 0x088, 2); + + nvkm_wr32(device, chan->ctrl.chan, 0x00000002); + nvkm_wr32(device, chan->ctrl.runl + 0x0090, 0); + return 0; +} + +static void * +ga102_chan_dtor(struct nvkm_object *object) +{ + struct ga102_chan *chan = ga102_chan(object); + + if (chan->vmm) { + nvkm_vmm_part(chan->vmm, chan->inst); + nvkm_vmm_unref(&chan->vmm); + } + + nvkm_memory_unref(&chan->runl); + nvkm_memory_unref(&chan->user); + nvkm_memory_unref(&chan->inst); + nvkm_memory_unref(&chan->mthd); + return chan; +} + +static const struct nvkm_object_func +ga102_chan = { + .dtor = ga102_chan_dtor, + .init = ga102_chan_init, + .fini = ga102_chan_fini, + .map = ga102_chan_map, + .sclass = ga102_chan_sclass, +}; + +static int +ga102_chan_new(struct nvkm_device *device, + const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject) +{ + struct volta_channel_gpfifo_a_v0 *args = argv; + struct nvkm_top_device *tdev; + struct nvkm_vmm *vmm; + struct ga102_chan *chan; + int ret; + + if (argc != sizeof(*args)) + return -ENOSYS; + + vmm = nvkm_uvmm_search(oclass->client, args->vmm); + if (IS_ERR(vmm)) + return PTR_ERR(vmm); + + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + + nvkm_object_ctor(&ga102_chan, oclass, &chan->object); + *pobject = &chan->object; + + list_for_each_entry(tdev, &device->top->device, head) { + if (tdev->type == NVKM_ENGINE_CE) { + chan->ctrl.runl = tdev->runlist; + break; + } + } + + if (!chan->ctrl.runl) + return -ENODEV; + + chan->ctrl.chan = nvkm_rd32(device, chan->ctrl.runl + 0x004) & 0xfffffff0; + + args->chid = 0; + args->inst = 0; + args->token = nvkm_rd32(device, chan->ctrl.runl + 0x008) & 0xffff0000; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->mthd); + if (ret) + return ret; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->inst); + if (ret) + return ret; + + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x010, 0x0000face); + nvkm_wo32(chan->inst, 0x030, 0x7ffff902); + nvkm_wo32(chan->inst, 0x048, lower_32_bits(args->ioffset)); + nvkm_wo32(chan->inst, 0x04c, upper_32_bits(args->ioffset) | + (order_base_2(args->ilength / 8) << 16)); + nvkm_wo32(chan->inst, 0x084, 0x20400000); + nvkm_wo32(chan->inst, 0x094, 0x30000001); + nvkm_wo32(chan->inst, 0x0ac, 0x00020000); + nvkm_wo32(chan->inst, 0x0e4, 0x00000000); + nvkm_wo32(chan->inst, 0x0e8, 0); + nvkm_wo32(chan->inst, 0x0f4, 0x00001000); + nvkm_wo32(chan->inst, 0x0f8, 0x10003080); + nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000); + nvkm_wo32(chan->inst, 0x220, lower_32_bits(nvkm_memory_bar2(chan->mthd))); + nvkm_wo32(chan->inst, 0x224, upper_32_bits(nvkm_memory_bar2(chan->mthd))); + nvkm_done(chan->inst); + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->user); + if (ret) + return ret; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->runl); + if (ret) + return ret; + + nvkm_kmap(chan->runl); + nvkm_wo32(chan->runl, 0x00, 0x80030001); + nvkm_wo32(chan->runl, 0x04, 1); + nvkm_wo32(chan->runl, 0x08, 0); + nvkm_wo32(chan->runl, 0x0c, 0x00000000); + nvkm_wo32(chan->runl, 0x10, lower_32_bits(nvkm_memory_addr(chan->user))); + nvkm_wo32(chan->runl, 0x14, upper_32_bits(nvkm_memory_addr(chan->user))); + nvkm_wo32(chan->runl, 0x18, lower_32_bits(nvkm_memory_addr(chan->inst))); + nvkm_wo32(chan->runl, 0x1c, upper_32_bits(nvkm_memory_addr(chan->inst))); + nvkm_done(chan->runl); + + ret = nvkm_vmm_join(vmm, chan->inst); + if (ret) + return ret; + + chan->vmm = nvkm_vmm_ref(vmm); + return 0; +} + +static const struct nvkm_device_oclass +ga102_chan_oclass = { + .ctor = ga102_chan_new, +}; + +static int +ga102_user_new(struct nvkm_device *device, + const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject) +{ + return tu102_fifo_user_new(oclass, argv, argc, pobject); +} + +static const struct nvkm_device_oclass +ga102_user_oclass = { + .ctor = ga102_user_new, +}; + +static int +ga102_fifo_sclass(struct nvkm_oclass *oclass, int index, const struct nvkm_device_oclass **class) +{ + if (index == 0) { + oclass->base = (struct nvkm_sclass) { -1, -1, VOLTA_USERMODE_A }; + *class = &ga102_user_oclass; + return 0; + } else + if (index == 1) { + oclass->base = (struct nvkm_sclass) { 0, 0, AMPERE_CHANNEL_GPFIFO_B }; + *class = &ga102_chan_oclass; + return 0; + } + + return 2; +} + +static int +ga102_fifo_info(struct nvkm_engine *engine, u64 mthd, u64 *data) +{ + switch (mthd) { + case NV_DEVICE_HOST_CHANNELS: *data = 1; return 0; + default: + break; + } + + return -ENOSYS; +} + +static void * +ga102_fifo_dtor(struct nvkm_engine *engine) +{ + return ga102_fifo(engine); +} + +static const struct nvkm_engine_func +ga102_fifo = { + .dtor = ga102_fifo_dtor, + .info = ga102_fifo_info, + .base.sclass = ga102_fifo_sclass, +}; + +int +ga102_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, + struct nvkm_fifo **pfifo) +{ + struct ga102_fifo *fifo; + + if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) + return -ENOMEM; + + nvkm_engine_ctor(&ga102_fifo, device, type, inst, true, &fifo->base.engine); + *pfifo = &fifo->base; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c index 31933f3e5a07..c982d834c8d9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c @@ -54,7 +54,7 @@ ga100_top_oneinit(struct nvkm_top *top) info->reset = (data & 0x0000001f); break; case 2: - info->runlist = (data & 0x0000fc00) >> 10; + info->runlist = (data & 0x00fffc00); info->engine = (data & 0x00000003); break; default: @@ -85,9 +85,10 @@ ga100_top_oneinit(struct nvkm_top *top) } nvkm_debug(subdev, "%02x.%d (%8s): addr %06x fault %2d " - "runlist %2d engine %2d reset %2d\n", type, inst, + "runlist %6x engine %2d reset %2d\n", type, inst, info->type == NVKM_SUBDEV_NR ? "????????" : nvkm_subdev_type[info->type], - info->addr, info->fault, info->runlist, info->engine, info->reset); + info->addr, info->fault, info->runlist < 0 ? 0 : info->runlist, + info->engine, info->reset); info = NULL; } diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 369cb76512fe..2cb8eba76af8 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -307,6 +307,7 @@ config DRM_PANEL_OLIMEX_LCD_OLINUXINO depends on OF depends on I2C depends on BACKLIGHT_CLASS_DEVICE + select CRC32 help The panel is used with different sizes LCDs, from 480x272 to 1280x800, and 24 bit per pixel. diff --git a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c index 2d8794d495d0..3d8a9ab47cae 100644 --- a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c +++ b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c @@ -146,8 +146,8 @@ static const struct reg_sequence y030xx067a_init_sequence[] = { { 0x09, REG09_SUB_BRIGHT_R(0x20) }, { 0x0a, REG0A_SUB_BRIGHT_B(0x20) }, { 0x0b, REG0B_HD_FREERUN | REG0B_VD_FREERUN }, - { 0x0c, REG0C_CONTRAST_R(0x10) }, - { 0x0d, REG0D_CONTRAST_G(0x10) }, + { 0x0c, REG0C_CONTRAST_R(0x00) }, + { 0x0d, REG0D_CONTRAST_G(0x00) }, { 0x0e, REG0E_CONTRAST_B(0x10) }, { 0x0f, 0 }, { 0x10, REG10_BRIGHT(0x7f) }, diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c index 0145129d7c66..534dd7414d42 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c @@ -590,14 +590,14 @@ static const struct drm_display_mode k101_im2byl02_default_mode = { .clock = 69700, .hdisplay = 800, - .hsync_start = 800 + 6, - .hsync_end = 800 + 6 + 15, - .htotal = 800 + 6 + 15 + 16, + .hsync_start = 800 + 52, + .hsync_end = 800 + 52 + 8, + .htotal = 800 + 52 + 8 + 48, .vdisplay = 1280, - .vsync_start = 1280 + 8, - .vsync_end = 1280 + 8 + 48, - .vtotal = 1280 + 8 + 48 + 52, + .vsync_start = 1280 + 16, + .vsync_end = 1280 + 16 + 6, + .vtotal = 1280 + 16 + 6 + 15, .width_mm = 135, .height_mm = 217, diff --git a/drivers/gpu/drm/r128/ati_pcigart.c b/drivers/gpu/drm/r128/ati_pcigart.c index 26001c2de9e9..dde0501aea68 100644 --- a/drivers/gpu/drm/r128/ati_pcigart.c +++ b/drivers/gpu/drm/r128/ati_pcigart.c @@ -215,7 +215,7 @@ int drm_ati_pcigart_init(struct drm_device *dev, struct drm_ati_pcigart_info *ga } ret = 0; -#if defined(__i386__) || defined(__x86_64__) +#ifdef CONFIG_X86 wbinvd(); #else mb(); diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index ec867fa880a4..751c2c075e09 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -423,7 +423,7 @@ radeon_mst_encoder_dpms(struct drm_encoder *encoder, int mode) drm_dp_mst_allocate_vcpi(&radeon_connector->mst_port->mst_mgr, radeon_connector->port, mst_enc->pbn, slots); - drm_dp_update_payload_part1(&radeon_connector->mst_port->mst_mgr); + drm_dp_update_payload_part1(&radeon_connector->mst_port->mst_mgr, 1); radeon_dp_mst_set_be_cntl(primary, mst_enc, radeon_connector->mst_port->hpd.hpd, true); @@ -452,7 +452,7 @@ radeon_mst_encoder_dpms(struct drm_encoder *encoder, int mode) return; drm_dp_mst_reset_vcpi_slots(&radeon_connector->mst_port->mst_mgr, mst_enc->port); - drm_dp_update_payload_part1(&radeon_connector->mst_port->mst_mgr); + drm_dp_update_payload_part1(&radeon_connector->mst_port->mst_mgr, 1); drm_dp_check_act_status(&radeon_connector->mst_port->mst_mgr); /* and this can also fail */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index ba9e14da41b4..a25b98b7f5bd 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -1174,26 +1174,24 @@ static bool vop_crtc_mode_fixup(struct drm_crtc *crtc, * * Action plan: * - * 1. When DRM gives us a mode, we should add 999 Hz to it. That way - * if the clock we need is 60000001 Hz (~60 MHz) and DRM tells us to - * make 60000 kHz then the clock framework will actually give us - * the right clock. + * 1. Try to set the exact rate first, and confirm the clock framework + * can provide it. * - * NOTE: if the PLL (maybe through a divider) could actually make - * a clock rate 999 Hz higher instead of the one we want then this - * could be a problem. Unfortunately there's not much we can do - * since it's baked into DRM to use kHz. It shouldn't matter in - * practice since Rockchip PLLs are controlled by tables and - * even if there is a divider in the middle I wouldn't expect PLL - * rates in the table that are just a few kHz different. + * 2. If the clock framework cannot provide the exact rate, we should + * add 999 Hz to the requested rate. That way if the clock we need + * is 60000001 Hz (~60 MHz) and DRM tells us to make 60000 kHz then + * the clock framework will actually give us the right clock. * - * 2. Get the clock framework to round the rate for us to tell us + * 3. Get the clock framework to round the rate for us to tell us * what it will actually make. * - * 3. Store the rounded up rate so that we don't need to worry about + * 4. Store the rounded up rate so that we don't need to worry about * this in the actual clk_set_rate(). */ - rate = clk_round_rate(vop->dclk, adjusted_mode->clock * 1000 + 999); + rate = clk_round_rate(vop->dclk, adjusted_mode->clock * 1000); + if (rate / 1000 != adjusted_mode->clock) + rate = clk_round_rate(vop->dclk, + adjusted_mode->clock * 1000 + 999); adjusted_mode->clock = DIV_ROUND_UP(rate, 1000); return true; diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index 21d473deb757..a8d75fd7e9f4 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -213,11 +213,13 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, goto err_disable_clk_tmds; } + ret = sun8i_hdmi_phy_init(hdmi->phy); + if (ret) + goto err_disable_clk_tmds; + drm_encoder_helper_add(encoder, &sun8i_dw_hdmi_encoder_helper_funcs); drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); - sun8i_hdmi_phy_init(hdmi->phy); - plat_data->mode_valid = hdmi->quirks->mode_valid; plat_data->use_drm_infoframe = hdmi->quirks->use_drm_infoframe; sun8i_hdmi_phy_set_ops(hdmi->phy, plat_data); @@ -259,6 +261,7 @@ static void sun8i_dw_hdmi_unbind(struct device *dev, struct device *master, struct sun8i_dw_hdmi *hdmi = dev_get_drvdata(dev); dw_hdmi_unbind(hdmi->hdmi); + sun8i_hdmi_phy_deinit(hdmi->phy); clk_disable_unprepare(hdmi->clk_tmds); reset_control_assert(hdmi->rst_ctrl); gpiod_set_value(hdmi->ddc_en, 0); diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h index 74f6ed0e2570..bffe1b9cd3dc 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h @@ -169,6 +169,7 @@ struct sun8i_hdmi_phy { struct clk *clk_phy; struct clk *clk_pll0; struct clk *clk_pll1; + struct device *dev; unsigned int rcal; struct regmap *regs; struct reset_control *rst_phy; @@ -205,7 +206,8 @@ encoder_to_sun8i_dw_hdmi(struct drm_encoder *encoder) int sun8i_hdmi_phy_get(struct sun8i_dw_hdmi *hdmi, struct device_node *node); -void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy); +int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy); +void sun8i_hdmi_phy_deinit(struct sun8i_hdmi_phy *phy); void sun8i_hdmi_phy_set_ops(struct sun8i_hdmi_phy *phy, struct dw_hdmi_plat_data *plat_data); diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c index c9239708d398..b64d93da651d 100644 --- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c +++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c @@ -506,9 +506,60 @@ static void sun8i_hdmi_phy_init_h3(struct sun8i_hdmi_phy *phy) phy->rcal = (val & SUN8I_HDMI_PHY_ANA_STS_RCAL_MASK) >> 2; } -void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy) +int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy) { + int ret; + + ret = reset_control_deassert(phy->rst_phy); + if (ret) { + dev_err(phy->dev, "Cannot deassert phy reset control: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(phy->clk_bus); + if (ret) { + dev_err(phy->dev, "Cannot enable bus clock: %d\n", ret); + goto err_assert_rst_phy; + } + + ret = clk_prepare_enable(phy->clk_mod); + if (ret) { + dev_err(phy->dev, "Cannot enable mod clock: %d\n", ret); + goto err_disable_clk_bus; + } + + if (phy->variant->has_phy_clk) { + ret = sun8i_phy_clk_create(phy, phy->dev, + phy->variant->has_second_pll); + if (ret) { + dev_err(phy->dev, "Couldn't create the PHY clock\n"); + goto err_disable_clk_mod; + } + + clk_prepare_enable(phy->clk_phy); + } + phy->variant->phy_init(phy); + + return 0; + +err_disable_clk_mod: + clk_disable_unprepare(phy->clk_mod); +err_disable_clk_bus: + clk_disable_unprepare(phy->clk_bus); +err_assert_rst_phy: + reset_control_assert(phy->rst_phy); + + return ret; +} + +void sun8i_hdmi_phy_deinit(struct sun8i_hdmi_phy *phy) +{ + clk_disable_unprepare(phy->clk_mod); + clk_disable_unprepare(phy->clk_bus); + clk_disable_unprepare(phy->clk_phy); + + reset_control_assert(phy->rst_phy); } void sun8i_hdmi_phy_set_ops(struct sun8i_hdmi_phy *phy, @@ -638,6 +689,7 @@ static int sun8i_hdmi_phy_probe(struct platform_device *pdev) return -ENOMEM; phy->variant = (struct sun8i_hdmi_phy_variant *)match->data; + phy->dev = dev; ret = of_address_to_resource(node, 0, &res); if (ret) { @@ -696,47 +748,10 @@ static int sun8i_hdmi_phy_probe(struct platform_device *pdev) goto err_put_clk_pll1; } - ret = reset_control_deassert(phy->rst_phy); - if (ret) { - dev_err(dev, "Cannot deassert phy reset control: %d\n", ret); - goto err_put_rst_phy; - } - - ret = clk_prepare_enable(phy->clk_bus); - if (ret) { - dev_err(dev, "Cannot enable bus clock: %d\n", ret); - goto err_deassert_rst_phy; - } - - ret = clk_prepare_enable(phy->clk_mod); - if (ret) { - dev_err(dev, "Cannot enable mod clock: %d\n", ret); - goto err_disable_clk_bus; - } - - if (phy->variant->has_phy_clk) { - ret = sun8i_phy_clk_create(phy, dev, - phy->variant->has_second_pll); - if (ret) { - dev_err(dev, "Couldn't create the PHY clock\n"); - goto err_disable_clk_mod; - } - - clk_prepare_enable(phy->clk_phy); - } - platform_set_drvdata(pdev, phy); return 0; -err_disable_clk_mod: - clk_disable_unprepare(phy->clk_mod); -err_disable_clk_bus: - clk_disable_unprepare(phy->clk_bus); -err_deassert_rst_phy: - reset_control_assert(phy->rst_phy); -err_put_rst_phy: - reset_control_put(phy->rst_phy); err_put_clk_pll1: clk_put(phy->clk_pll1); err_put_clk_pll0: @@ -753,12 +768,6 @@ static int sun8i_hdmi_phy_remove(struct platform_device *pdev) { struct sun8i_hdmi_phy *phy = platform_get_drvdata(pdev); - clk_disable_unprepare(phy->clk_mod); - clk_disable_unprepare(phy->clk_bus); - clk_disable_unprepare(phy->clk_phy); - - reset_control_assert(phy->rst_phy); - reset_control_put(phy->rst_phy); clk_put(phy->clk_pll0); diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 16c7aabb94d3..a29d64f87563 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -1845,7 +1845,6 @@ tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc, bool prepare_bandwidth_transition) { const struct tegra_plane_state *old_tegra_state, *new_tegra_state; - const struct tegra_dc_state *old_dc_state, *new_dc_state; u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw; const struct drm_plane_state *old_plane_state; const struct drm_crtc_state *old_crtc_state; @@ -1858,8 +1857,6 @@ tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc, return; old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); - old_dc_state = to_const_dc_state(old_crtc_state); - new_dc_state = to_const_dc_state(crtc->state); if (!crtc->state->active) { if (!old_crtc_state->active) diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index f0cb691852a1..40378308d527 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -35,12 +35,6 @@ static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state) return NULL; } -static inline const struct tegra_dc_state * -to_const_dc_state(const struct drm_crtc_state *state) -{ - return to_dc_state((struct drm_crtc_state *)state); -} - struct tegra_dc_stats { unsigned long frames; unsigned long vblank; diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c index dc16a24f4dbe..690a339c52ec 100644 --- a/drivers/gpu/drm/tegra/uapi.c +++ b/drivers/gpu/drm/tegra/uapi.c @@ -222,7 +222,7 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f mapping->iova = sg_dma_address(mapping->sgt->sgl); } - mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->size; + mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->gem.size; err = xa_alloc(&context->mappings, &args->mapping, mapping, XA_LIMIT(1, U32_MAX), GFP_KERNEL); diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index 6b03f89a98d4..3ddb7c710a3d 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -186,10 +186,8 @@ static void tilcdc_fini(struct drm_device *dev) if (priv->mmio) iounmap(priv->mmio); - if (priv->wq) { - flush_workqueue(priv->wq); + if (priv->wq) destroy_workqueue(priv->wq); - } dev->dev_private = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 1bc675d727b6..b284623e2863 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1395,14 +1395,6 @@ static int vc4_hdmi_audio_prepare(struct device *dev, void *data, return 0; } -static const struct snd_soc_dapm_widget vc4_hdmi_audio_widgets[] = { - SND_SOC_DAPM_OUTPUT("TX"), -}; - -static const struct snd_soc_dapm_route vc4_hdmi_audio_routes[] = { - { "TX", NULL, "Playback" }, -}; - static const struct snd_soc_component_driver vc4_hdmi_audio_cpu_dai_comp = { .name = "vc4-hdmi-cpu-dai-component", }; diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c index edd17c30d5a5..7f7fe35fc21d 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c @@ -468,7 +468,6 @@ void ttm_mem_global_release(struct ttm_mem_global *glob) struct ttm_mem_zone *zone; unsigned int i; - flush_workqueue(glob->swap_queue); destroy_workqueue(glob->swap_queue); glob->swap_queue = NULL; for (i = 0; i < glob->num_zones; ++i) { diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c index 6941add95d0f..ecab72882192 100644 --- a/drivers/gpu/host1x/fence.c +++ b/drivers/gpu/host1x/fence.c @@ -15,7 +15,7 @@ #include "intr.h" #include "syncpt.h" -DEFINE_SPINLOCK(lock); +static DEFINE_SPINLOCK(lock); struct host1x_syncpt_fence { struct dma_fence base; @@ -152,8 +152,10 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold) return ERR_PTR(-ENOMEM); fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL); - if (!fence->waiter) + if (!fence->waiter) { + kfree(fence); return ERR_PTR(-ENOMEM); + } fence->sp = sp; fence->threshold = threshold; |