Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml2_0')
73 files changed, 49052 insertions, 0 deletions
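The Makefile that follows builds the DML2 objects with hard-float code generation (dml2_ccflags := $(CC_FLAGS_FPU)) and strips those flags from everything else (CC_FLAGS_NO_FPU), so call sites are expected to bracket entry into this code with the DC FP helpers. A minimal sketch of that calling convention, not part of the patch, assuming the existing DC_FP_START()/DC_FP_END() macros; dml2_validate() is taken here as an assumed entry point and run_dml2_check() is a hypothetical wrapper:

/*
 * Sketch only: code compiled with CC_FLAGS_FPU may touch FP registers,
 * so it must run inside a kernel-FPU region.  DC_FP_START()/DC_FP_END()
 * map to kernel_fpu_begin()/kernel_fpu_end() on x86.
 */
static bool run_dml2_check(const struct dc *dc, struct dc_state *context,
			   struct dml2_context *dml2, bool fast_validate)
{
	bool ok;

	DC_FP_START();		/* enable FPU use before entering DML float math */
	ok = dml2_validate(dc, context, dml2, fast_validate);
	DC_FP_END();		/* leave the kernel-FPU region */

	return ok;
}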
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile b/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile
new file mode 100644
index 000000000000..97e068b6bf6b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: MIT */
+#
+# Copyright 2023 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+# Makefile for dml2.

+dml2_ccflags := $(CC_FLAGS_FPU)
+dml2_rcflags := $(CC_FLAGS_NO_FPU)
+
+ifneq ($(CONFIG_FRAME_WARN),0)
+	ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+		ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy)
+			frame_warn_limit := 4096
+		else
+			frame_warn_limit := 3072
+		endif
+	else
+		frame_warn_limit := 2048
+	endif
+
+	ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y)
+		frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit)
+	endif
+endif
+
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_core
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_mcg/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_dpmm/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_pmo/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_standalone_libraries/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/inc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/inc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/
+
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/display_mode_util.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_wrapper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_utils.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_policy.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_translation_helper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_mall_phantom.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml_display_rq_dlg_calc.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_dc_resource_mgmt.o := $(dml2_ccflags)
+
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/display_mode_core.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/display_mode_util.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_wrapper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_utils.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_policy.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_translation_helper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_mall_phantom.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml_display_rq_dlg_calc.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_dc_resource_mgmt.o := $(dml2_rcflags)
+
+DML2 = display_mode_core.o display_mode_util.o dml2_wrapper.o \
+	dml2_utils.o dml2_policy.o dml2_translation_helper.o dml2_dc_resource_mgmt.o dml2_mall_phantom.o \
+	dml_display_rq_dlg_calc.o
+
+AMD_DAL_DML2 = $(addprefix $(AMDDALPATH)/dc/dml2_0/,$(DML2))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DML2)
+
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml21_wrapper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_translation_helper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_utils.o := $(dml2_ccflags)
+
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml21_wrapper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_translation_helper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_utils.o := $(dml2_rcflags)
+
+DML21 := src/dml2_top/dml2_top_interfaces.o
+DML21 += src/dml2_top/dml2_top_soc15.o
+DML21 += src/dml2_core/dml2_core_dcn4.o
+DML21 += src/dml2_core/dml2_core_utils.o
+DML21 += src/dml2_core/dml2_core_factory.o
+DML21 += src/dml2_core/dml2_core_dcn4_calcs.o
+DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o
+DML21 += src/dml2_dpmm/dml2_dpmm_factory.o
+DML21 += src/dml2_mcg/dml2_mcg_dcn4.o
+DML21 += src/dml2_mcg/dml2_mcg_factory.o
+DML21 += src/dml2_pmo/dml2_pmo_dcn3.o
+DML21 += src/dml2_pmo/dml2_pmo_factory.o
+DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o
+DML21 += src/dml2_standalone_libraries/lib_float_math.o
+DML21 += dml21_translation_helper.o
+DML21 += dml21_wrapper.o
+DML21 += dml21_utils.o
+
+AMD_DAL_DML21 = $(addprefix $(AMDDALPATH)/dc/dml2_0/dml21/,$(DML21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DML21)
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h b/drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h
new file mode 100644
index 000000000000..b954c9648fbe
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __CMNTYPES_H__
+#define __CMNTYPES_H__
+
+#ifdef __GNUC__
+#if __GNUC__ == 4 && __GNUC_MINOR__ > 7
+typedef unsigned int uint;
+#endif
+#endif
+
+typedef signed char int8, *pint8;
+typedef signed short int16, *pint16;
+typedef signed int int32, *pint32;
+typedef signed long long int64, *pint64;
+
+typedef unsigned char uint8, *puint8;
+typedef unsigned short uint16, *puint16;
+typedef unsigned int uint32, *puint32;
+typedef unsigned long long uint64, *puint64;
+
+typedef unsigned long int ulong;
+typedef unsigned char uchar;
+typedef unsigned int uint;
+
+typedef void *pvoid;
+typedef char *pchar;
+typedef const void *const_pvoid;
+typedef const char *const_pchar;
+
+typedef struct rgba_struct {
+	uint8 a;
+	uint8 r;
+	uint8 g;
+	uint8 b;
+} rgba_t;
+
+typedef struct {
+	uint8 blue;
+	uint8 green;
+	uint8 red;
+	uint8 alpha;
+} gen_color_t;
+
+typedef union {
+	uint32 val;
+	gen_color_t f;
+} gen_color_u;
+
+//
+// Types to make it easy to get or set the bits of a float/double.
+// Avoids automatic casting from int to float and back.
+//
+#if 0
+typedef union {
+	uint32 i;
+	float f;
+} uintfloat32;
+
+typedef union {
+	uint64 i;
+	double f;
+} uintfloat64;
+
+#ifndef UNREFERENCED_PARAMETER
+#define UNREFERENCED_PARAMETER(x) (x = x)
+#endif
+#endif
+
+#endif //__CMNTYPES_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c
new file mode 100644
index 000000000000..09303c282495
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c
@@ -0,0 +1,10345 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ * + * Authors: AMD + * + */ + +#include "display_mode_core.h" +#include "display_mode_util.h" +#include "display_mode_lib_defines.h" + +#include "dml_assert.h" + +#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 +#define TB_BORROWED_MAX 400 +#define DML_MAX_VSTARTUP_START 1023 + +// --------------------------- +// Declaration Begins +// --------------------------- +static void CalculateBytePerPixelAndBlockSizes( + enum dml_source_format_class SourcePixelFormat, + enum dml_swizzle_mode SurfaceTiling, + // Output + dml_uint_t *BytePerPixelY, + dml_uint_t *BytePerPixelC, + dml_float_t *BytePerPixelDETY, + dml_float_t *BytePerPixelDETC, + dml_uint_t *BlockHeight256BytesY, + dml_uint_t *BlockHeight256BytesC, + dml_uint_t *BlockWidth256BytesY, + dml_uint_t *BlockWidth256BytesC, + dml_uint_t *MacroTileHeightY, + dml_uint_t *MacroTileHeightC, + dml_uint_t *MacroTileWidthY, + dml_uint_t *MacroTileWidthC); + +static dml_float_t CalculateWriteBackDISPCLK( + enum dml_source_format_class WritebackPixelFormat, + dml_float_t PixelClock, + dml_float_t WritebackHRatio, + dml_float_t WritebackVRatio, + dml_uint_t WritebackHTaps, + dml_uint_t WritebackVTaps, + dml_uint_t WritebackSourceWidth, + dml_uint_t WritebackDestinationWidth, + dml_uint_t HTotal, + dml_uint_t WritebackLineBufferSize, + dml_float_t DISPCLKDPPCLKVCOSpeed); + +static void CalculateVMRowAndSwath( + struct display_mode_lib_scratch_st *s, + struct CalculateVMRowAndSwath_params_st *p); + +static void CalculateOutputLink( + dml_float_t PHYCLKPerState, + dml_float_t PHYCLKD18PerState, + dml_float_t PHYCLKD32PerState, + dml_float_t Downspreading, + dml_bool_t IsMainSurfaceUsingTheIndicatedTiming, + enum dml_output_encoder_class Output, + enum dml_output_format_class OutputFormat, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_float_t PixelClockBackEnd, + dml_float_t ForcedOutputLinkBPP, + dml_uint_t DSCInputBitPerComponent, + dml_uint_t NumberOfDSCSlices, + dml_float_t AudioSampleRate, + dml_uint_t AudioSampleLayout, + enum dml_odm_mode ODMModeNoDSC, + enum dml_odm_mode ODMModeDSC, + enum dml_dsc_enable DSCEnable, + dml_uint_t OutputLinkDPLanes, + enum dml_output_link_dp_rate OutputLinkDPRate, + + // Output + dml_bool_t *RequiresDSC, + dml_bool_t *RequiresFEC, + dml_float_t *OutBpp, + enum dml_output_type_and_rate__type *OutputType, + enum dml_output_type_and_rate__rate *OutputRate, + dml_uint_t *RequiredSlots); + +static void CalculateODMMode( + dml_uint_t MaximumPixelsPerLinePerDSCUnit, + dml_uint_t HActive, + enum dml_output_encoder_class Output, + enum dml_output_format_class OutputFormat, + enum dml_odm_use_policy ODMUse, + dml_float_t StateDispclk, + dml_float_t MaxDispclk, + dml_bool_t DSCEnable, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t MaxNumDPP, + dml_float_t PixelClock, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKRampingMargin, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_uint_t NumberOfDSCSlices, + + // Output + dml_bool_t *TotalAvailablePipesSupport, + dml_uint_t *NumberOfDPP, + enum dml_odm_mode *ODMMode, + dml_float_t *RequiredDISPCLKPerSurface); + +static dml_float_t CalculateRequiredDispclk( + enum dml_odm_mode ODMMode, + dml_float_t PixelClock, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKRampingMargin, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_float_t MaxDispclkSingle); + +static void CalculateSinglePipeDPPCLKAndSCLThroughput( + dml_float_t HRatio, + dml_float_t HRatioChroma, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_float_t MaxDCHUBToPSCLThroughput, + 
dml_float_t MaxPSCLToLBThroughput, + dml_float_t PixelClock, + enum dml_source_format_class SourcePixelFormat, + dml_uint_t HTaps, + dml_uint_t HTapsChroma, + dml_uint_t VTaps, + dml_uint_t VTapsChroma, + + // Output + dml_float_t *PSCL_THROUGHPUT, + dml_float_t *PSCL_THROUGHPUT_CHROMA, + dml_float_t *DPPCLKUsingSingleDPP); + +static void CalculateDPPCLK( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_float_t DPPCLKUsingSingleDPP[], + dml_uint_t DPPPerSurface[], + + // Output + dml_float_t *GlobalDPPCLK, + dml_float_t Dppclk[]); + +static void CalculateMALLUseForStaticScreen( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t MALLAllocatedForDCNFinal, + enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen, + dml_uint_t SurfaceSizeInMALL[], + dml_bool_t one_row_per_frame_fits_in_buffer[], + + // Output + dml_bool_t UsesMALLForStaticScreen[]); + +static dml_uint_t dscceComputeDelay( + dml_uint_t bpc, + dml_float_t BPP, + dml_uint_t sliceWidth, + dml_uint_t numSlices, + enum dml_output_format_class pixelFormat, + enum dml_output_encoder_class Output); + +static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, + enum dml_output_encoder_class Output); + +static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch, + struct CalculatePrefetchSchedule_params_st *p); + +static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed); + +static void CalculateDCCConfiguration( + dml_bool_t DCCEnabled, + dml_bool_t DCCProgrammingAssumesScanDirectionUnknown, + enum dml_source_format_class SourcePixelFormat, + dml_uint_t SurfaceWidthLuma, + dml_uint_t SurfaceWidthChroma, + dml_uint_t SurfaceHeightLuma, + dml_uint_t SurfaceHeightChroma, + dml_uint_t nomDETInKByte, + dml_uint_t RequestHeight256ByteLuma, + dml_uint_t RequestHeight256ByteChroma, + enum dml_swizzle_mode TilingFormat, + dml_uint_t BytePerPixelY, + dml_uint_t BytePerPixelC, + dml_float_t BytePerPixelDETY, + dml_float_t BytePerPixelDETC, + enum dml_rotation_angle SourceScan, + // Output + dml_uint_t *MaxUncompressedBlockLuma, + dml_uint_t *MaxUncompressedBlockChroma, + dml_uint_t *MaxCompressedBlockLuma, + dml_uint_t *MaxCompressedBlockChroma, + dml_uint_t *IndependentBlockLuma, + dml_uint_t *IndependentBlockChroma); + +static dml_uint_t CalculatePrefetchSourceLines( + dml_float_t VRatio, + dml_uint_t VTaps, + dml_bool_t Interlace, + dml_bool_t ProgressiveToInterlaceUnitInOPP, + dml_uint_t SwathHeight, + enum dml_rotation_angle SourceScan, + dml_bool_t ViewportStationary, + dml_uint_t SwathWidth, + dml_uint_t ViewportHeight, + dml_uint_t ViewportXStart, + dml_uint_t ViewportYStart, + + // Output + dml_uint_t *VInitPreFill, + dml_uint_t *MaxNumSwath); + +static dml_uint_t CalculateVMAndRowBytes( + dml_bool_t ViewportStationary, + dml_bool_t DCCEnable, + dml_uint_t NumberOfDPPs, + dml_uint_t BlockHeight256Bytes, + dml_uint_t BlockWidth256Bytes, + enum dml_source_format_class SourcePixelFormat, + dml_uint_t SurfaceTiling, + dml_uint_t BytePerPixel, + enum dml_rotation_angle SourceScan, + dml_uint_t SwathWidth, + dml_uint_t ViewportHeight, + dml_uint_t ViewportXStart, + dml_uint_t ViewportYStart, + dml_bool_t GPUVMEnable, + dml_uint_t GPUVMMaxPageTableLevels, + dml_uint_t GPUVMMinPageSizeKBytes, + dml_uint_t PTEBufferSizeInRequests, + dml_uint_t Pitch, + dml_uint_t DCCMetaPitch, + dml_uint_t MacroTileWidth, + dml_uint_t MacroTileHeight, + + // Output + dml_uint_t 
*MetaRowByte, + dml_uint_t *PixelPTEBytesPerRow, + dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check + dml_uint_t *dpte_row_width_ub, + dml_uint_t *dpte_row_height, + dml_uint_t *dpte_row_height_linear, + dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame, + dml_uint_t *dpte_row_width_ub_one_row_per_frame, + dml_uint_t *dpte_row_height_one_row_per_frame, + dml_uint_t *MetaRequestWidth, + dml_uint_t *MetaRequestHeight, + dml_uint_t *meta_row_width, + dml_uint_t *meta_row_height, + dml_uint_t *PixelPTEReqWidth, + dml_uint_t *PixelPTEReqHeight, + dml_uint_t *PTERequestSize, + dml_uint_t *DPDE0BytesFrame, + dml_uint_t *MetaPTEBytesFrame); + +static dml_float_t CalculateTWait( + dml_uint_t PrefetchMode, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, + dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + dml_bool_t DRRDisplay, + dml_float_t DRAMClockChangeLatency, + dml_float_t FCLKChangeLatency, + dml_float_t UrgentLatency, + dml_float_t SREnterPlusExitTime); + +static void CalculatePrefetchMode( + enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank, + dml_uint_t *MinPrefetchMode, + dml_uint_t *MaxPrefetchMode); + +static void CalculateRowBandwidth( + dml_bool_t GPUVMEnable, + enum dml_source_format_class SourcePixelFormat, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_bool_t DCCEnable, + dml_float_t LineTime, + dml_uint_t MetaRowByteLuma, + dml_uint_t MetaRowByteChroma, + dml_uint_t meta_row_height_luma, + dml_uint_t meta_row_height_chroma, + dml_uint_t PixelPTEBytesPerRowLuma, + dml_uint_t PixelPTEBytesPerRowChroma, + dml_uint_t dpte_row_height_luma, + dml_uint_t dpte_row_height_chroma, + // Output + dml_float_t *meta_row_bw, + dml_float_t *dpte_row_bw); + +static void CalculateFlipSchedule( + dml_float_t HostVMInefficiencyFactor, + dml_float_t UrgentExtraLatency, + dml_float_t UrgentLatency, + dml_uint_t GPUVMMaxPageTableLevels, + dml_bool_t HostVMEnable, + dml_uint_t HostVMMaxNonCachedPageTableLevels, + dml_bool_t GPUVMEnable, + dml_uint_t HostVMMinPageSize, + dml_float_t PDEAndMetaPTEBytesPerFrame, + dml_float_t MetaRowBytes, + dml_float_t DPTEBytesPerRow, + dml_float_t BandwidthAvailableForImmediateFlip, + dml_uint_t TotImmediateFlipBytes, + enum dml_source_format_class SourcePixelFormat, + dml_float_t LineTime, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_float_t Tno_bw, + dml_bool_t DCCEnable, + dml_uint_t dpte_row_height, + dml_uint_t meta_row_height, + dml_uint_t dpte_row_height_chroma, + dml_uint_t meta_row_height_chroma, + dml_bool_t use_one_row_for_frame_flip, + + // Output + dml_float_t *DestinationLinesToRequestVMInImmediateFlip, + dml_float_t *DestinationLinesToRequestRowInImmediateFlip, + dml_float_t *final_flip_bw, + dml_bool_t *ImmediateFlipSupportedForPipe); + +static dml_float_t CalculateWriteBackDelay( + enum dml_source_format_class WritebackPixelFormat, + dml_float_t WritebackHRatio, + dml_float_t WritebackVRatio, + dml_uint_t WritebackVTaps, + dml_uint_t WritebackDestinationWidth, + dml_uint_t WritebackDestinationHeight, + dml_uint_t WritebackSourceHeight, + dml_uint_t HTotal); + +static void CalculateVUpdateAndDynamicMetadataParameters( + dml_uint_t MaxInterDCNTileRepeaters, + dml_float_t Dppclk, + dml_float_t DISPCLK, + dml_float_t DCFClkDeepSleep, + dml_float_t PixelClock, + dml_uint_t HTotal, + dml_uint_t VBlank, + dml_uint_t DynamicMetadataTransmittedBytes, + dml_uint_t DynamicMetadataLinesBeforeActiveRequired, + dml_uint_t InterlaceEnable, + dml_bool_t ProgressiveToInterlaceUnitInOPP, + 
dml_float_t *TSetup, + dml_float_t *Tdmbf, + dml_float_t *Tdmec, + dml_float_t *Tdmsks, + dml_uint_t *VUpdateOffsetPix, + dml_uint_t *VUpdateWidthPix, + dml_uint_t *VReadyOffsetPix); + +static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported); + +static dml_float_t TruncToValidBPP( + dml_float_t LinkBitRate, + dml_uint_t Lanes, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_float_t PixelClock, + dml_float_t DesiredBPP, + dml_bool_t DSCEnable, + enum dml_output_encoder_class Output, + enum dml_output_format_class Format, + dml_uint_t DSCInputBitPerComponent, + dml_uint_t DSCSlices, + dml_uint_t AudioRate, + dml_uint_t AudioLayout, + enum dml_odm_mode ODMModeNoDSC, + enum dml_odm_mode ODMModeDSC, + // Output + dml_uint_t *RequiredSlotsSingle); + +static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct display_mode_lib_scratch_st *s, + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p); + +static void CalculateDCFCLKDeepSleep( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t BytePerPixelY[], + dml_uint_t BytePerPixelC[], + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_uint_t SwathWidthY[], + dml_uint_t SwathWidthC[], + dml_uint_t DPPPerSurface[], + dml_float_t HRatio[], + dml_float_t HRatioChroma[], + dml_float_t PixelClock[], + dml_float_t PSCL_THROUGHPUT[], + dml_float_t PSCL_THROUGHPUT_CHROMA[], + dml_float_t Dppclk[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_uint_t ReturnBusWidth, + + // Output + dml_float_t *DCFCLKDeepSleep); + +static void CalculateUrgentBurstFactor( + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, + dml_uint_t swath_width_luma_ub, + dml_uint_t swath_width_chroma_ub, + dml_uint_t SwathHeightY, + dml_uint_t SwathHeightC, + dml_float_t LineTime, + dml_float_t UrgentLatency, + dml_float_t CursorBufferSize, + dml_uint_t CursorWidth, + dml_uint_t CursorBPP, + dml_float_t VRatio, + dml_float_t VRatioC, + dml_float_t BytePerPixelInDETY, + dml_float_t BytePerPixelInDETC, + dml_uint_t DETBufferSizeY, + dml_uint_t DETBufferSizeC, + // Output + dml_float_t *UrgentBurstFactorCursor, + dml_float_t *UrgentBurstFactorLuma, + dml_float_t *UrgentBurstFactorChroma, + dml_bool_t *NotEnoughUrgentLatencyHiding); + +static dml_float_t RequiredDTBCLK( + dml_bool_t DSCEnable, + dml_float_t PixelClock, + enum dml_output_format_class OutputFormat, + dml_float_t OutputBpp, + dml_uint_t DSCSlices, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_uint_t AudioRate, + dml_uint_t AudioLayoutSingle); + +static void UseMinimumDCFCLK( + struct display_mode_lib_scratch_st *scratch, + struct UseMinimumDCFCLK_params_st *p); + +static void CalculatePixelDeliveryTimes( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_float_t VRatioPrefetchY[], + dml_float_t VRatioPrefetchC[], + dml_uint_t swath_width_luma_ub[], + dml_uint_t swath_width_chroma_ub[], + dml_uint_t DPPPerSurface[], + dml_float_t HRatio[], + dml_float_t HRatioChroma[], + dml_float_t PixelClock[], + dml_float_t PSCL_THROUGHPUT[], + dml_float_t PSCL_THROUGHPUT_CHROMA[], + dml_float_t Dppclk[], + dml_uint_t BytePerPixelC[], + enum dml_rotation_angle SourceScan[], + dml_uint_t NumberOfCursors[], + dml_uint_t CursorWidth[], + dml_uint_t CursorBPP[], + dml_uint_t BlockWidth256BytesY[], + dml_uint_t BlockHeight256BytesY[], + dml_uint_t BlockWidth256BytesC[], + dml_uint_t BlockHeight256BytesC[], + + // Output + dml_float_t 
DisplayPipeLineDeliveryTimeLuma[], + dml_float_t DisplayPipeLineDeliveryTimeChroma[], + dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[], + dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[], + dml_float_t DisplayPipeRequestDeliveryTimeLuma[], + dml_float_t DisplayPipeRequestDeliveryTimeChroma[], + dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[], + dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[], + dml_float_t CursorRequestDeliveryTime[], + dml_float_t CursorRequestDeliveryTimePrefetch[]); + +static void CalculateMetaAndPTETimes( + dml_bool_t use_one_row_for_frame[], + dml_uint_t NumberOfActiveSurfaces, + dml_bool_t GPUVMEnable, + dml_uint_t MetaChunkSize, + dml_uint_t MinMetaChunkSizeBytes, + dml_uint_t HTotal[], + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_float_t DestinationLinesToRequestRowInVBlank[], + dml_float_t DestinationLinesToRequestRowInImmediateFlip[], + dml_bool_t DCCEnable[], + dml_float_t PixelClock[], + dml_uint_t BytePerPixelY[], + dml_uint_t BytePerPixelC[], + enum dml_rotation_angle SourceScan[], + dml_uint_t dpte_row_height[], + dml_uint_t dpte_row_height_chroma[], + dml_uint_t meta_row_width[], + dml_uint_t meta_row_width_chroma[], + dml_uint_t meta_row_height[], + dml_uint_t meta_row_height_chroma[], + dml_uint_t meta_req_width[], + dml_uint_t meta_req_width_chroma[], + dml_uint_t meta_req_height[], + dml_uint_t meta_req_height_chroma[], + dml_uint_t dpte_group_bytes[], + dml_uint_t PTERequestSizeY[], + dml_uint_t PTERequestSizeC[], + dml_uint_t PixelPTEReqWidthY[], + dml_uint_t PixelPTEReqHeightY[], + dml_uint_t PixelPTEReqWidthC[], + dml_uint_t PixelPTEReqHeightC[], + dml_uint_t dpte_row_width_luma_ub[], + dml_uint_t dpte_row_width_chroma_ub[], + + // Output + dml_float_t DST_Y_PER_PTE_ROW_NOM_L[], + dml_float_t DST_Y_PER_PTE_ROW_NOM_C[], + dml_float_t DST_Y_PER_META_ROW_NOM_L[], + dml_float_t DST_Y_PER_META_ROW_NOM_C[], + dml_float_t TimePerMetaChunkNominal[], + dml_float_t TimePerChromaMetaChunkNominal[], + dml_float_t TimePerMetaChunkVBlank[], + dml_float_t TimePerChromaMetaChunkVBlank[], + dml_float_t TimePerMetaChunkFlip[], + dml_float_t TimePerChromaMetaChunkFlip[], + dml_float_t time_per_pte_group_nom_luma[], + dml_float_t time_per_pte_group_vblank_luma[], + dml_float_t time_per_pte_group_flip_luma[], + dml_float_t time_per_pte_group_nom_chroma[], + dml_float_t time_per_pte_group_vblank_chroma[], + dml_float_t time_per_pte_group_flip_chroma[]); + +static void CalculateVMGroupAndRequestTimes( + dml_uint_t NumberOfActiveSurfaces, + dml_bool_t GPUVMEnable, + dml_uint_t GPUVMMaxPageTableLevels, + dml_uint_t HTotal[], + dml_uint_t BytePerPixelC[], + dml_float_t DestinationLinesToRequestVMInVBlank[], + dml_float_t DestinationLinesToRequestVMInImmediateFlip[], + dml_bool_t DCCEnable[], + dml_float_t PixelClock[], + dml_uint_t dpte_row_width_luma_ub[], + dml_uint_t dpte_row_width_chroma_ub[], + dml_uint_t vm_group_bytes[], + dml_uint_t dpde0_bytes_per_frame_ub_l[], + dml_uint_t dpde0_bytes_per_frame_ub_c[], + dml_uint_t meta_pte_bytes_per_frame_ub_l[], + dml_uint_t meta_pte_bytes_per_frame_ub_c[], + + // Output + dml_float_t TimePerVMGroupVBlank[], + dml_float_t TimePerVMGroupFlip[], + dml_float_t TimePerVMRequestVBlank[], + dml_float_t TimePerVMRequestFlip[]); + +static void CalculateStutterEfficiency( + struct display_mode_lib_scratch_st *scratch, + struct CalculateStutterEfficiency_params_st *p); + +static void CalculateSwathAndDETConfiguration( + struct display_mode_lib_scratch_st *scratch, + struct 
CalculateSwathAndDETConfiguration_params_st *p); + +static void CalculateSwathWidth( + dml_bool_t ForceSingleDPP, + dml_uint_t NumberOfActiveSurfaces, + enum dml_source_format_class SourcePixelFormat[], + enum dml_rotation_angle SourceScan[], + dml_bool_t ViewportStationary[], + dml_uint_t ViewportWidth[], + dml_uint_t ViewportHeight[], + dml_uint_t ViewportXStart[], + dml_uint_t ViewportYStart[], + dml_uint_t ViewportXStartC[], + dml_uint_t ViewportYStartC[], + dml_uint_t SurfaceWidthY[], + dml_uint_t SurfaceWidthC[], + dml_uint_t SurfaceHeightY[], + dml_uint_t SurfaceHeightC[], + enum dml_odm_mode ODMMode[], + dml_uint_t BytePerPixY[], + dml_uint_t BytePerPixC[], + dml_uint_t Read256BytesBlockHeightY[], + dml_uint_t Read256BytesBlockHeightC[], + dml_uint_t Read256BytesBlockWidthY[], + dml_uint_t Read256BytesBlockWidthC[], + dml_uint_t BlendingAndTiming[], + dml_uint_t HActive[], + dml_float_t HRatio[], + dml_uint_t DPPPerSurface[], + + // Output + dml_uint_t SwathWidthSingleDPPY[], + dml_uint_t SwathWidthSingleDPPC[], + dml_uint_t SwathWidthY[], + dml_uint_t SwathWidthC[], + dml_uint_t MaximumSwathHeightY[], + dml_uint_t MaximumSwathHeightC[], + dml_uint_t swath_width_luma_ub[], + dml_uint_t swath_width_chroma_ub[]); + +static dml_float_t CalculateExtraLatency( + dml_uint_t RoundTripPingLatencyCycles, + dml_uint_t ReorderingBytes, + dml_float_t DCFCLK, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t PixelChunkSizeInKByte, + dml_uint_t TotalNumberOfDCCActiveDPP, + dml_uint_t MetaChunkSize, + dml_float_t ReturnBW, + dml_bool_t GPUVMEnable, + dml_bool_t HostVMEnable, + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t NumberOfDPP[], + dml_uint_t dpte_group_bytes[], + dml_float_t HostVMInefficiencyFactor, + dml_uint_t HostVMMinPageSize, + dml_uint_t HostVMMaxNonCachedPageTableLevels); + +static dml_uint_t CalculateExtraLatencyBytes( + dml_uint_t ReorderingBytes, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t PixelChunkSizeInKByte, + dml_uint_t TotalNumberOfDCCActiveDPP, + dml_uint_t MetaChunkSize, + dml_bool_t GPUVMEnable, + dml_bool_t HostVMEnable, + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t NumberOfDPP[], + dml_uint_t dpte_group_bytes[], + dml_float_t HostVMInefficiencyFactor, + dml_uint_t HostVMMinPageSize, + dml_uint_t HostVMMaxNonCachedPageTableLevels); + +static dml_float_t CalculateUrgentLatency( + dml_float_t UrgentLatencyPixelDataOnly, + dml_float_t UrgentLatencyPixelMixedWithVMData, + dml_float_t UrgentLatencyVMDataOnly, + dml_bool_t DoUrgentLatencyAdjustment, + dml_float_t UrgentLatencyAdjustmentFabricClockComponent, + dml_float_t UrgentLatencyAdjustmentFabricClockReference, + dml_float_t FabricClockSingle); + +static dml_bool_t UnboundedRequest( + enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal, + dml_uint_t TotalNumberOfActiveDPP, + dml_bool_t NoChromaOrLinear, + enum dml_output_encoder_class Output); + +static void CalculateSurfaceSizeInMall( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t MALLAllocatedForDCN, + enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[], + dml_bool_t DCCEnable[], + dml_bool_t ViewportStationary[], + dml_uint_t ViewportXStartY[], + dml_uint_t ViewportYStartY[], + dml_uint_t ViewportXStartC[], + dml_uint_t ViewportYStartC[], + dml_uint_t ViewportWidthY[], + dml_uint_t ViewportHeightY[], + dml_uint_t BytesPerPixelY[], + dml_uint_t ViewportWidthC[], + dml_uint_t ViewportHeightC[], + dml_uint_t BytesPerPixelC[], + dml_uint_t SurfaceWidthY[], + dml_uint_t SurfaceWidthC[], + dml_uint_t SurfaceHeightY[], + dml_uint_t 
SurfaceHeightC[], + dml_uint_t Read256BytesBlockWidthY[], + dml_uint_t Read256BytesBlockWidthC[], + dml_uint_t Read256BytesBlockHeightY[], + dml_uint_t Read256BytesBlockHeightC[], + dml_uint_t ReadBlockWidthY[], + dml_uint_t ReadBlockWidthC[], + dml_uint_t ReadBlockHeightY[], + dml_uint_t ReadBlockHeightC[], + + // Output + dml_uint_t SurfaceSizeInMALL[], + dml_bool_t *ExceededMALLSize); + +static void CalculateDETBufferSize( + dml_uint_t DETSizeOverride[], + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + dml_bool_t ForceSingleDPP, + dml_uint_t NumberOfActiveSurfaces, + dml_bool_t UnboundedRequestEnabled, + dml_uint_t nomDETInKByte, + dml_uint_t MaxTotalDETInKByte, + dml_uint_t ConfigReturnBufferSizeInKByte, + dml_uint_t MinCompressedBufferSizeInKByte, + dml_uint_t ConfigReturnBufferSegmentSizeInkByte, + dml_uint_t CompressedBufferSegmentSizeInkByteFinal, + enum dml_source_format_class SourcePixelFormat[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_uint_t RotesY[], + dml_uint_t RoundedUpMaxSwathSizeBytesC[], + dml_uint_t DPPPerSurface[], + // Output + dml_uint_t DETBufferSizeInKByte[], + dml_uint_t *CompressedBufferSizeInkByte); + +static void CalculateMaxDETAndMinCompressedBufferSize( + dml_uint_t ConfigReturnBufferSizeInKByte, + dml_uint_t ConfigReturnBufferSegmentSizeInKByte, + dml_uint_t ROBBufferSizeInKByte, + dml_uint_t MaxNumDPP, + dml_bool_t nomDETInKByteOverrideEnable, + dml_uint_t nomDETInKByteOverrideValue, + + // Output + dml_uint_t *MaxTotalDETInKByte, + dml_uint_t *nomDETInKByte, + dml_uint_t *MinCompressedBufferSizeInKByte); + +static dml_uint_t DSCDelayRequirement( + dml_bool_t DSCEnabled, + enum dml_odm_mode ODMMode, + dml_uint_t DSCInputBitPerComponent, + dml_float_t OutputBpp, + dml_uint_t HActive, + dml_uint_t HTotal, + dml_uint_t NumberOfDSCSlices, + enum dml_output_format_class OutputFormat, + enum dml_output_encoder_class Output, + dml_float_t PixelClock, + dml_float_t PixelClockBackEnd); + +static dml_bool_t CalculateVActiveBandwithSupport( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + dml_bool_t NotUrgentLatencyHiding[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t meta_row_bandwidth[], + dml_float_t dpte_row_bandwidth[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[]); + +static void CalculatePrefetchBandwithSupport( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + dml_bool_t NotUrgentLatencyHiding[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t PrefetchBandwidthLuma[], + dml_float_t PrefetchBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t meta_row_bandwidth[], + dml_float_t dpte_row_bandwidth[], + dml_float_t cursor_bw_pre[], + dml_float_t prefetch_vmrow_bw[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[], + dml_float_t UrgentBurstFactorLumaPre[], + dml_float_t UrgentBurstFactorChromaPre[], + dml_float_t UrgentBurstFactorCursorPre[], + + // Output + dml_float_t *PrefetchBandwidth, + dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch, + dml_float_t *FractionOfUrgentBandwidth, + dml_bool_t *PrefetchBandwidthSupport); + +static dml_float_t CalculateBandwidthAvailableForImmediateFlip( + 
dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t PrefetchBandwidthLuma[], + dml_float_t PrefetchBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t cursor_bw_pre[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[], + dml_float_t UrgentBurstFactorLumaPre[], + dml_float_t UrgentBurstFactorChromaPre[], + dml_float_t UrgentBurstFactorCursorPre[]); + +static void CalculateImmediateFlipBandwithSupport( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + enum dml_immediate_flip_requirement ImmediateFlipRequirement[], + dml_float_t final_flip_bw[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t PrefetchBandwidthLuma[], + dml_float_t PrefetchBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t meta_row_bandwidth[], + dml_float_t dpte_row_bandwidth[], + dml_float_t cursor_bw_pre[], + dml_float_t prefetch_vmrow_bw[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[], + dml_float_t UrgentBurstFactorLumaPre[], + dml_float_t UrgentBurstFactorChromaPre[], + dml_float_t UrgentBurstFactorCursorPre[], + + // Output + dml_float_t *TotalBandwidth, + dml_float_t *TotalBandwidthNotIncludingMALLPrefetch, + dml_float_t *FractionOfUrgentBandwidth, + dml_bool_t *ImmediateFlipBandwidthSupport); + +// --------------------------- +// Declaration Ends +// --------------------------- + +static dml_uint_t dscceComputeDelay( + dml_uint_t bpc, + dml_float_t BPP, + dml_uint_t sliceWidth, + dml_uint_t numSlices, + enum dml_output_format_class pixelFormat, + enum dml_output_encoder_class Output) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + dml_uint_t rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + dml_uint_t pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, + Delay, pixels; + + if (pixelFormat == dml_420) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else if (pixelFormat == dml_444) + pixelsPerClock = 1; + else if (pixelFormat == dml_n422) + pixelsPerClock = 2; + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initalXmitDelay = (dml_uint_t)(dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock, 1)); + + //compute ssm delay + if (bpc == 8) + D = 81; + else if (bpc == 10) + D = 89; + else + D = 113; + + //divide by pixel per cycle to compute slice width as seen by DSC + w = sliceWidth / pixelsPerClock; + + //422 mode has an additional cycle of delay + if (pixelFormat == dml_420 || pixelFormat == dml_444 || pixelFormat == dml_n422) + s = 0; + else + s = 1; + + //main calculation for the dscce + ix = initalXmitDelay + 45; + wx = (w + 2) / 3; + p = 3 * wx - w; + l0 = ix / w; + a = ix + p * l0; + ax = (a + 2) / 3 + D 
+ 6 + 1; + L = (ax + wx - 1) / wx; + if ((ix % w) == 0 && p != 0) + lstall = 1; + else + lstall = 0; + Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; + + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels + pixels = Delay * 3 * pixelsPerClock; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: bpc: %u\n", __func__, bpc); + dml_print("DML::%s: BPP: %f\n", __func__, BPP); + dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); + dml_print("DML::%s: numSlices: %u\n", __func__, numSlices); + dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); + dml_print("DML::%s: Output: %u\n", __func__, Output); + dml_print("DML::%s: pixels: %u\n", __func__, pixels); +#endif + return pixels; +} + +static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output) +{ + dml_uint_t Delay = 0; + + if (pixelFormat == dml_420) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc gets pixels every other cycle + Delay = Delay + 2; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc gets pixels every other cycle + Delay = Delay + 13; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc gets pixels every other cycle + Delay = Delay + 3; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else if (pixelFormat == dml_n422) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 25; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 10; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc - output serializer + Delay = Delay + 1; + // dscc - cdc uncertainty + Delay = Delay + 2; + // sft + Delay = Delay + 1; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); + dml_print("DML::%s: Delay = %u\n", __func__, Delay); +#endif + + return Delay; +} + +static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch, + struct CalculatePrefetchSchedule_params_st *p) +{ + struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals; + + s->MyError = false; + s->DPPCycles = 0; + s->DISPCLKCycles = 0; + s->DSTTotalPixelsAfterScaler = 0.0; + s->LineTime = 0.0; + s->dst_y_prefetch_equ = 0.0; + s->prefetch_bw_oto = 0.0; + s->Tvm_oto = 0.0; + s->Tr0_oto = 0.0; + s->Tvm_oto_lines = 0.0; + s->Tr0_oto_lines = 0.0; + s->dst_y_prefetch_oto = 0.0; + s->TimeForFetchingMetaPTE = 0.0; + s->TimeForFetchingRowInVBlank = 0.0; + s->LinesToRequestPrefetchPixelData = 0.0; + s->HostVMDynamicLevelsTrips = 0; + s->trip_to_mem = 0.0; + s->Tvm_trips = 0.0; + s->Tr0_trips = 0.0; + s->Tvm_trips_rounded = 0.0; + s->Tr0_trips_rounded = 0.0; + s->max_Tsw = 0.0; + s->Lsw_oto = 0.0; + s->Tpre_rounded = 0.0; + s->prefetch_bw_equ = 0.0; + s->Tvm_equ = 0.0; + s->Tr0_equ = 0.0; + s->Tdmbf = 0.0; + s->Tdmec = 0.0; + s->Tdmsks = 0.0; + 
s->prefetch_sw_bytes = 0.0; + s->prefetch_bw_pr = 0.0; + s->bytes_pp = 0.0; + s->dep_bytes = 0.0; + s->min_Lsw_oto = 0.0; + s->Tsw_est1 = 0.0; + s->Tsw_est3 = 0.0; + + if (p->GPUVMEnable == true && p->HostVMEnable == true) { + s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels; + } else { + s->HostVMDynamicLevelsTrips = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels); + dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); + dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup); + dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup); + dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable); + dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor); + dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); +#endif + CalculateVUpdateAndDynamicMetadataParameters( + p->MaxInterDCNTileRepeaters, + p->myPipe->Dppclk, + p->myPipe->Dispclk, + p->myPipe->DCFClkDeepSleep, + p->myPipe->PixelClock, + p->myPipe->HTotal, + p->myPipe->VBlank, + p->DynamicMetadataTransmittedBytes, + p->DynamicMetadataLinesBeforeActiveRequired, + p->myPipe->InterlaceEnable, + p->myPipe->ProgressiveToInterlaceUnitInOPP, + p->TSetup, + + // Output + &s->Tdmbf, + &s->Tdmec, + &s->Tdmsks, + p->VUpdateOffsetPix, + p->VUpdateWidthPix, + p->VReadyOffsetPix); + + s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; + s->trip_to_mem = p->UrgentLatency; + s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1); + + if (p->DynamicMetadataVMEnabled == true) { + *p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem; + } else { + *p->Tdmdl = p->TWait + p->UrgentExtraLatency; + } + +#ifdef __DML_VBA_ALLOW_DELTA__ + if (DynamicMetadataEnable == false) { + *Tdmdl = 0.0; + } +#endif + + if (p->DynamicMetadataEnable == true) { + if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { + *p->NotEnoughTimeForDynamicMetadata = true; + dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + + *p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? 
p->TWait + s->Tvm_trips : 0); + + if (p->myPipe->ScalerEnabled) + s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL); + else + s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly); + + s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor); + + s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal; + + if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0) + return true; + + *p->DSTXAfterScaler = (dml_uint_t) dml_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, 1.0); + *p->DSTXAfterScaler = (dml_uint_t) dml_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH + + ((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) + + ((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); + dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); + dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); + dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); + dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); + dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); + dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); + dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); + dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); +#endif + + if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) + *p->DSTYAfterScaler = 1; + else + *p->DSTYAfterScaler = 0; + + s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler; + *p->DSTYAfterScaler = (dml_uint_t)(dml_floor(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); + *p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal))); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); + dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); +#endif + + s->MyError = false; + + s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); + + if (p->GPUVMEnable == true) { + s->Tvm_trips_rounded = dml_ceil(4.0 * s->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + if (p->GPUVMPageTableLevels >= 3) { + *p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1); + } else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) { + s->Tr0_trips_rounded = dml_ceil(4.0 * p->UrgentExtraLatency / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tno_bw = p->UrgentExtraLatency; + } else { + *p->Tno_bw = 0; + } + } else if (p->myPipe->DCCEnable == true) { + s->Tvm_trips_rounded = s->LineTime / 4.0; + s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tno_bw = 0; + } else { + s->Tvm_trips_rounded = s->LineTime / 4.0; + s->Tr0_trips_rounded = 
s->LineTime / 2.0; + *p->Tno_bw = 0; + } + s->Tvm_trips_rounded = dml_max(s->Tvm_trips_rounded, s->LineTime / 4.0); + s->Tr0_trips_rounded = dml_max(s->Tr0_trips_rounded, s->LineTime / 4.0); + + if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4; + } else { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC; + } + + s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface; + if (p->myPipe->VRatio < 1.0) + s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr; + + s->max_Tsw = (dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); + + s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; + s->prefetch_bw_oto = dml_max(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); + + s->min_Lsw_oto = dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__; + s->min_Lsw_oto = dml_max(s->min_Lsw_oto, 1.0); + s->Lsw_oto = dml_ceil(4.0 * dml_max(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; + + if (p->GPUVMEnable == true) { + s->Tvm_oto = dml_max3( + s->Tvm_trips, + *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, + s->LineTime / 4.0); + } else + s->Tvm_oto = s->LineTime / 4.0; + + if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) { + s->Tr0_oto = dml_max4( + s->Tr0_trips, + (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto, + (s->LineTime - s->Tvm_oto)/2.0, + s->LineTime / 4.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips); + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime - s->Tvm_oto); + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4); +#endif + } else + s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0; + + s->Tvm_oto_lines = dml_ceil(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; + s->Tr0_oto_lines = dml_ceil(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; + s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; + + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal); + s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); + dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw); + dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency); + dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + dml_print("DML::%s: BytePerPixelC = %u\n", __func__, 
p->myPipe->BytePerPixelC); + dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes); + dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte); + dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips); + dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips); + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); + dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); + dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); +#endif + + s->dst_y_prefetch_equ = dml_floor(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; + s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; + + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); + + dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime); + dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup); + dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); + dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); + dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); + dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); + dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); + dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); + + s->dep_bytes = dml_max(p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor); + + if (s->prefetch_sw_bytes < s->dep_bytes) { + s->prefetch_sw_bytes = 2 * s->dep_bytes; + } + + *p->DestinationLinesToRequestVMInVBlank = 0; + *p->DestinationLinesToRequestRowInVBlank = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixDataBWLuma = 0; + if 
(s->dst_y_prefetch_equ > 1) { + + if (s->Tpre_rounded - *p->Tno_bw > 0) { + s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + + s->prefetch_sw_bytes) + / (s->Tpre_rounded - *p->Tno_bw); + s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1; + } else + s->PrefetchBandwidth1 = 0; + + if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) { + s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / + (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + } + + if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0) + s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / + (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded); + else + s->PrefetchBandwidth2 = 0; + + if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) { + s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / + (s->Tpre_rounded - s->Tvm_trips_rounded); + s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3; + } + else + s->PrefetchBandwidth3 = 0; + + + if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) { + s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); + } + + if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0) + s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded); + else + s->PrefetchBandwidth4 = 0; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded); + dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded); + dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); + dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1); + dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2); + dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3); + dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4); +#endif + { + dml_bool_t Case1OK; + dml_bool_t Case2OK; + dml_bool_t Case3OK; + + if (s->PrefetchBandwidth1 > 0) { + if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) { + Case1OK = true; + } else { + Case1OK = false; + } + } else { + Case1OK = false; + } + + if (s->PrefetchBandwidth2 > 0) { + if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) { + Case2OK = true; + } else { + Case2OK = false; + 
} + } else { + Case2OK = false; + } + + if (s->PrefetchBandwidth3 > 0) { + if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) { + Case3OK = true; + } else { + Case3OK = false; + } + } else { + Case3OK = false; + } + + if (Case1OK) { + s->prefetch_bw_equ = s->PrefetchBandwidth1; + } else if (Case2OK) { + s->prefetch_bw_equ = s->PrefetchBandwidth2; + } else if (Case3OK) { + s->prefetch_bw_equ = s->PrefetchBandwidth3; + } else { + s->prefetch_bw_equ = s->PrefetchBandwidth4; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK); + dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK); + dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK); + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); +#endif + + if (s->prefetch_bw_equ > 0) { + if (p->GPUVMEnable == true) { + s->Tvm_equ = dml_max3(*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, s->Tvm_trips, s->LineTime / 4); + } else { + s->Tvm_equ = s->LineTime / 4; + } + + if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) { + s->Tr0_equ = dml_max4((p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, s->Tr0_trips, (s->LineTime - s->Tvm_equ) / 2, s->LineTime / 4); + } else { + s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2; + } + } else { + s->Tvm_equ = 0; + s->Tr0_equ = 0; + dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__); + } + } + + + if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { + *p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto; + s->TimeForFetchingMetaPTE = s->Tvm_oto; + s->TimeForFetchingRowInVBlank = s->Tr0_oto; + + *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0; + *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } else { + *p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ; + s->TimeForFetchingMetaPTE = s->Tvm_equ; + s->TimeForFetchingRowInVBlank = s->Tr0_equ; + + if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) { + *p->DestinationLinesToRequestVMInVBlank = dml_floor(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0; + *p->DestinationLinesToRequestRowInVBlank = dml_floor(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } else { + *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0; + *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } + } + + s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch); + dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank); + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime); + dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank); + dml_print("DML::%s: 
PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); +#endif + + if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) { + *p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, 1.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); + dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); +#endif + if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { + *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, + (dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); + } else { + s->MyError = true; + dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + *p->VRatioPrefetchY = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); +#endif + } + + *p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); + dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); +#endif + if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { + *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); + } else { + s->MyError = true; + dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + *p->VRatioPrefetchC = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); +#endif + } + + *p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData + * p->myPipe->BytePerPixelY + * p->swath_width_luma_ub / s->LineTime; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime); + dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma); +#endif + *p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData + *p->myPipe->BytePerPixelC + *p->swath_width_chroma_ub / s->LineTime; + } else { + s->MyError = true; + dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData); + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixDataBWLuma = 0; + *p->RequiredPrefetchPixDataBWChroma = 0; + } + + dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE); + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE); + dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank); + dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime); + dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime); + dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); + dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); + + } else { + s->MyError = true; + dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + s->TimeForFetchingMetaPTE = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->DestinationLinesToRequestVMInVBlank = 0; + *p->DestinationLinesToRequestRowInVBlank = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixDataBWLuma = 0; + *p->RequiredPrefetchPixDataBWChroma = 0; + } + + { + dml_float_t prefetch_vm_bw; + dml_float_t prefetch_row_bw; + + if (p->PDEAndMetaPTEBytesFrame == 0) { + prefetch_vm_bw = 0; + } else if (*p->DestinationLinesToRequestVMInVBlank > 0) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime); +#endif + prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); +#endif + } else { + prefetch_vm_bw = 0; + s->MyError = true; + dml_print("DML::%s: MyErr set. 
DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank); + } + + if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) { + prefetch_row_bw = 0; + } else if (*p->DestinationLinesToRequestRowInVBlank > 0) { + prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte); + dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank); + dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); +#endif + } else { + prefetch_row_bw = 0; + s->MyError = true; + dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank); + } + + *p->prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); + } + + if (s->MyError) { + s->TimeForFetchingMetaPTE = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->DestinationLinesToRequestVMInVBlank = 0; + *p->DestinationLinesToRequestRowInVBlank = 0; + *p->DestinationLinesForPrefetch = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixDataBWLuma = 0; + *p->RequiredPrefetchPixDataBWChroma = 0; + } + + return s->MyError; +} // CalculatePrefetchSchedule + +static void CalculateBytePerPixelAndBlockSizes( + enum dml_source_format_class SourcePixelFormat, + enum dml_swizzle_mode SurfaceTiling, + + // Output + dml_uint_t *BytePerPixelY, + dml_uint_t *BytePerPixelC, + dml_float_t *BytePerPixelDETY, + dml_float_t *BytePerPixelDETC, + dml_uint_t *BlockHeight256BytesY, + dml_uint_t *BlockHeight256BytesC, + dml_uint_t *BlockWidth256BytesY, + dml_uint_t *BlockWidth256BytesC, + dml_uint_t *MacroTileHeightY, + dml_uint_t *MacroTileHeightC, + dml_uint_t *MacroTileWidthY, + dml_uint_t *MacroTileWidthC) +{ + if (SourcePixelFormat == dml_444_64) { + *BytePerPixelDETY = 8; + *BytePerPixelDETC = 0; + *BytePerPixelY = 8; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml_444_32 || SourcePixelFormat == dml_rgbe) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 0; + *BytePerPixelY = 4; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml_444_16 || SourcePixelFormat == dml_mono_16) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 0; + *BytePerPixelY = 2; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml_444_8 || SourcePixelFormat == dml_mono_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml_rgbe_alpha) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 1; + *BytePerPixelY = 4; + *BytePerPixelC = 1; + } else if (SourcePixelFormat == dml_420_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 2; + *BytePerPixelY = 1; + *BytePerPixelC = 2; + } else if (SourcePixelFormat == dml_420_12) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 4; + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else { + *BytePerPixelDETY = (dml_float_t) (4.0 / 3); + *BytePerPixelDETC = (dml_float_t) (8.0 / 3); + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); + dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + dml_print("DML::%s: 
BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); + dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); +#endif + if ((SourcePixelFormat == dml_444_64 || SourcePixelFormat == dml_444_32 + || SourcePixelFormat == dml_444_16 + || SourcePixelFormat == dml_444_8 + || SourcePixelFormat == dml_mono_16 + || SourcePixelFormat == dml_mono_8 + || SourcePixelFormat == dml_rgbe)) { + if (SurfaceTiling == dml_sw_linear) { + *BlockHeight256BytesY = 1; + } else if (SourcePixelFormat == dml_444_64) { + *BlockHeight256BytesY = 4; + } else if (SourcePixelFormat == dml_444_8) { + *BlockHeight256BytesY = 16; + } else { + *BlockHeight256BytesY = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockHeight256BytesC = 0; + *BlockWidth256BytesC = 0; + } else { + if (SurfaceTiling == dml_sw_linear) { + *BlockHeight256BytesY = 1; + *BlockHeight256BytesC = 1; + } else if (SourcePixelFormat == dml_rgbe_alpha) { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 16; + } else if (SourcePixelFormat == dml_420_8) { + *BlockHeight256BytesY = 16; + *BlockHeight256BytesC = 8; + } else { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); + dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); + dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); + dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); +#endif + + if (SurfaceTiling == dml_sw_linear) { + *MacroTileHeightY = *BlockHeight256BytesY; + *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; + } + } else if (SurfaceTiling == dml_sw_64kb_d || SurfaceTiling == dml_sw_64kb_d_t || SurfaceTiling == dml_sw_64kb_d_x || SurfaceTiling == dml_sw_64kb_r_x) { + *MacroTileHeightY = 16 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 16 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; + } + } else { + *MacroTileHeightY = 32 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 32 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); + dml_print("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); + dml_print("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); + dml_print("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); +#endif +} // CalculateBytePerPixelAndBlockSizes + +static noinline_for_stack dml_float_t CalculateTWait( + dml_uint_t PrefetchMode, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, + dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + dml_bool_t DRRDisplay, + dml_float_t DRAMClockChangeLatency, 
+ dml_float_t FCLKChangeLatency, + dml_float_t UrgentLatency, + dml_float_t SREnterPlusExitTime) +{ + dml_float_t TWait = 0.0; + + if (PrefetchMode == 0 && + !(UseMALLForPStateChange == dml_use_mall_pstate_change_full_frame) && !(UseMALLForPStateChange == dml_use_mall_pstate_change_sub_viewport) && + !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe) && !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { + TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); + } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) { + TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); + } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) { + TWait = dml_max(SREnterPlusExitTime, UrgentLatency); + } else { + TWait = UrgentLatency; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PrefetchMode = %u\n", __func__, PrefetchMode); + dml_print("DML::%s: TWait = %f\n", __func__, TWait); +#endif + return TWait; +} // CalculateTWait + + +/// @brief Calculate the "starting point" for prefetch calculation +/// If AllowForPStateChangeOrStutterInVBlank is set as a particular requirement, then the mode evaluation +/// will only be done at the given mode. If there is no specific requirement (i.e. *_if_possible), then all the +/// prefetch modes are tried in decreasing order of "difficulty" (starting from 0, which means all power saving +/// features). +static void CalculatePrefetchMode( + enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank, + dml_uint_t *MinPrefetchMode, + dml_uint_t *MaxPrefetchMode) +{ + if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter_if_possible) { + *MinPrefetchMode = 0; // consider all pwr saving features + *MaxPrefetchMode = 3; // consider just urgent latency + } else { + if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_none) { + *MinPrefetchMode = 3; + } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_stutter) { + *MinPrefetchMode = 2; + } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_fclk_and_stutter) { + *MinPrefetchMode = 1; + } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter) { + *MinPrefetchMode = 0; + } else { + dml_print("ERROR: Invalid AllowForPStateChangeOrStutterInVBlank setting! 
val=%u\n", AllowForPStateChangeOrStutterInVBlank); + ASSERT(0); + } + *MaxPrefetchMode = *MinPrefetchMode; + } +} // CalculatePrefetchMode + +static dml_float_t CalculateWriteBackDISPCLK( + enum dml_source_format_class WritebackPixelFormat, + dml_float_t PixelClock, + dml_float_t WritebackHRatio, + dml_float_t WritebackVRatio, + dml_uint_t WritebackHTaps, + dml_uint_t WritebackVTaps, + dml_uint_t WritebackSourceWidth, + dml_uint_t WritebackDestinationWidth, + dml_uint_t HTotal, + dml_uint_t WritebackLineBufferSize, + dml_float_t DISPCLKDPPCLKVCOSpeed) +{ + dml_float_t DISPCLK_H, DISPCLK_V, DISPCLK_HB; + + DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; + DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / (dml_float_t) HTotal; + DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (dml_float_t) WritebackSourceWidth; + return RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); +} + +static dml_float_t CalculateWriteBackDelay( + enum dml_source_format_class WritebackPixelFormat, + dml_float_t WritebackHRatio, + dml_float_t WritebackVRatio, + dml_uint_t WritebackVTaps, + dml_uint_t WritebackDestinationWidth, + dml_uint_t WritebackDestinationHeight, + dml_uint_t WritebackSourceHeight, + dml_uint_t HTotal) +{ + dml_float_t CalculateWriteBackDelay; + dml_float_t Line_length; + dml_float_t Output_lines_last_notclamped; + dml_float_t WritebackVInit; + + WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; + Line_length = dml_max((dml_float_t) WritebackDestinationWidth, dml_ceil((dml_float_t)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); + Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil(((dml_float_t)WritebackSourceHeight - (dml_float_t) WritebackVInit) / (dml_float_t)WritebackVRatio, 1.0); + if (Output_lines_last_notclamped < 0) { + CalculateWriteBackDelay = 0; + } else { + CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; + } + return CalculateWriteBackDelay; +} + +static void CalculateVUpdateAndDynamicMetadataParameters( + dml_uint_t MaxInterDCNTileRepeaters, + dml_float_t Dppclk, + dml_float_t Dispclk, + dml_float_t DCFClkDeepSleep, + dml_float_t PixelClock, + dml_uint_t HTotal, + dml_uint_t VBlank, + dml_uint_t DynamicMetadataTransmittedBytes, + dml_uint_t DynamicMetadataLinesBeforeActiveRequired, + dml_uint_t InterlaceEnable, + dml_bool_t ProgressiveToInterlaceUnitInOPP, + + // Output + dml_float_t *TSetup, + dml_float_t *Tdmbf, + dml_float_t *Tdmec, + dml_float_t *Tdmsks, + dml_uint_t *VUpdateOffsetPix, + dml_uint_t *VUpdateWidthPix, + dml_uint_t *VReadyOffsetPix) +{ + dml_float_t TotalRepeaterDelayTime; + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); + *VUpdateWidthPix = (dml_uint_t)(dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0)); + *VReadyOffsetPix = (dml_uint_t)(dml_ceil(dml_max(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0)); + *VUpdateOffsetPix = (dml_uint_t)(dml_ceil(HTotal / 4.0, 1.0)); + *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; + *Tdmec = HTotal / PixelClock; + + if (DynamicMetadataLinesBeforeActiveRequired == 0) { + *Tdmsks = VBlank * HTotal / PixelClock / 
2.0; + } else { + *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; + } + if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { + *Tdmsks = *Tdmsks / 2; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); + dml_print("DML::%s: VBlank = %u\n", __func__, VBlank); + dml_print("DML::%s: HTotal = %u\n", __func__, HTotal); + dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); + dml_print("DML::%s: Dppclk = %f\n", __func__, Dppclk); + dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); + dml_print("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); + dml_print("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); + + dml_print("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); + dml_print("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); + dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); + + dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); +#endif +} + +static void CalculateRowBandwidth( + dml_bool_t GPUVMEnable, + enum dml_source_format_class SourcePixelFormat, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_bool_t DCCEnable, + dml_float_t LineTime, + dml_uint_t MetaRowByteLuma, + dml_uint_t MetaRowByteChroma, + dml_uint_t meta_row_height_luma, + dml_uint_t meta_row_height_chroma, + dml_uint_t PixelPTEBytesPerRowLuma, + dml_uint_t PixelPTEBytesPerRowChroma, + dml_uint_t dpte_row_height_luma, + dml_uint_t dpte_row_height_chroma, + // Output + dml_float_t *meta_row_bw, + dml_float_t *dpte_row_bw) +{ + if (DCCEnable != true) { + *meta_row_bw = 0; + } else if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha) { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + + VRatioChroma * MetaRowByteChroma + / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatioChroma * PixelPTEBytesPerRowChroma + / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } +} + +/// @brief Determine immediate flip schedule given bw remaining after considering the prefetch schedule +/// @param BandwidthAvailableForImmediateFlip Bandwidth available for iflip for all planes +static void CalculateFlipSchedule( + dml_float_t HostVMInefficiencyFactor, + dml_float_t UrgentExtraLatency, + dml_float_t UrgentLatency, + dml_uint_t GPUVMMaxPageTableLevels, + dml_bool_t HostVMEnable, + dml_uint_t HostVMMaxNonCachedPageTableLevels, + dml_bool_t GPUVMEnable, + dml_uint_t HostVMMinPageSize, + dml_float_t PDEAndMetaPTEBytesPerFrame, + dml_float_t MetaRowBytes, + dml_float_t DPTEBytesPerRow, + dml_float_t BandwidthAvailableForImmediateFlip, + dml_uint_t TotImmediateFlipBytes, + enum dml_source_format_class SourcePixelFormat, + dml_float_t LineTime, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_float_t Tno_bw, + dml_bool_t DCCEnable, + dml_uint_t dpte_row_height, + 
dml_uint_t meta_row_height, + dml_uint_t dpte_row_height_chroma, + dml_uint_t meta_row_height_chroma, + dml_bool_t use_one_row_for_frame_flip, + + // Output + dml_float_t *DestinationLinesToRequestVMInImmediateFlip, + dml_float_t *DestinationLinesToRequestRowInImmediateFlip, + dml_float_t *final_flip_bw, + dml_bool_t *ImmediateFlipSupportedForPipe) +{ + dml_float_t min_row_time = 0.0; + dml_uint_t HostVMDynamicLevelsTrips = 0; + dml_float_t TimeForFetchingMetaPTEImmediateFlip = 0; + dml_float_t TimeForFetchingRowInVBlankImmediateFlip = 0; + dml_float_t ImmediateFlipBW = 0; // @brief The immediate flip bandwidth for this pipe + + if (GPUVMEnable == true && HostVMEnable == true) { + HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + } else { + HostVMDynamicLevelsTrips = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); +#endif + + if (TotImmediateFlipBytes > 0) { + if (use_one_row_for_frame_flip) { + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2.0 * DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes; + } else { + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes; + } + if (GPUVMEnable == true) { + TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, + UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), + LineTime / 4.0); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + if ((GPUVMEnable == true || DCCEnable == true)) { + TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); + } else { + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + + *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; + *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; + + if (GPUVMEnable == true) { + *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), + (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); + } else if ((GPUVMEnable == true || DCCEnable == true)) { + *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime); + } else { + *final_flip_bw = 0; + } + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + TimeForFetchingRowInVBlankImmediateFlip = 0; + *DestinationLinesToRequestVMInImmediateFlip = 0; + *DestinationLinesToRequestRowInImmediateFlip = 0; + *final_flip_bw = 0; + } + + if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_rgbe_alpha) { + if (GPUVMEnable == true && DCCEnable != true) { + min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / 
VRatioChroma); + } else if (GPUVMEnable != true && DCCEnable == true) { + min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); + } else { + min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); + } + } else { + if (GPUVMEnable == true && DCCEnable != true) { + min_row_time = dpte_row_height * LineTime / VRatio; + } else if (GPUVMEnable != true && DCCEnable == true) { + min_row_time = meta_row_height * LineTime / VRatio; + } else { + min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); + } + } + + if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { + *ImmediateFlipSupportedForPipe = false; + } else { + *ImmediateFlipSupportedForPipe = true; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable); + + dml_print("DML::%s: MetaRowBytes = %f\n", __func__, MetaRowBytes); + dml_print("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); + dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + dml_print("DML::%s: ImmediateFlipBW = %f\n", __func__, ImmediateFlipBW); + dml_print("DML::%s: PDEAndMetaPTEBytesPerFrame = %f\n", __func__, PDEAndMetaPTEBytesPerFrame); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); + + dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip); + dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip); + dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); + dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); + dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); + dml_print("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); +#endif +} // CalculateFlipSchedule + +static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed) +{ + if (Clock <= 0.0) + return 0.0; + else { + if (round_up) + return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); + else + return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); + } +} + +static void CalculateDCCConfiguration( + dml_bool_t DCCEnabled, + dml_bool_t DCCProgrammingAssumesScanDirectionUnknown, + enum dml_source_format_class SourcePixelFormat, + dml_uint_t SurfaceWidthLuma, + dml_uint_t SurfaceWidthChroma, + dml_uint_t SurfaceHeightLuma, + dml_uint_t SurfaceHeightChroma, + dml_uint_t nomDETInKByte, + dml_uint_t RequestHeight256ByteLuma, + dml_uint_t RequestHeight256ByteChroma, + enum dml_swizzle_mode TilingFormat, + dml_uint_t BytePerPixelY, + dml_uint_t BytePerPixelC, + dml_float_t BytePerPixelDETY, + dml_float_t BytePerPixelDETC, 
+ enum dml_rotation_angle SourceScan, + // Output + dml_uint_t *MaxUncompressedBlockLuma, + dml_uint_t *MaxUncompressedBlockChroma, + dml_uint_t *MaxCompressedBlockLuma, + dml_uint_t *MaxCompressedBlockChroma, + dml_uint_t *IndependentBlockLuma, + dml_uint_t *IndependentBlockChroma) +{ + dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024; + + dml_uint_t yuv420; + dml_uint_t horz_div_l; + dml_uint_t horz_div_c; + dml_uint_t vert_div_l; + dml_uint_t vert_div_c; + + dml_uint_t swath_buf_size; + dml_float_t detile_buf_vp_horz_limit; + dml_float_t detile_buf_vp_vert_limit; + + dml_uint_t MAS_vp_horz_limit; + dml_uint_t MAS_vp_vert_limit; + dml_uint_t max_vp_horz_width; + dml_uint_t max_vp_vert_height; + dml_uint_t eff_surf_width_l; + dml_uint_t eff_surf_width_c; + dml_uint_t eff_surf_height_l; + dml_uint_t eff_surf_height_c; + + dml_uint_t full_swath_bytes_horz_wc_l; + dml_uint_t full_swath_bytes_horz_wc_c; + dml_uint_t full_swath_bytes_vert_wc_l; + dml_uint_t full_swath_bytes_vert_wc_c; + + dml_uint_t req128_horz_wc_l; + dml_uint_t req128_horz_wc_c; + dml_uint_t req128_vert_wc_l; + dml_uint_t req128_vert_wc_c; + + dml_uint_t segment_order_horz_contiguous_luma; + dml_uint_t segment_order_horz_contiguous_chroma; + dml_uint_t segment_order_vert_contiguous_luma; + dml_uint_t segment_order_vert_contiguous_chroma; + + typedef enum{ + REQ_256Bytes, + REQ_128BytesNonContiguous, + REQ_128BytesContiguous, + REQ_NA + } RequestType; + + RequestType RequestLuma; + RequestType RequestChroma; + + yuv420 = ((SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12) ? 1 : 0); + horz_div_l = 1; + horz_div_c = 1; + vert_div_l = 1; + vert_div_c = 1; + + if (BytePerPixelY == 1) + vert_div_l = 0; + if (BytePerPixelC == 1) + vert_div_c = 0; + + if (BytePerPixelC == 0) { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; + detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); + detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); + } else { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; + detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (dml_float_t) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); + detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); + } + + if (SourcePixelFormat == dml_420_10) { + detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; + detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; + } + + detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); + detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); + + MAS_vp_horz_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : 6144; + MAS_vp_vert_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); + max_vp_horz_width = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_horz_limit, detile_buf_vp_horz_limit)); + max_vp_vert_height = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_vert_limit, detile_buf_vp_vert_limit)); + eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); + eff_surf_width_c = eff_surf_width_l / (1 + yuv420); + eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? 
max_vp_vert_height : SurfaceHeightLuma); + eff_surf_height_c = eff_surf_height_l / (1 + yuv420); + + full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; + full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; + if (BytePerPixelC > 0) { + full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; + full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; + } else { + full_swath_bytes_horz_wc_c = 0; + full_swath_bytes_vert_wc_c = 0; + } + + if (SourcePixelFormat == dml_420_10) { + full_swath_bytes_horz_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_horz_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0)); + } + + if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 0; + } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 1; + } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 1; + req128_horz_wc_c = 0; + } else { + req128_horz_wc_l = 1; + req128_horz_wc_c = 1; + } + + if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 0; + } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 1; + } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 1; + req128_vert_wc_c = 0; + } else { + req128_vert_wc_l = 1; + req128_vert_wc_c = 1; + } + + if (BytePerPixelY == 2) { + segment_order_horz_contiguous_luma = 0; + segment_order_vert_contiguous_luma = 1; + } else { + segment_order_horz_contiguous_luma = 1; + segment_order_vert_contiguous_luma = 0; + } + + if (BytePerPixelC == 2) { + segment_order_horz_contiguous_chroma = 0; + segment_order_vert_contiguous_chroma = 1; + } else { + segment_order_horz_contiguous_chroma = 1; + segment_order_vert_contiguous_chroma = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); + dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + dml_print("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); + dml_print("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); + dml_print("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); + dml_print("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); + dml_print("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); + dml_print("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); + dml_print("DML::%s: segment_order_horz_contiguous_chroma = %u\n", 
__func__, segment_order_horz_contiguous_chroma); +#endif + + if (DCCProgrammingAssumesScanDirectionUnknown == true) { + if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { + RequestLuma = REQ_256Bytes; + } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { + RequestLuma = REQ_128BytesNonContiguous; + } else { + RequestLuma = REQ_128BytesContiguous; + } + if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { + RequestChroma = REQ_256Bytes; + } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { + RequestChroma = REQ_128BytesNonContiguous; + } else { + RequestChroma = REQ_128BytesContiguous; + } + } else if (!dml_is_vertical_rotation(SourceScan)) { + if (req128_horz_wc_l == 0) { + RequestLuma = REQ_256Bytes; + } else if (segment_order_horz_contiguous_luma == 0) { + RequestLuma = REQ_128BytesNonContiguous; + } else { + RequestLuma = REQ_128BytesContiguous; + } + if (req128_horz_wc_c == 0) { + RequestChroma = REQ_256Bytes; + } else if (segment_order_horz_contiguous_chroma == 0) { + RequestChroma = REQ_128BytesNonContiguous; + } else { + RequestChroma = REQ_128BytesContiguous; + } + } else { + if (req128_vert_wc_l == 0) { + RequestLuma = REQ_256Bytes; + } else if (segment_order_vert_contiguous_luma == 0) { + RequestLuma = REQ_128BytesNonContiguous; + } else { + RequestLuma = REQ_128BytesContiguous; + } + if (req128_vert_wc_c == 0) { + RequestChroma = REQ_256Bytes; + } else if (segment_order_vert_contiguous_chroma == 0) { + RequestChroma = REQ_128BytesNonContiguous; + } else { + RequestChroma = REQ_128BytesContiguous; + } + } + + if (RequestLuma == REQ_256Bytes) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 256; + *IndependentBlockLuma = 0; + } else if (RequestLuma == REQ_128BytesContiguous) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 128; + *IndependentBlockLuma = 128; + } else { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 64; + *IndependentBlockLuma = 64; + } + + if (RequestChroma == REQ_256Bytes) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 256; + *IndependentBlockChroma = 0; + } else if (RequestChroma == REQ_128BytesContiguous) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 128; + *IndependentBlockChroma = 128; + } else { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 64; + *IndependentBlockChroma = 64; + } + + if (DCCEnabled != true || BytePerPixelC == 0) { + *MaxUncompressedBlockChroma = 0; + *MaxCompressedBlockChroma = 0; + *IndependentBlockChroma = 0; + } + + if (DCCEnabled != true) { + *MaxUncompressedBlockLuma = 0; + *MaxCompressedBlockLuma = 0; + *IndependentBlockLuma = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); + dml_print("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); + dml_print("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); + dml_print("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); + dml_print("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); + dml_print("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); +#endif + +} // CalculateDCCConfiguration + +static dml_uint_t CalculatePrefetchSourceLines( + dml_float_t VRatio, + 
dml_uint_t VTaps, + dml_bool_t Interlace, + dml_bool_t ProgressiveToInterlaceUnitInOPP, + dml_uint_t SwathHeight, + enum dml_rotation_angle SourceScan, + dml_bool_t ViewportStationary, + dml_uint_t SwathWidth, + dml_uint_t ViewportHeight, + dml_uint_t ViewportXStart, + dml_uint_t ViewportYStart, + + // Output + dml_uint_t *VInitPreFill, + dml_uint_t *MaxNumSwath) +{ + + dml_uint_t vp_start_rot = 0; + dml_uint_t sw0_tmp = 0; + dml_uint_t MaxPartialSwath = 0; + dml_float_t numLines = 0; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); + dml_print("DML::%s: VTaps = %u\n", __func__, VTaps); + dml_print("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); + dml_print("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); + dml_print("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); + dml_print("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); +#endif + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1) / 2.0, 1)); + else + *VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1)); + + if (ViewportStationary) { + if (SourceScan == dml_rotation_180 || SourceScan == dml_rotation_180m) { + vp_start_rot = SwathHeight - (((dml_uint_t) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); + } else if (SourceScan == dml_rotation_270 || SourceScan == dml_rotation_90m) { + vp_start_rot = ViewportXStart; + } else if (SourceScan == dml_rotation_90 || SourceScan == dml_rotation_270m) { + vp_start_rot = SwathHeight - (((dml_uint_t)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); + } else { + vp_start_rot = ViewportYStart; + } + sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); + if (sw0_tmp < *VInitPreFill) { + *MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - sw0_tmp) / (dml_float_t) SwathHeight, 1) + 1); + } else { + *MaxNumSwath = 1; + } + MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (vp_start_rot + *VInitPreFill - 1) % SwathHeight)); + } else { + *MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - 1.0) / (dml_float_t) SwathHeight, 1) + 1); + if (*VInitPreFill > 1) { + MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill - 2) % SwathHeight)); + } else { + MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill + SwathHeight - 2) % SwathHeight)); + } + } + numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); + dml_print("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); + dml_print("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); + dml_print("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); + dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); +#endif + return (dml_uint_t)(numLines); + +} // CalculatePrefetchSourceLines + +static dml_uint_t CalculateVMAndRowBytes( + dml_bool_t ViewportStationary, + dml_bool_t DCCEnable, + dml_uint_t NumberOfDPPs, + dml_uint_t BlockHeight256Bytes, + dml_uint_t BlockWidth256Bytes, + enum dml_source_format_class SourcePixelFormat, + dml_uint_t SurfaceTiling, + dml_uint_t BytePerPixel, + enum dml_rotation_angle SourceScan, + dml_uint_t SwathWidth, + dml_uint_t ViewportHeight, + dml_uint_t ViewportXStart, + dml_uint_t ViewportYStart, + dml_bool_t GPUVMEnable, + dml_uint_t GPUVMMaxPageTableLevels, + dml_uint_t GPUVMMinPageSizeKBytes, + dml_uint_t PTEBufferSizeInRequests, + dml_uint_t 
Pitch, + dml_uint_t DCCMetaPitch, + dml_uint_t MacroTileWidth, + dml_uint_t MacroTileHeight, + + // Output + dml_uint_t *MetaRowByte, + dml_uint_t *PixelPTEBytesPerRow, // for bandwidth calculation + dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check + dml_uint_t *dpte_row_width_ub, + dml_uint_t *dpte_row_height, + dml_uint_t *dpte_row_height_linear, + dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame, + dml_uint_t *dpte_row_width_ub_one_row_per_frame, + dml_uint_t *dpte_row_height_one_row_per_frame, + dml_uint_t *MetaRequestWidth, + dml_uint_t *MetaRequestHeight, + dml_uint_t *meta_row_width, + dml_uint_t *meta_row_height, + dml_uint_t *PixelPTEReqWidth, + dml_uint_t *PixelPTEReqHeight, + dml_uint_t *PTERequestSize, + dml_uint_t *DPDE0BytesFrame, + dml_uint_t *MetaPTEBytesFrame) +{ + dml_uint_t MPDEBytesFrame; + dml_uint_t DCCMetaSurfaceBytes; + dml_uint_t ExtraDPDEBytesFrame; + dml_uint_t PDEAndMetaPTEBytesFrame; + dml_uint_t MacroTileSizeBytes; + dml_uint_t vp_height_meta_ub; + dml_uint_t vp_height_dpte_ub; + + dml_uint_t PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this + + *MetaRequestHeight = 8 * BlockHeight256Bytes; + *MetaRequestWidth = 8 * BlockWidth256Bytes; + if (SurfaceTiling == dml_sw_linear) { + *meta_row_height = 32; + *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth)); + } else if (!dml_is_vertical_rotation(SourceScan)) { + *meta_row_height = *MetaRequestHeight; + if (ViewportStationary && NumberOfDPPs == 1) { + *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth)); + } else { + *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth); + } + *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0); + } else { + *meta_row_height = *MetaRequestWidth; + if (ViewportStationary && NumberOfDPPs == 1) { + *meta_row_width = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight)); + } else { + *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight); + } + *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0); + } + + if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) { + vp_height_meta_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes)); + } else if (!dml_is_vertical_rotation(SourceScan)) { + vp_height_meta_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes); + } else { + vp_height_meta_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes); + } + + DCCMetaSurfaceBytes = (dml_uint_t)(DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0); + + if (GPUVMEnable == true) { + *MetaPTEBytesFrame = (dml_uint_t)((dml_ceil((dml_float_t) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64); + MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); + } else { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + } + + if (DCCEnable != true) { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + *MetaRowByte = 0; + } + + MacroTileSizeBytes = 
MacroTileWidth * BytePerPixel * MacroTileHeight; + + if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) { + vp_height_dpte_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + MacroTileHeight - 1, MacroTileHeight) - dml_floor(ViewportYStart, MacroTileHeight)); + } else if (!dml_is_vertical_rotation(SourceScan)) { + vp_height_dpte_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight); + } else { + vp_height_dpte_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight); + } + + if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { + *DPDE0BytesFrame = (dml_uint_t)(64 * (dml_ceil((dml_float_t) (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / (dml_float_t) (8 * 2097152), 1) + 1)); + ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); + } else { + *DPDE0BytesFrame = 0; + ExtraDPDEBytesFrame = 0; + } + + PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable); + dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + dml_print("DML::%s: SwModeLinear = %u\n", __func__, SurfaceTiling == dml_sw_linear); + dml_print("DML::%s: BytePerPixel = %u\n", __func__, BytePerPixel); + dml_print("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, GPUVMMaxPageTableLevels); + dml_print("DML::%s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); + dml_print("DML::%s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); + dml_print("DML::%s: MacroTileHeight = %u\n", __func__, MacroTileHeight); + dml_print("DML::%s: MacroTileWidth = %u\n", __func__, MacroTileWidth); + dml_print("DML::%s: MetaPTEBytesFrame = %u\n", __func__, *MetaPTEBytesFrame); + dml_print("DML::%s: MPDEBytesFrame = %u\n", __func__, MPDEBytesFrame); + dml_print("DML::%s: DPDE0BytesFrame = %u\n", __func__, *DPDE0BytesFrame); + dml_print("DML::%s: ExtraDPDEBytesFrame= %u\n", __func__, ExtraDPDEBytesFrame); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: ViewportHeight = %u\n", __func__, ViewportHeight); + dml_print("DML::%s: SwathWidth = %u\n", __func__, SwathWidth); + dml_print("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); +#endif + + if (SurfaceTiling == dml_sw_linear) { + *PixelPTEReqHeight = 1; + *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; + PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; + *PTERequestSize = 64; + } else if (GPUVMMinPageSizeKBytes == 4) { + *PixelPTEReqHeight = 16 * BlockHeight256Bytes; + *PixelPTEReqWidth = 16 * BlockWidth256Bytes; + *PTERequestSize = 128; + } else { + *PixelPTEReqHeight = MacroTileHeight; + *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); + *PTERequestSize = 64; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: PixelPTEReqHeight = %u\n", __func__, *PixelPTEReqHeight); + dml_print("DML::%s: PixelPTEReqWidth = %u\n", __func__, *PixelPTEReqWidth); + dml_print("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); + dml_print("DML::%s: PTERequestSize = %u\n", __func__, *PTERequestSize); + dml_print("DML::%s: Pitch = %u\n", 
__func__, Pitch); +#endif + + *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; + *dpte_row_width_ub_one_row_per_frame = (dml_uint_t)((dml_ceil(((dml_float_t)Pitch * (dml_float_t) *dpte_row_height_one_row_per_frame / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * (dml_float_t) *PixelPTEReqWidth); + *PixelPTEBytesPerRow_one_row_per_frame = (dml_uint_t)((dml_float_t) *dpte_row_width_ub_one_row_per_frame / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize); + + if (SurfaceTiling == dml_sw_linear) { + *dpte_row_height = (dml_uint_t)(dml_min(128, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1))); + dml_print("DML::%s: dpte_row_height term 1 = %u\n", __func__, PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); + dml_print("DML::%s: dpte_row_height term 2 = %f\n", __func__, dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); + dml_print("DML::%s: dpte_row_height term 3 = %f\n", __func__, dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); + dml_print("DML::%s: dpte_row_height term 4 = %u\n", __func__, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); + dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height); + + *dpte_row_width_ub = (dml_uint_t)(dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height - 1), (dml_float_t) *PixelPTEReqWidth) + *PixelPTEReqWidth); + *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize); + + // VBA_DELTA, VBA doesn't have programming value for pte row height linear. + *dpte_row_height_linear = 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch), 1); + if (*dpte_row_height_linear > 128) + *dpte_row_height_linear = 128; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *dpte_row_width_ub); +#endif + + } else if (!dml_is_vertical_rotation(SourceScan)) { + *dpte_row_height = *PixelPTEReqHeight; + + if (GPUVMMinPageSizeKBytes > 64) { + *dpte_row_width_ub = (dml_uint_t)((dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth); + } else if (ViewportStationary && (NumberOfDPPs == 1)) { + *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - dml_floor(ViewportXStart, *PixelPTEReqWidth)); + } else { + *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t)*PixelPTEReqWidth, 1) + 1.0) * *PixelPTEReqWidth); + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *dpte_row_width_ub); +#endif + + ASSERT(*PixelPTEReqWidth); + if (*PixelPTEReqWidth != 0) + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; + } else { + *dpte_row_height = (dml_uint_t)(dml_min(*PixelPTEReqWidth, MacroTileWidth)); + + if (ViewportStationary && (NumberOfDPPs == 1)) { + *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight)); + } else { + *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t) *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight); + } + + *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / 
(dml_float_t) *PixelPTEReqHeight * *PTERequestSize); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *dpte_row_width_ub); +#endif + } + + if (GPUVMEnable != true) + *PixelPTEBytesPerRow = 0; + + *PixelPTEBytesPerRowStorage = *PixelPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); + dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height); + dml_print("DML::%s: dpte_row_height_linear = %u\n", __func__, *dpte_row_height_linear); + dml_print("DML::%s: dpte_row_width_ub = %u\n", __func__, *dpte_row_width_ub); + dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *PixelPTEBytesPerRow); + dml_print("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *PixelPTEBytesPerRowStorage); + dml_print("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); + dml_print("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *dpte_row_height_one_row_per_frame); + dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *dpte_row_width_ub_one_row_per_frame); + dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *PixelPTEBytesPerRow_one_row_per_frame); +#endif + + dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); + + return PDEAndMetaPTEBytesFrame; +} // CalculateVMAndRowBytes + +static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported) +{ + dml_uint_t num_active_planes = dml_get_num_active_planes(display_cfg); + + //Progressive To Interlace Unit Effect + for (dml_uint_t k = 0; k < num_active_planes; ++k) { + display_cfg->output.PixelClockBackEnd[k] = display_cfg->timing.PixelClock[k]; + if (display_cfg->timing.Interlace[k] == 1 && ptoi_supported == true) { + display_cfg->timing.PixelClock[k] = 2 * display_cfg->timing.PixelClock[k]; + } + } +} + +static dml_float_t TruncToValidBPP( + dml_float_t LinkBitRate, + dml_uint_t Lanes, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_float_t PixelClock, + dml_float_t DesiredBPP, + dml_bool_t DSCEnable, + enum dml_output_encoder_class Output, + enum dml_output_format_class Format, + dml_uint_t DSCInputBitPerComponent, + dml_uint_t DSCSlices, + dml_uint_t AudioRate, + dml_uint_t AudioLayout, + enum dml_odm_mode ODMModeNoDSC, + enum dml_odm_mode ODMModeDSC, + + // Output + dml_uint_t *RequiredSlots) +{ + dml_float_t MaxLinkBPP; + dml_uint_t MinDSCBPP; + dml_float_t MaxDSCBPP; + dml_uint_t NonDSCBPP0; + dml_uint_t NonDSCBPP1; + dml_uint_t NonDSCBPP2; + + if (Format == dml_420) { + NonDSCBPP0 = 12; + NonDSCBPP1 = 15; + NonDSCBPP2 = 18; + MinDSCBPP = 6; + MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16; + } else if (Format == dml_444) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 30; + NonDSCBPP2 = 36; + MinDSCBPP = 8; + MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; + } else { + if (Output == dml_hdmi) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 24; + NonDSCBPP2 = 24; + } else { + NonDSCBPP0 = 16; + NonDSCBPP1 = 20; + NonDSCBPP2 = 24; + } + if (Format == dml_n422) { + MinDSCBPP = 7; + MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; + } else { + MinDSCBPP = 8; + MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; + } + } + + if (Output == dml_dp2p0) { + MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 
65540.0; + } else if (DSCEnable && Output == dml_dp) { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100); + } else { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock; + } + + if (DSCEnable) { + if (ODMModeDSC == dml_odm_mode_combine_4to1) { + MaxLinkBPP = dml_min(MaxLinkBPP, 16); + } else if (ODMModeDSC == dml_odm_mode_combine_2to1) { + MaxLinkBPP = dml_min(MaxLinkBPP, 32); + } else if (ODMModeDSC == dml_odm_mode_split_1to2) { + MaxLinkBPP = 2 * MaxLinkBPP; + } + } else { + if (ODMModeNoDSC == dml_odm_mode_combine_4to1) { + MaxLinkBPP = dml_min(MaxLinkBPP, 16); + } else if (ODMModeNoDSC == dml_odm_mode_combine_2to1) { + MaxLinkBPP = dml_min(MaxLinkBPP, 32); + } else if (ODMModeNoDSC == dml_odm_mode_split_1to2) { + MaxLinkBPP = 2 * MaxLinkBPP; + } + } + + *RequiredSlots = (dml_uint_t)(dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1)); + + if (DesiredBPP == 0) { + if (DSCEnable) { + if (MaxLinkBPP < MinDSCBPP) { + return __DML_DPP_INVALID__; + } else if (MaxLinkBPP >= MaxDSCBPP) { + return MaxDSCBPP; + } else { + return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; + } + } else { + if (MaxLinkBPP >= NonDSCBPP2) { + return NonDSCBPP2; + } else if (MaxLinkBPP >= NonDSCBPP1) { + return NonDSCBPP1; + } else if (MaxLinkBPP >= NonDSCBPP0) { + return NonDSCBPP0; + } else { + return __DML_DPP_INVALID__; + } + } + } else { + if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) || + (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { + return __DML_DPP_INVALID__; + } else { + return DesiredBPP; + } + } +} // TruncToValidBPP + +static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct display_mode_lib_scratch_st *scratch, + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p) +{ + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; + + s->TotalActiveWriteback = 0; + p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; + p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; + p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; + p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); + dml_print("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); + dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); + dml_print("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + dml_print("DML::%s: USRRetrainingWatermark = %f\n", 
__func__, p->Watermark->USRRetrainingWatermark); + dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); + dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); + dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); + dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); + dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); + dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); +#endif + + s->TotalActiveWriteback = 0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->WritebackEnable[k] == true) { + s->TotalActiveWriteback = s->TotalActiveWriteback + 1; + } + } + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + } + if (p->USRRetrainingRequiredFinal) + p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK; + } + + if (p->USRRetrainingRequiredFinal) + p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (p->USRRetrainingRequiredFinal) + p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); + dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); + dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); + dml_print("DML::%s: USRRetrainingRequiredFinal = %u\n", __func__, p->USRRetrainingRequiredFinal); + dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); +#endif + + s->TotalPixelBW = 0.0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k] + * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k] + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * p->VRatioChroma[k]) / (p->HTotal[k] / p->PixelClock[k]); + } + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + + s->LBLatencyHidingSourceLinesY[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthY[k] / dml_max(p->HRatio[k], 
1.0)), 1)) - (p->VTaps[k] - 1)); + s->LBLatencyHidingSourceLinesC[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthC[k] / dml_max(p->HRatioChroma[k], 1.0)), 1)) - (p->VTapsChroma[k] - 1)); + + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); + dml_print("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); + dml_print("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, p->LBBitPerPixel[k]); + dml_print("DML::%s: k=%u, HRatio = %f\n", __func__, k, p->HRatio[k]); + dml_print("DML::%s: k=%u, VTaps = %u\n", __func__, k, p->VTaps[k]); +#endif + + s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / p->VRatio[k] * (p->HTotal[k] / p->PixelClock[k]); + s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / p->VRatioChroma[k] * (p->HTotal[k] / p->PixelClock[k]); + + s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k]; + if (p->UnboundedRequestEnabled) { + s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k]) / (p->HTotal[k] / p->PixelClock[k]) / s->TotalPixelBW; + } + + s->LinesInDETY[k] = (dml_float_t)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + s->LinesInDETYRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETY[k], p->SwathHeightY[k])); + s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k]; + + s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; + + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightY[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatio[k]; + } + + if (p->BytePerPixelDETC[k] > 0) { + s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k]; + s->LinesInDETCRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETC[k], p->SwathHeightC[k])); + s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatioChroma[k]; + s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightC[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatioChroma[k]; + } + s->ActiveClockChangeLatencyHiding = dml_min(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC); + } else { + s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY; + } + + s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->DRAMClockChangeWatermark; + s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->FCLKChangeWatermark; + 
s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark; + + if (p->WritebackEnable[k]) { + s->WritebackLatencyHiding = (dml_float_t)p->WritebackInterfaceBufferSize * 1024.0 / ((dml_float_t)p->WritebackDestinationWidth[k] * (dml_float_t)p->WritebackDestinationHeight[k] / ((dml_float_t)p->WritebackSourceHeight[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k]) * 4.0); + if (p->WritebackPixelFormat[k] == dml_444_64) { + s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2; + } + s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark; + + s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark; + + s->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin); + s->ActiveFCLKChangeLatencyMargin[k] = dml_min(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin); + } + p->MaxActiveDRAMClockChangeLatencySupported[k] = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency); + p->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; + } + + *p->USRRetrainingSupport = true; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->USRRetrainingLatencyMargin[k] < 0)) { + *p->USRRetrainingSupport = false; + } + } + + s->FoundCriticalSurface = false; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && ((!s->FoundCriticalSurface) + || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) { + s->FoundCriticalSurface = true; + *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency; + } + } + + for (dml_uint_t i = 0; i < p->NumberOfActiveSurfaces; ++i) { + for (dml_uint_t j = 0; j < p->NumberOfActiveSurfaces; ++j) { + if (i == j || + (p->BlendingAndTiming[i] == i && p->BlendingAndTiming[j] == i) || + (p->BlendingAndTiming[j] == j && p->BlendingAndTiming[i] == j) || + (p->BlendingAndTiming[i] == p->BlendingAndTiming[j] && p->BlendingAndTiming[i] != i) || + (p->SynchronizeTimingsFinal && p->PixelClock[i] == p->PixelClock[j] && p->HTotal[i] == p->HTotal[j] && p->VTotal[i] == p->VTotal[j] && p->VActive[i] == p->VActive[j]) || + (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (p->DRRDisplay[i] || p->DRRDisplay[j]))) { + s->SynchronizedSurfaces[i][j] = true; + } else { + s->SynchronizedSurfaces[i][j] = false; + } + } + } + + s->FCLKChangeSupportNumber = 0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->ActiveFCLKChangeLatencyMargin[k] < 0)) { + if (!(p->PrefetchMode[k] <= 1)) { + s->FCLKChangeSupportNumber = 3; + } else if (s->FCLKChangeSupportNumber == 0) { + s->FCLKChangeSupportNumber = ((p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 
2 : 1); + s->LastSurfaceWithoutMargin = k; + } else if (((s->FCLKChangeSupportNumber == 1) && (p->DRRDisplay[k] || (!s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k]))) || (s->FCLKChangeSupportNumber == 2)) + s->FCLKChangeSupportNumber = 3; + } + } + + if (s->FCLKChangeSupportNumber == 0) { + *p->FCLKChangeSupport = dml_fclock_change_vactive; + } else if ((s->FCLKChangeSupportNumber == 1) || (s->FCLKChangeSupportNumber == 2)) { + *p->FCLKChangeSupport = dml_fclock_change_vblank; + } else { + *p->FCLKChangeSupport = dml_fclock_change_unsupported; + } + + s->DRAMClockChangeMethod = 0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame) + s->DRAMClockChangeMethod = 1; + else if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) + s->DRAMClockChangeMethod = 2; + } + + s->DRAMClockChangeSupportNumber = 0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (((s->DRAMClockChangeMethod == 0) && (s->ActiveDRAMClockChangeLatencyMargin[k] < 0)) || + ((s->DRAMClockChangeMethod == 1) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_full_frame)) || + ((s->DRAMClockChangeMethod == 2) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_sub_viewport) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe))) { + if (p->PrefetchMode[k] != 0) { // Don't need to support DRAM clock change, PrefetchMode 0 means needs DRAM clock change support + s->DRAMClockChangeSupportNumber = 3; + } else if (s->DRAMClockChangeSupportNumber == 0) { + s->DRAMClockChangeSupportNumber = (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1; + s->LastSurfaceWithoutMargin = k; + } else if (((s->DRAMClockChangeSupportNumber == 1) && (p->DRRDisplay[k] || !s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k])) || (s->DRAMClockChangeSupportNumber == 2)) { + s->DRAMClockChangeSupportNumber = 3; + } + } + } + + if (s->DRAMClockChangeMethod == 0) { // No MALL usage + if (s->DRAMClockChangeSupportNumber == 0) { + *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive; + } else if (s->DRAMClockChangeSupportNumber == 1) { + *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank; + } else if (s->DRAMClockChangeSupportNumber == 2) { + *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr; + } else { + *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported; + } + } else if (s->DRAMClockChangeMethod == 1) { // Any pipe using MALL full frame + if (s->DRAMClockChangeSupportNumber == 0) { + *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_full_frame; + } else if (s->DRAMClockChangeSupportNumber == 1) { + *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_full_frame; + } else if (s->DRAMClockChangeSupportNumber == 2) { + *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_full_frame; + } else { + *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported; + } + } else { // Any pipe using MALL subviewport + if (s->DRAMClockChangeSupportNumber == 0) { + *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_sub_vp; + } else if (s->DRAMClockChangeSupportNumber == 1) { + *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_sub_vp; + } else if (s->DRAMClockChangeSupportNumber == 2) { + *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_sub_vp; + } else { + *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported; + } + } + + for 
(dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + s->dst_y_pstate = (dml_uint_t)(dml_ceil((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (p->HTotal[k] / p->PixelClock[k]), 1)); + s->src_y_pstate_l = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatio[k], p->SwathHeightY[k])); + s->src_y_ahead_l = (dml_uint_t)(dml_floor(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]); + s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height[k]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + dml_print("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + dml_print("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); + dml_print("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); + dml_print("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); + dml_print("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); + dml_print("DML::%s: k=%u, meta_row_height = %u\n", __func__, k, p->meta_row_height[k]); + dml_print("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); +#endif + p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l; + + if (p->BytePerPixelDETC[k] > 0) { + s->src_y_pstate_c = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatioChroma[k], p->SwathHeightC[k])); + s->src_y_ahead_c = (dml_uint_t)(dml_floor(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); + s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_chroma[k]; + p->SubViewportLinesNeededInMALL[k] = (dml_uint_t)(dml_max(s->sub_vp_lines_l, s->sub_vp_lines_c)); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); + dml_print("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); + dml_print("DML::%s: k=%u, meta_row_height_chroma = %u\n", __func__, k, p->meta_row_height_chroma[k]); + dml_print("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); +#endif + } + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->DRAMClockChangeSupport); + dml_print("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->FCLKChangeSupport); + dml_print("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); + dml_print("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); +#endif +} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport + +static void CalculateDCFCLKDeepSleep( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t BytePerPixelY[], + dml_uint_t BytePerPixelC[], + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_uint_t SwathWidthY[], + dml_uint_t SwathWidthC[], + dml_uint_t DPPPerSurface[], + dml_float_t HRatio[], + dml_float_t HRatioChroma[], + dml_float_t PixelClock[], + dml_float_t PSCL_THROUGHPUT[], + dml_float_t PSCL_THROUGHPUT_CHROMA[], + dml_float_t Dppclk[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_uint_t ReturnBusWidth, + + // Output + dml_float_t *DCFClkDeepSleep) +{ + dml_float_t 
DisplayPipeLineDeliveryTimeLuma; + dml_float_t DisplayPipeLineDeliveryTimeChroma; + dml_float_t DCFClkDeepSleepPerSurface[__DML_NUM_PLANES__]; + dml_float_t ReadBandwidth = 0.0; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + + if (VRatio[k] <= 1) { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma = 0; + } else { + if (VRatioChroma[k] <= 1) { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (BytePerPixelC[k] > 0) { + DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, + __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); + } else { + DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; + } + DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, PixelClock[k]); + dml_print("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); +#endif + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } + + *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (dml_float_t) ReturnBusWidth); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); + dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + dml_print("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); + dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); +#endif + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); + } + dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); +} // CalculateDCFCLKDeepSleep + +static void CalculateUrgentBurstFactor( + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, + dml_uint_t swath_width_luma_ub, + dml_uint_t swath_width_chroma_ub, + dml_uint_t SwathHeightY, + dml_uint_t SwathHeightC, + dml_float_t LineTime, + dml_float_t UrgentLatency, + dml_float_t CursorBufferSize, + dml_uint_t CursorWidth, + dml_uint_t CursorBPP, + dml_float_t VRatio, + dml_float_t VRatioC, + dml_float_t BytePerPixelInDETY, + dml_float_t BytePerPixelInDETC, + dml_uint_t DETBufferSizeY, + dml_uint_t DETBufferSizeC, + // Output + dml_float_t *UrgentBurstFactorCursor, + dml_float_t *UrgentBurstFactorLuma, + dml_float_t *UrgentBurstFactorChroma, + dml_bool_t *NotEnoughUrgentLatencyHiding) +{ + dml_float_t LinesInDETLuma; + dml_float_t LinesInDETChroma; + dml_uint_t LinesInCursorBuffer; + dml_float_t CursorBufferSizeInTime; + dml_float_t DETBufferSizeInTimeLuma; + dml_float_t DETBufferSizeInTimeChroma; + + *NotEnoughUrgentLatencyHiding = 0; + + if (CursorWidth > 0) { + LinesInCursorBuffer = 1 << (dml_uint_t) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); + if 
(VRatio > 0) { + CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; + if (CursorBufferSizeInTime - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorCursor = 0; + } else { + *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); + } + } else { + *UrgentBurstFactorCursor = 1; + } + } + + LinesInDETLuma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; + + if (VRatio > 0) { + DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorLuma = 0; + } else { + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); + } + } else { + *UrgentBurstFactorLuma = 1; + } + + if (BytePerPixelInDETC > 0) { + LinesInDETChroma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub; + + if (VRatioC > 0) { + DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC; + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorChroma = 0; + } else { + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); + } + } else { + *UrgentBurstFactorChroma = 1; + } + } +} // CalculateUrgentBurstFactor + +static void CalculatePixelDeliveryTimes( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_float_t VRatioPrefetchY[], + dml_float_t VRatioPrefetchC[], + dml_uint_t swath_width_luma_ub[], + dml_uint_t swath_width_chroma_ub[], + dml_uint_t DPPPerSurface[], + dml_float_t HRatio[], + dml_float_t HRatioChroma[], + dml_float_t PixelClock[], + dml_float_t PSCL_THROUGHPUT[], + dml_float_t PSCL_THROUGHPUT_CHROMA[], + dml_float_t Dppclk[], + dml_uint_t BytePerPixelC[], + enum dml_rotation_angle SourceScan[], + dml_uint_t NumberOfCursors[], + dml_uint_t CursorWidth[], + dml_uint_t CursorBPP[], + dml_uint_t BlockWidth256BytesY[], + dml_uint_t BlockHeight256BytesY[], + dml_uint_t BlockWidth256BytesC[], + dml_uint_t BlockHeight256BytesC[], + + // Output + dml_float_t DisplayPipeLineDeliveryTimeLuma[], + dml_float_t DisplayPipeLineDeliveryTimeChroma[], + dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[], + dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[], + dml_float_t DisplayPipeRequestDeliveryTimeLuma[], + dml_float_t DisplayPipeRequestDeliveryTimeChroma[], + dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[], + dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[], + dml_float_t CursorRequestDeliveryTime[], + dml_float_t CursorRequestDeliveryTimePrefetch[]) +{ + dml_float_t req_per_swath_ub; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u : HRatio = %f\n", __func__, k, HRatio[k]); + dml_print("DML::%s: k=%u : VRatio = %f\n", __func__, k, VRatio[k]); + dml_print("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); + dml_print("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); + dml_print("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); + dml_print("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); + dml_print("DML::%s: k=%u : 
PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); + dml_print("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); + dml_print("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); + dml_print("DML::%s: k=%u : PixelClock = %f\n", __func__, k, PixelClock[k]); + dml_print("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); +#endif + + if (VRatio[k] <= 1) { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma[k] = 0; + } else { + if (VRatioChroma[k] <= 1) { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (VRatioPrefetchY[k] <= 1) { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (VRatioPrefetchC[k] <= 1) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); + dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); + dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); + dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); +#endif + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_vertical_rotation(SourceScan[k])) { + req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; + } else { + req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); +#endif + + DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; + DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; + if (BytePerPixelC[k] == 0) { + DisplayPipeRequestDeliveryTimeChroma[k] = 0; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (!dml_is_vertical_rotation(SourceScan[k])) { + req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; + } else { + req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); +#endif + DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; + } +#ifdef 
__DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); + dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); + dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); + dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); +#endif + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + dml_uint_t cursor_req_per_width; + cursor_req_per_width = (dml_uint_t)(dml_ceil((dml_float_t) CursorWidth[k] * (dml_float_t) CursorBPP[k] / 256.0 / 8.0, 1.0)); + if (NumberOfCursors[k] > 0) { + if (VRatio[k] <= 1) { + CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width; + } else { + CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; + } + if (VRatioPrefetchY[k] <= 1) { + CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width; + } else { + CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; + } + } else { + CursorRequestDeliveryTime[k] = 0; + CursorRequestDeliveryTimePrefetch[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u : NumberOfCursors = %u\n", __func__, k, NumberOfCursors[k]); + dml_print("DML::%s: k=%u : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); + dml_print("DML::%s: k=%u : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); +#endif + } +} // CalculatePixelDeliveryTimes + +static void CalculateMetaAndPTETimes( + dml_bool_t use_one_row_for_frame[], + dml_uint_t NumberOfActiveSurfaces, + dml_bool_t GPUVMEnable, + dml_uint_t MetaChunkSize, + dml_uint_t MinMetaChunkSizeBytes, + dml_uint_t HTotal[], + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_float_t DestinationLinesToRequestRowInVBlank[], + dml_float_t DestinationLinesToRequestRowInImmediateFlip[], + dml_bool_t DCCEnable[], + dml_float_t PixelClock[], + dml_uint_t BytePerPixelY[], + dml_uint_t BytePerPixelC[], + enum dml_rotation_angle SourceScan[], + dml_uint_t dpte_row_height[], + dml_uint_t dpte_row_height_chroma[], + dml_uint_t meta_row_width[], + dml_uint_t meta_row_width_chroma[], + dml_uint_t meta_row_height[], + dml_uint_t meta_row_height_chroma[], + dml_uint_t meta_req_width[], + dml_uint_t meta_req_width_chroma[], + dml_uint_t meta_req_height[], + dml_uint_t meta_req_height_chroma[], + dml_uint_t dpte_group_bytes[], + dml_uint_t PTERequestSizeY[], + dml_uint_t PTERequestSizeC[], + dml_uint_t PixelPTEReqWidthY[], + dml_uint_t PixelPTEReqHeightY[], + dml_uint_t PixelPTEReqWidthC[], + dml_uint_t PixelPTEReqHeightC[], + dml_uint_t dpte_row_width_luma_ub[], + dml_uint_t dpte_row_width_chroma_ub[], + + // Output + dml_float_t DST_Y_PER_PTE_ROW_NOM_L[], + dml_float_t DST_Y_PER_PTE_ROW_NOM_C[], + dml_float_t DST_Y_PER_META_ROW_NOM_L[], + dml_float_t DST_Y_PER_META_ROW_NOM_C[], + dml_float_t TimePerMetaChunkNominal[], + dml_float_t TimePerChromaMetaChunkNominal[], + dml_float_t TimePerMetaChunkVBlank[], + dml_float_t TimePerChromaMetaChunkVBlank[], + dml_float_t TimePerMetaChunkFlip[], + dml_float_t TimePerChromaMetaChunkFlip[], + dml_float_t 
time_per_pte_group_nom_luma[], + dml_float_t time_per_pte_group_vblank_luma[], + dml_float_t time_per_pte_group_flip_luma[], + dml_float_t time_per_pte_group_nom_chroma[], + dml_float_t time_per_pte_group_vblank_chroma[], + dml_float_t time_per_pte_group_flip_chroma[]) +{ + dml_uint_t meta_chunk_width; + dml_uint_t min_meta_chunk_width; + dml_uint_t meta_chunk_per_row_int; + dml_uint_t meta_row_remainder; + dml_uint_t meta_chunk_threshold; + dml_uint_t meta_chunks_per_row_ub; + dml_uint_t meta_chunk_width_chroma; + dml_uint_t min_meta_chunk_width_chroma; + dml_uint_t meta_chunk_per_row_int_chroma; + dml_uint_t meta_row_remainder_chroma; + dml_uint_t meta_chunk_threshold_chroma; + dml_uint_t meta_chunks_per_row_ub_chroma; + dml_uint_t dpte_group_width_luma; + dml_uint_t dpte_groups_per_row_luma_ub; + dml_uint_t dpte_group_width_chroma; + dml_uint_t dpte_groups_per_row_chroma_ub; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; + if (BytePerPixelC[k] == 0) { + DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; + } + DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; + if (BytePerPixelC[k] == 0) { + DST_Y_PER_META_ROW_NOM_C[k] = 0; + } else { + DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; + } + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (DCCEnable[k] == true) { + meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; + min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; + meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; + meta_row_remainder = meta_row_width[k] % meta_chunk_width; + if (!dml_is_vertical_rotation(SourceScan[k])) { + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; + } else { + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; + } + if (meta_row_remainder <= meta_chunk_threshold) { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + } else { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + } + TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + if (BytePerPixelC[k] == 0) { + TimePerChromaMetaChunkNominal[k] = 0; + TimePerChromaMetaChunkVBlank[k] = 0; + TimePerChromaMetaChunkFlip[k] = 0; + } else { + meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; + min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; + meta_chunk_per_row_int_chroma = (dml_uint_t)((dml_float_t) meta_row_width_chroma[k] / meta_chunk_width_chroma); + meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; + if (!dml_is_vertical_rotation(SourceScan[k])) { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; + } else { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; + } + if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; + } else { + meta_chunks_per_row_ub_chroma = 
meta_chunk_per_row_int_chroma + 2; + } + TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; + TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; + TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; + } + } else { + TimePerMetaChunkNominal[k] = 0; + TimePerMetaChunkVBlank[k] = 0; + TimePerMetaChunkFlip[k] = 0; + TimePerChromaMetaChunkNominal[k] = 0; + TimePerChromaMetaChunkVBlank[k] = 0; + TimePerChromaMetaChunkFlip[k] = 0; + } + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (GPUVMEnable == true) { + if (!dml_is_vertical_rotation(SourceScan[k])) { + dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqWidthY[k]); + } else { + dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqHeightY[k]); + } + + if (use_one_row_for_frame[k]) { + dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma / 2.0, 1.0)); + } else { + dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma, 1.0)); + } + + dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, use_one_row_for_frame[k]); + dml_print("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, dpte_group_bytes[k]); + dml_print("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, PTERequestSizeY[k]); + dml_print("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, PixelPTEReqWidthY[k]); + dml_print("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, PixelPTEReqHeightY[k]); + dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, dpte_row_width_luma_ub[k]); + dml_print("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); + dml_print("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); + + time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + if (BytePerPixelC[k] == 0) { + time_per_pte_group_nom_chroma[k] = 0; + time_per_pte_group_vblank_chroma[k] = 0; + time_per_pte_group_flip_chroma[k] = 0; + } else { + if (!dml_is_vertical_rotation(SourceScan[k])) { + dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqWidthC[k]); + } else { + dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqHeightC[k]); + } + + if (use_one_row_for_frame[k]) { + dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma / 2.0, 1.0)); + } else { + dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma, 1.0)); + } + dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, 
dpte_row_width_chroma_ub[k]); + dml_print("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); + dml_print("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); + + time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; + time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; + time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; + } + } else { + time_per_pte_group_nom_luma[k] = 0; + time_per_pte_group_vblank_luma[k] = 0; + time_per_pte_group_flip_luma[k] = 0; + time_per_pte_group_nom_chroma[k] = 0; + time_per_pte_group_vblank_chroma[k] = 0; + time_per_pte_group_flip_chroma[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInVBlank = %f\n", __func__, k, DestinationLinesToRequestRowInVBlank[k]); + dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); + + dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); + dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); + dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); + dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); + dml_print("DML::%s: k=%u, TimePerMetaChunkNominal = %f\n", __func__, k, TimePerMetaChunkNominal[k]); + dml_print("DML::%s: k=%u, TimePerMetaChunkVBlank = %f\n", __func__, k, TimePerMetaChunkVBlank[k]); + dml_print("DML::%s: k=%u, TimePerMetaChunkFlip = %f\n", __func__, k, TimePerMetaChunkFlip[k]); + dml_print("DML::%s: k=%u, TimePerChromaMetaChunkNominal = %f\n", __func__, k, TimePerChromaMetaChunkNominal[k]); + dml_print("DML::%s: k=%u, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, TimePerChromaMetaChunkVBlank[k]); + dml_print("DML::%s: k=%u, TimePerChromaMetaChunkFlip = %f\n", __func__, k, TimePerChromaMetaChunkFlip[k]); + dml_print("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, time_per_pte_group_nom_luma[k]); + dml_print("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, time_per_pte_group_vblank_luma[k]); + dml_print("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, time_per_pte_group_flip_luma[k]); + dml_print("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, time_per_pte_group_nom_chroma[k]); + dml_print("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, time_per_pte_group_vblank_chroma[k]); + dml_print("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, time_per_pte_group_flip_chroma[k]); +#endif + } +} // CalculateMetaAndPTETimes + +static void CalculateVMGroupAndRequestTimes( + dml_uint_t NumberOfActiveSurfaces, + dml_bool_t GPUVMEnable, + dml_uint_t GPUVMMaxPageTableLevels, + dml_uint_t HTotal[], + dml_uint_t BytePerPixelC[], + dml_float_t DestinationLinesToRequestVMInVBlank[], + dml_float_t DestinationLinesToRequestVMInImmediateFlip[], + dml_bool_t DCCEnable[], + dml_float_t PixelClock[], + dml_uint_t dpte_row_width_luma_ub[], + dml_uint_t dpte_row_width_chroma_ub[], + dml_uint_t vm_group_bytes[], + dml_uint_t dpde0_bytes_per_frame_ub_l[], + dml_uint_t 
dpde0_bytes_per_frame_ub_c[], + dml_uint_t meta_pte_bytes_per_frame_ub_l[], + dml_uint_t meta_pte_bytes_per_frame_ub_c[], + + // Output + dml_float_t TimePerVMGroupVBlank[], + dml_float_t TimePerVMGroupFlip[], + dml_float_t TimePerVMRequestVBlank[], + dml_float_t TimePerVMRequestFlip[]) +{ + dml_uint_t num_group_per_lower_vm_stage; + dml_uint_t num_req_per_lower_vm_stage; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); +#endif + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, DCCEnable = %u\n", __func__, k, DCCEnable[k]); + dml_print("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); + dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); + dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); + dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_l = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); + dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_c = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); +#endif + + if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { + if (DCCEnable[k] == false) { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0) + + dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], 1.0)); + } else { + num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0)); + } + } else { + if (GPUVMMaxPageTableLevels == 1) { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0) + + dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0)); + } else { + num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0)); + } + } else { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage = (dml_uint_t)(2.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) + + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1) + + dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) + + dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1)); + } else { + num_group_per_lower_vm_stage = (dml_uint_t)(1.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) + + dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1)); + } + } + } + + if (DCCEnable[k] == false) { + if (BytePerPixelC[k] > 0) { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; + } + } else { + if (GPUVMMaxPageTableLevels == 1) { + if (BytePerPixelC[k] > 0) { + num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage 
= meta_pte_bytes_per_frame_ub_l[k] / 64; + } + } else { + if (BytePerPixelC[k] > 0) { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; + } + } + } + + TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; + TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; + TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; + TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; + + if (GPUVMMaxPageTableLevels > 2) { + TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; + TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; + TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; + TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; + } + + } else { + TimePerVMGroupVBlank[k] = 0; + TimePerVMGroupFlip[k] = 0; + TimePerVMRequestVBlank[k] = 0; + TimePerVMRequestFlip[k] = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); + dml_print("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); + dml_print("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); + dml_print("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); +#endif + } +} // CalculateVMGroupAndRequestTimes + +static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scratch, + struct CalculateStutterEfficiency_params_st *p) +{ + dml_float_t DETBufferingTimeY = 0; + dml_float_t SwathWidthYCriticalSurface = 0; + dml_float_t SwathHeightYCriticalSurface = 0; + dml_float_t VActiveTimeCriticalSurface = 0; + dml_float_t FrameTimeCriticalSurface = 0; + dml_uint_t BytePerPixelYCriticalSurface = 0; + dml_float_t LinesToFinishSwathTransferStutterCriticalSurface = 0; + dml_uint_t DETBufferSizeYCriticalSurface = 0; + dml_float_t MinTTUVBlankCriticalSurface = 0; + dml_uint_t BlockWidth256BytesYCriticalSurface = 0; + dml_bool_t SinglePlaneCriticalSurface = 0; + dml_bool_t SinglePipeCriticalSurface = 0; + dml_float_t TotalCompressedReadBandwidth = 0; + dml_float_t TotalRowReadBandwidth = 0; + dml_float_t AverageDCCCompressionRate = 0; + dml_float_t EffectiveCompressedBufferSize = 0; + dml_float_t PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = 0; + dml_float_t StutterBurstTime = 0; + dml_uint_t TotalActiveWriteback = 0; + dml_float_t LinesInDETY = 0; + dml_float_t LinesInDETYRoundedDownToSwath = 0; + dml_float_t MaximumEffectiveCompressionLuma = 0; + dml_float_t MaximumEffectiveCompressionChroma = 0; + dml_float_t TotalZeroSizeRequestReadBandwidth = 0; + dml_float_t TotalZeroSizeCompressedReadBandwidth = 0; + dml_float_t AverageDCCZeroSizeFraction = 0; + dml_float_t AverageZeroSizeCompressionRate = 0; + + dml_bool_t FoundCriticalSurface = false; + + dml_uint_t TotalNumberOfActiveOTG = 0; + dml_float_t SinglePixelClock = 0; + dml_uint_t SingleHTotal = 0; + dml_uint_t SingleVTotal = 0; + dml_bool_t SameTiming = true; + + dml_float_t LastStutterPeriod = 0.0; + dml_float_t LastZ8StutterPeriod = 0.0; + + dml_uint_t 
SwathSizeCriticalSurface; + dml_uint_t LastChunkOfSwathSize; + dml_uint_t MissingPartOfLastSwathOfDETSize; + + TotalZeroSizeRequestReadBandwidth = 0; + TotalZeroSizeCompressedReadBandwidth = 0; + TotalRowReadBandwidth = 0; + TotalCompressedReadBandwidth = 0; + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { + if (p->DCCEnable[k] == true) { + if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) { + MaximumEffectiveCompressionLuma = 2; + } else { + MaximumEffectiveCompressionLuma = 4; + } + TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / dml_min(p->NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->NetDCCRateLuma[k]); + dml_print("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, MaximumEffectiveCompressionLuma); +#endif + TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k]; + TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; + + if (p->ReadBandwidthSurfaceChroma[k] > 0) { + if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) { + MaximumEffectiveCompressionChroma = 2; + } else { + MaximumEffectiveCompressionChroma = 4; + } + TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / dml_min(p->NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); + dml_print("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->NetDCCRateChroma[k]); + dml_print("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, MaximumEffectiveCompressionChroma); +#endif + TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k]; + TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; + } + } else { + TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k]; + } + TotalRowReadBandwidth = TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]); + } + } + + AverageDCCCompressionRate = p->TotalDataReadBandwidth / TotalCompressedReadBandwidth; + AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); + dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, 
TotalCompressedReadBandwidth); + dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); + dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); + dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); + dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); + dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); + dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); + dml_print("DML::%s: CompbufReservedSpace64B = %u\n", __func__, p->CompbufReservedSpace64B); + dml_print("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); + dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, p->CompressedBufferSizeInkByte); +#endif + if (AverageDCCZeroSizeFraction == 1) { + AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; + EffectiveCompressedBufferSize = (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * AverageZeroSizeCompressionRate; + } else if (AverageDCCZeroSizeFraction > 0) { + AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; + EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, + (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) + + dml_min(((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate, + ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); + dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); + dml_print("DML::%s: min 3 = %f\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); + dml_print("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); +#endif + } else { + EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, + (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + + ((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); + dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); + dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); + dml_print("DML::%s: 
EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); +#endif + + *p->StutterPeriod = 0; + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { + LinesInDETY = ((dml_float_t)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, p->SwathHeightY[k]); + DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((dml_float_t)p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); + dml_print("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, LinesInDETY); + dml_print("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); + dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, p->HTotal[k]); + dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, p->PixelClock[k]); + dml_print("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->VRatio[k]); + dml_print("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); + dml_print("DML::%s: k=%u,PixelClock = %f\n", __func__, k, p->PixelClock[k]); +#endif + + if (!FoundCriticalSurface || DETBufferingTimeY < *p->StutterPeriod) { + dml_bool_t isInterlaceTiming = p->Interlace[k] && !p->ProgressiveToInterlaceUnitInOPP; + + FoundCriticalSurface = true; + *p->StutterPeriod = DETBufferingTimeY; + FrameTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VTotal[k]/2.0, 1.0) : p->VTotal[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; + VActiveTimeCriticalSurface = (isInterlaceTiming ? 
dml_floor((dml_float_t)p->VActive[k]/2.0, 1.0) : p->VActive[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; + BytePerPixelYCriticalSurface = p->BytePerPixelY[k]; + SwathWidthYCriticalSurface = p->SwathWidthY[k]; + SwathHeightYCriticalSurface = p->SwathHeightY[k]; + BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k]; + LinesToFinishSwathTransferStutterCriticalSurface = p->SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); + DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k]; + MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k]; + SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0); + SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); + dml_print("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); + dml_print("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, MinTTUVBlankCriticalSurface); + dml_print("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, FrameTimeCriticalSurface); + dml_print("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, VActiveTimeCriticalSurface); + dml_print("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, BytePerPixelYCriticalSurface); + dml_print("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, SwathWidthYCriticalSurface); + dml_print("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, SwathHeightYCriticalSurface); + dml_print("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, BlockWidth256BytesYCriticalSurface); + dml_print("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, SinglePlaneCriticalSurface); + dml_print("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, SinglePipeCriticalSurface); + dml_print("DML::%s: k=%u, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); +#endif + } + } + } + + PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*p->StutterPeriod * p->TotalDataReadBandwidth, EffectiveCompressedBufferSize); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, p->ROBBufferSizeInKByte); + dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); + dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth); + dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, p->ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); + dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); + dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); + dml_print("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); + dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); + dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); + dml_print("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); +#endif + + StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW + (*p->StutterPeriod * p->TotalDataReadBandwidth - 
PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64) + *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW); + dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth)); + dml_print("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); + dml_print("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW); + dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); +#endif + StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW); + + dml_print("DML::%s: Time to finish residue swath=%f\n", __func__, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW); + + TotalActiveWriteback = 0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->WritebackEnable[k]) { + TotalActiveWriteback = TotalActiveWriteback + 1; + } + } + + if (TotalActiveWriteback == 0) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); + dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); + dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); + dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); +#endif + *p->StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitTime + StutterBurstTime) / *p->StutterPeriod) * 100; + *p->Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitZ8Time + StutterBurstTime) / *p->StutterPeriod) * 100; + *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
(dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + } else { + *p->StutterEfficiencyNotIncludingVBlank = 0.; + *p->Z8StutterEfficiencyNotIncludingVBlank = 0.; + *p->NumberOfStutterBurstsPerFrame = 0; + *p->Z8NumberOfStutterBurstsPerFrame = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); + dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); + dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); + dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { + if (p->BlendingAndTiming[k] == k) { + if (TotalNumberOfActiveOTG == 0) { + SinglePixelClock = p->PixelClock[k]; + SingleHTotal = p->HTotal[k]; + SingleVTotal = p->VTotal[k]; + } else if (SinglePixelClock != p->PixelClock[k] || SingleHTotal != p->HTotal[k] || SingleVTotal != p->VTotal[k]) { + SameTiming = false; + } + TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + } + } + } + + if (*p->StutterEfficiencyNotIncludingVBlank > 0) { + LastStutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; + + if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && + LastStutterPeriod + MinTTUVBlankCriticalSurface > p->StutterEnterPlusExitWatermark) { + *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100; + } else { + *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank; + } + } else { + *p->StutterEfficiency = 0; + } + + if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { + LastZ8StutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; + if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + MinTTUVBlankCriticalSurface > p->Z8StutterEnterPlusExitWatermark) { + *p->Z8StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100; + } else { + *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; + } + } else { + *p->Z8StutterEfficiency = 0.; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); + dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); + dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); + dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); + dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); + dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + SwathSizeCriticalSurface = 
(dml_uint_t)(BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface)); + LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024); + MissingPartOfLastSwathOfDETSize = (dml_uint_t)(dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - DETBufferSizeYCriticalSurface); + + *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && SinglePlaneCriticalSurface && SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) && + (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface); + dml_print("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, DETBufferSizeYCriticalSurface); + dml_print("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); + dml_print("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize); + dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize); + dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); +#endif +} // CalculateStutterEfficiency + +/// \CalculateSwathAndDETConfiguration +/// @brief Calculates swath width and different return buffers sizing (DET, CDB, etc.) +static void CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st *scratch, + struct CalculateSwathAndDETConfiguration_params_st *p) +{ + dml_uint_t MaximumSwathHeightY[__DML_NUM_PLANES__]; + dml_uint_t MaximumSwathHeightC[__DML_NUM_PLANES__]; + dml_uint_t RoundedUpMaxSwathSizeBytesY[__DML_NUM_PLANES__]; + dml_uint_t RoundedUpMaxSwathSizeBytesC[__DML_NUM_PLANES__]; + dml_uint_t RoundedUpSwathSizeBytesY[__DML_NUM_PLANES__]; + dml_uint_t RoundedUpSwathSizeBytesC[__DML_NUM_PLANES__]; + dml_uint_t SwathWidthSingleDPP[__DML_NUM_PLANES__]; + dml_uint_t SwathWidthSingleDPPChroma[__DML_NUM_PLANES__]; + + dml_uint_t TotalActiveDPP = 0; + dml_bool_t NoChromaOrLinearSurfaces = true; + dml_uint_t SurfaceDoingUnboundedRequest = 0; + + dml_uint_t DETBufferSizeInKByteForSwathCalculation; + + const long TTUFIFODEPTH = 8; + const long MAXIMUMCOMPRESSION = 4; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + dml_print("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); + } +#endif + CalculateSwathWidth(p->ForceSingleDPP, + p->NumberOfActiveSurfaces, + p->SourcePixelFormat, + p->SourceScan, + p->ViewportStationary, + p->ViewportWidth, + p->ViewportHeight, + p->ViewportXStart, + p->ViewportYStart, + p->ViewportXStartC, + p->ViewportYStartC, + p->SurfaceWidthY, + p->SurfaceWidthC, + p->SurfaceHeightY, + p->SurfaceHeightC, + p->ODMMode, + p->BytePerPixY, + p->BytePerPixC, + p->Read256BytesBlockHeightY, + p->Read256BytesBlockHeightC, + p->Read256BytesBlockWidthY, + p->Read256BytesBlockWidthC, + p->BlendingAndTiming, + p->HActive, + p->HRatio, + p->DPPPerSurface, + + // Output + SwathWidthSingleDPP, + SwathWidthSingleDPPChroma, + p->SwathWidth, + p->SwathWidthChroma, + MaximumSwathHeightY, + MaximumSwathHeightC, + p->swath_width_luma_ub, + p->swath_width_chroma_ub); + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + RoundedUpMaxSwathSizeBytesY[k] = 
(dml_uint_t)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); + RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); + dml_print("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); + dml_print("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); + dml_print("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); + dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]); + dml_print("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); + dml_print("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); + dml_print("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); + dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]); +#endif + if (p->SourcePixelFormat[k] == dml_420_10) { + RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesY[k], 256)); + RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesC[k], 256)); + } + } + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]); + if (p->DPPPerSurface[k] > 0) + SurfaceDoingUnboundedRequest = k; + if (p->SourcePixelFormat[k] == dml_420_8 || p->SourcePixelFormat[k] == dml_420_10 || + p->SourcePixelFormat[k] == dml_420_12 || p->SourcePixelFormat[k] == dml_rgbe_alpha + || p->SurfaceTiling[k] == dml_sw_linear) { + NoChromaOrLinearSurfaces = false; + } + } + + *p->UnboundedRequestEnabled = UnboundedRequest(p->UseUnboundedRequestingFinal, TotalActiveDPP, + NoChromaOrLinearSurfaces, p->Output[0]); + + CalculateDETBufferSize(p->DETSizeOverride, + p->UseMALLForPStateChange, + p->ForceSingleDPP, + p->NumberOfActiveSurfaces, + *p->UnboundedRequestEnabled, + p->nomDETInKByte, + p->MaxTotalDETInKByte, + p->ConfigReturnBufferSizeInKByte, + p->MinCompressedBufferSizeInKByte, + p->ConfigReturnBufferSegmentSizeInkByte, + p->CompressedBufferSegmentSizeInkByteFinal, + p->SourcePixelFormat, + p->ReadBandwidthLuma, + p->ReadBandwidthChroma, + RoundedUpMaxSwathSizeBytesY, + RoundedUpMaxSwathSizeBytesC, + p->DPPPerSurface, + + // Output + p->DETBufferSizeInKByte, // per hubp pipe + p->CompressedBufferSizeInkByte); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); + dml_print("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); + dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); + dml_print("DML::%s: UseUnboundedRequestingFinal = %u\n", __func__, p->UseUnboundedRequestingFinal); + dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); + dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); +#endif + + *p->ViewportSizeSupport = true; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + + DETBufferSizeInKByteForSwathCalculation = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe ? 
1024 : p->DETBufferSizeInKByte[k]); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); +#endif + + if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k]; + RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k]; + } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2; + RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k]; + } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k]; + RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2; + } else { + p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2; + RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2; + } + + if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || + p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { + *p->ViewportSizeSupport = false; + p->ViewportSizeSupportPerSurface[k] = false; + } else { + p->ViewportSizeSupportPerSurface[k] = true; + } + + if (p->SwathHeightC[k] == 0) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u All DET for plane0\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; + p->DETBufferSizeC[k] = 0; + } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u Half DET for plane0, half for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + } else { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = (dml_uint_t)(dml_floor(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + dml_print("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); + dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]); + dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]); + dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); + dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, 
k, RoundedUpSwathSizeBytesC[k]); + dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); + dml_print("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + dml_print("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); + dml_print("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); +#endif + + } + + *p->compbuf_reserved_space_64b = 2 * p->PixelChunkSizeInKByte * 1024 / 64; + if (*p->UnboundedRequestEnabled) { + *p->compbuf_reserved_space_64b = dml_max(*p->compbuf_reserved_space_64b, + (dml_float_t)(p->ROBBufferSizeInKByte * 1024/64) + - (dml_float_t)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / MAXIMUMCOMPRESSION/64)); + } + *p->compbuf_reserved_space_zs = 2 * p->PixelChunkSizeInKByte * 1024 / 256; +} // CalculateSwathAndDETConfiguration + +static void CalculateSwathWidth( + dml_bool_t ForceSingleDPP, + dml_uint_t NumberOfActiveSurfaces, + enum dml_source_format_class SourcePixelFormat[], + enum dml_rotation_angle SourceScan[], + dml_bool_t ViewportStationary[], + dml_uint_t ViewportWidth[], + dml_uint_t ViewportHeight[], + dml_uint_t ViewportXStart[], + dml_uint_t ViewportYStart[], + dml_uint_t ViewportXStartC[], + dml_uint_t ViewportYStartC[], + dml_uint_t SurfaceWidthY[], + dml_uint_t SurfaceWidthC[], + dml_uint_t SurfaceHeightY[], + dml_uint_t SurfaceHeightC[], + enum dml_odm_mode ODMMode[], + dml_uint_t BytePerPixY[], + dml_uint_t BytePerPixC[], + dml_uint_t Read256BytesBlockHeightY[], + dml_uint_t Read256BytesBlockHeightC[], + dml_uint_t Read256BytesBlockWidthY[], + dml_uint_t Read256BytesBlockWidthC[], + dml_uint_t BlendingAndTiming[], + dml_uint_t HActive[], + dml_float_t HRatio[], + dml_uint_t DPPPerSurface[], + + // Output + dml_uint_t SwathWidthSingleDPPY[], + dml_uint_t SwathWidthSingleDPPC[], + dml_uint_t SwathWidthY[], // per-pipe + dml_uint_t SwathWidthC[], // per-pipe + dml_uint_t MaximumSwathHeightY[], + dml_uint_t MaximumSwathHeightC[], + dml_uint_t swath_width_luma_ub[], // per-pipe + dml_uint_t swath_width_chroma_ub[]) // per-pipe +{ + enum dml_odm_mode MainSurfaceODMMode; + dml_uint_t surface_width_ub_l; + dml_uint_t surface_height_ub_l; + dml_uint_t surface_width_ub_c = 0; + dml_uint_t surface_height_ub_c = 0; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); +#endif + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_vertical_rotation(SourceScan[k])) { + SwathWidthSingleDPPY[k] = ViewportWidth[k]; + } else { + SwathWidthSingleDPPY[k] = ViewportHeight[k]; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, ViewportWidth[k]); + dml_print("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, ViewportHeight[k]); + dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); +#endif + + MainSurfaceODMMode = ODMMode[k]; + for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) { + if (BlendingAndTiming[k] == j) { + MainSurfaceODMMode = ODMMode[j]; + } + } + + if (ForceSingleDPP) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } else { + if (MainSurfaceODMMode == dml_odm_mode_combine_4to1) { + SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k], true))); + } else if (MainSurfaceODMMode == dml_odm_mode_combine_2to1) { + SwathWidthY[k] = 
(dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k], true))); + } else if (DPPPerSurface[k] == 2) { + SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u HActive=%u\n", __func__, k, HActive[k]); + dml_print("DML::%s: k=%u HRatio=%f\n", __func__, k, HRatio[k]); + dml_print("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); + dml_print("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); + dml_print("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); +#endif + + if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) { + SwathWidthC[k] = SwathWidthY[k] / 2; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthC[k] = SwathWidthY[k]; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; + } + + if (ForceSingleDPP == true) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + SwathWidthC[k] = SwathWidthSingleDPPC[k]; + } + + surface_width_ub_l = (dml_uint_t)dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); + surface_height_ub_l = (dml_uint_t)dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); + + if (!dml_is_vertical_rotation(SourceScan[k])) { + MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; + if (ViewportStationary[k] && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_floor(ViewportXStart[k] + SwathWidthY[k] + Read256BytesBlockWidthY[k] - 1, Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStart[k], Read256BytesBlockWidthY[k]))); + } else { + swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k])); + } + if (BytePerPixC[k] > 0) { + surface_width_ub_c = (dml_uint_t)dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); + if (ViewportStationary[k] && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_floor(ViewportXStartC[k] + SwathWidthC[k] + Read256BytesBlockWidthC[k] - 1, Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], Read256BytesBlockWidthC[k]))); + } else { + swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k])); + } + } else { + swath_width_chroma_ub[k] = 0; + } + } else { + MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; + + if (ViewportStationary[k] && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]))); + } else { + swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k])); + } + if (BytePerPixC[k] > 0) { + surface_height_ub_c = (dml_uint_t)dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); + if (ViewportStationary[k] && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_floor(ViewportYStartC[k] + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - 
dml_floor(ViewportYStartC[k], Read256BytesBlockHeightC[k]))); + } else { + swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k])); + } + } else { + swath_width_chroma_ub[k] = 0; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); + dml_print("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); + dml_print("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); + dml_print("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); + dml_print("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); + dml_print("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); + dml_print("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); + dml_print("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); + dml_print("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, ViewportStationary[k]); + dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); + dml_print("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); + dml_print("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); + dml_print("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); + dml_print("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); +#endif + + } +} // CalculateSwathWidth + +static noinline_for_stack dml_float_t CalculateExtraLatency( + dml_uint_t RoundTripPingLatencyCycles, + dml_uint_t ReorderingBytes, + dml_float_t DCFCLK, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t PixelChunkSizeInKByte, + dml_uint_t TotalNumberOfDCCActiveDPP, + dml_uint_t MetaChunkSize, + dml_float_t ReturnBW, + dml_bool_t GPUVMEnable, + dml_bool_t HostVMEnable, + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t NumberOfDPP[], + dml_uint_t dpte_group_bytes[], + dml_float_t HostVMInefficiencyFactor, + dml_uint_t HostVMMinPageSize, + dml_uint_t HostVMMaxNonCachedPageTableLevels) +{ + dml_float_t ExtraLatencyBytes; + dml_float_t ExtraLatency; + + ExtraLatencyBytes = CalculateExtraLatencyBytes( + ReorderingBytes, + TotalNumberOfActiveDPP, + PixelChunkSizeInKByte, + TotalNumberOfDCCActiveDPP, + MetaChunkSize, + GPUVMEnable, + HostVMEnable, + NumberOfActiveSurfaces, + NumberOfDPP, + dpte_group_bytes, + HostVMInefficiencyFactor, + HostVMMinPageSize, + HostVMMaxNonCachedPageTableLevels); + + ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); + dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); +#endif + + return ExtraLatency; +} // CalculateExtraLatency + +static dml_uint_t CalculateHostVMDynamicLevels( + dml_bool_t GPUVMEnable, + dml_bool_t HostVMEnable, + dml_uint_t HostVMMinPageSize, + dml_uint_t HostVMMaxNonCachedPageTableLevels) +{ + dml_uint_t HostVMDynamicLevels = 0; + + if (GPUVMEnable && HostVMEnable) { + if (HostVMMinPageSize < 2048) + HostVMDynamicLevels = 
HostVMMaxNonCachedPageTableLevels; + else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) + HostVMDynamicLevels = (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 1); + else + HostVMDynamicLevels = (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 2); + } else { + HostVMDynamicLevels = 0; + } + return HostVMDynamicLevels; +} + +static dml_uint_t CalculateExtraLatencyBytes(dml_uint_t ReorderingBytes, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t PixelChunkSizeInKByte, + dml_uint_t TotalNumberOfDCCActiveDPP, + dml_uint_t MetaChunkSize, + dml_bool_t GPUVMEnable, + dml_bool_t HostVMEnable, + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t NumberOfDPP[], + dml_uint_t dpte_group_bytes[], + dml_float_t HostVMInefficiencyFactor, + dml_uint_t HostVMMinPageSize, + dml_uint_t HostVMMaxNonCachedPageTableLevels) +{ + dml_uint_t HostVMDynamicLevels = CalculateHostVMDynamicLevels(GPUVMEnable, HostVMEnable, HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels); + dml_float_t ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; + + if (GPUVMEnable == true) { + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; + } + } + return (dml_uint_t)(ret); +} + +static dml_float_t CalculateUrgentLatency( + dml_float_t UrgentLatencyPixelDataOnly, + dml_float_t UrgentLatencyPixelMixedWithVMData, + dml_float_t UrgentLatencyVMDataOnly, + dml_bool_t DoUrgentLatencyAdjustment, + dml_float_t UrgentLatencyAdjustmentFabricClockComponent, + dml_float_t UrgentLatencyAdjustmentFabricClockReference, + dml_float_t FabricClock) +{ + dml_float_t ret; + + ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); + if (DoUrgentLatencyAdjustment == true) { + ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); + } + return ret; +} + +static dml_float_t RequiredDTBCLK( + dml_bool_t DSCEnable, + dml_float_t PixelClock, + enum dml_output_format_class OutputFormat, + dml_float_t OutputBpp, + dml_uint_t DSCSlices, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_uint_t AudioRate, + dml_uint_t AudioLayout) +{ + if (DSCEnable != true) { + return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + } else { + dml_float_t PixelWordRate = PixelClock / (OutputFormat == dml_444 ? 1 : 2); + dml_float_t HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + dml_float_t HCBlank = 64 + 32 * dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); + dml_float_t AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + dml_float_t HActiveTribyteRate = PixelWordRate * HCActive / HActive; + return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; + } +} + +static void UseMinimumDCFCLK(struct display_mode_lib_scratch_st *scratch, struct UseMinimumDCFCLK_params_st *p) +{ + struct UseMinimumDCFCLK_locals_st *s = &scratch->UseMinimumDCFCLK_locals; + + s->NormalEfficiency = p->PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; + for (dml_uint_t j = 0; j < 2; ++j) { + + + s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = 0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = s->TotalMaxPrefetchFlipDPTERowBandwidth[j] + p->NoOfDPP[j][k] * p->DPTEBytesPerRow[j][k] / (15.75 * p->HTotal[k] / p->PixelClock[k]); + } + + for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) { + s->NoOfDPPState[k] = p->NoOfDPP[j][k]; + } + + s->DPTEBandwidth = s->TotalMaxPrefetchFlipDPTERowBandwidth[j]; + + s->DCFCLKRequiredForAverageBandwidth = dml_max(p->ProjectedDCFCLKDeepSleep[j], s->DPTEBandwidth / s->NormalEfficiency / p->ReturnBusWidth); + + s->ExtraLatencyBytes = CalculateExtraLatencyBytes(p->ReorderingBytes, p->TotalNumberOfActiveDPP[j], p->PixelChunkSizeInKByte, p->TotalNumberOfDCCActiveDPP[j], + p->MetaChunkSize, p->GPUVMEnable, p->HostVMEnable, p->NumberOfActiveSurfaces, s->NoOfDPPState, p->dpte_group_bytes, + 1, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels); + s->ExtraLatencyCycles = p->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + s->ExtraLatencyBytes / s->NormalEfficiency / p->ReturnBusWidth; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + dml_float_t DCFCLKCyclesRequiredInPrefetch; + dml_float_t PrefetchTime; + + s->PixelDCFCLKCyclesRequiredInPrefetch[k] = (p->PrefetchLinesY[j][k] * p->swath_width_luma_ub_all_states[j][k] * p->BytePerPixelY[k] + p->PrefetchLinesC[j][k] * p->swath_width_chroma_ub_all_states[j][k] * p->BytePerPixelC[k]) / s->NormalEfficiency / p->ReturnBusWidth; + DCFCLKCyclesRequiredInPrefetch = 2 * s->ExtraLatencyCycles / s->NoOfDPPState[k] + p->PDEAndMetaPTEBytesPerFrame[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth * (p->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * p->DPTEBytesPerRow[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth + 2 * p->MetaRowBytes[j][k] / s->NormalEfficiency / p->ReturnBusWidth + s->PixelDCFCLKCyclesRequiredInPrefetch[k]; + s->PrefetchPixelLinesTime[k] = dml_max(p->PrefetchLinesY[j][k], p->PrefetchLinesC[j][k]) * p->HTotal[k] / p->PixelClock[k]; + s->DynamicMetadataVMExtraLatency[k] = (p->GPUVMEnable == true && p->DynamicMetadataEnable[k] == true && p->DynamicMetadataVMEnabled == true) ? p->UrgLatency * p->GPUVMMaxPageTableLevels * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; + + s->MinimumTWait = CalculateTWait(p->MaxPrefetchMode, + p->UseMALLForPStateChange[k], + p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + p->DRRDisplay[k], + p->DRAMClockChangeLatencyFinal, + p->FCLKChangeLatency, + p->UrgLatency, + p->SREnterPlusExitTime); + + PrefetchTime = (p->MaximumVStartup[j][k] - 1) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - p->UrgLatency * ((p->GPUVMMaxPageTableLevels <= 2 ? p->GPUVMMaxPageTableLevels : p->GPUVMMaxPageTableLevels - 2) * (p->HostVMEnable == true ? 
p->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - s->DynamicMetadataVMExtraLatency[k]; + + if (PrefetchTime > 0) { + dml_float_t ExpectedVRatioPrefetch; + ExpectedVRatioPrefetch = s->PrefetchPixelLinesTime[k] / (PrefetchTime * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->NoOfDPPState[k] * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / s->PrefetchPixelLinesTime[k] * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4); + if (p->HostVMEnable == true || p->ImmediateFlipRequirement == true) { + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->DCFCLKRequiredForPeakBandwidthPerSurface[k] + s->NoOfDPPState[k] * s->DPTEBandwidth / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth; + } + } else { + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState; + } + if (p->DynamicMetadataEnable[k] == true) { + dml_float_t TSetupPipe; + dml_float_t TdmbfPipe; + dml_float_t TdmsksPipe; + dml_float_t TdmecPipe; + dml_float_t AllowedTimeForUrgentExtraLatency; + + CalculateVUpdateAndDynamicMetadataParameters( + p->MaxInterDCNTileRepeaters, + p->RequiredDPPCLKPerSurface[j][k], + p->RequiredDISPCLK[j], + p->ProjectedDCFCLKDeepSleep[j], + p->PixelClock[k], + p->HTotal[k], + p->VTotal[k] - p->VActive[k], + p->DynamicMetadataTransmittedBytes[k], + p->DynamicMetadataLinesBeforeActiveRequired[k], + p->Interlace[k], + p->ProgressiveToInterlaceUnitInOPP, + + // Output + &TSetupPipe, + &TdmbfPipe, + &TdmecPipe, + &TdmsksPipe, + &s->dummy1, + &s->dummy2, + &s->dummy3); + + AllowedTimeForUrgentExtraLatency = p->MaximumVStartup[j][k] * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - s->DynamicMetadataVMExtraLatency[k]; + if (AllowedTimeForUrgentExtraLatency > 0) { + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = dml_max(s->DCFCLKRequiredForPeakBandwidthPerSurface[k], s->ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); + } else { + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState; + } + } + } + s->DCFCLKRequiredForPeakBandwidth = 0; + for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) { + s->DCFCLKRequiredForPeakBandwidth = s->DCFCLKRequiredForPeakBandwidth + s->DCFCLKRequiredForPeakBandwidthPerSurface[k]; + } + s->MinimumTvmPlus2Tr0 = p->UrgLatency * (p->GPUVMEnable == true ? (p->HostVMEnable == true ? 
(p->GPUVMMaxPageTableLevels + 2) * (p->HostVMMaxNonCachedPageTableLevels + 1) - 1 : p->GPUVMMaxPageTableLevels + 1) : 0); + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + dml_float_t MaximumTvmPlus2Tr0PlusTsw; + MaximumTvmPlus2Tr0PlusTsw = (p->MaximumVStartup[j][k] - 2) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - s->DynamicMetadataVMExtraLatency[k]; + if (MaximumTvmPlus2Tr0PlusTsw <= s->MinimumTvmPlus2Tr0 + s->PrefetchPixelLinesTime[k] / 4) { + s->DCFCLKRequiredForPeakBandwidth = p->DCFCLKPerState; + } else { + s->DCFCLKRequiredForPeakBandwidth = dml_max3(s->DCFCLKRequiredForPeakBandwidth, + 2 * s->ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0 - s->PrefetchPixelLinesTime[k] / 4), + (2 * s->ExtraLatencyCycles + s->PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0)); + } + } + p->DCFCLKState[j] = dml_min(p->DCFCLKPerState, 1.05 * dml_max(s->DCFCLKRequiredForAverageBandwidth, s->DCFCLKRequiredForPeakBandwidth)); + } +} + + +static dml_bool_t UnboundedRequest(enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal, + dml_uint_t TotalNumberOfActiveDPP, + dml_bool_t NoChromaOrLinear, + enum dml_output_encoder_class Output) +{ + dml_bool_t ret_val = false; + + ret_val = (UseUnboundedRequestingFinal != dml_unbounded_requesting_disable + && TotalNumberOfActiveDPP == 1 && NoChromaOrLinear); + if (UseUnboundedRequestingFinal == dml_unbounded_requesting_edp_only && Output != dml_edp) { + ret_val = false; + } + return (ret_val); +} + +static void CalculateSurfaceSizeInMall( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t MALLAllocatedForDCN, + enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[], + dml_bool_t DCCEnable[], + dml_bool_t ViewportStationary[], + dml_uint_t ViewportXStartY[], + dml_uint_t ViewportYStartY[], + dml_uint_t ViewportXStartC[], + dml_uint_t ViewportYStartC[], + dml_uint_t ViewportWidthY[], + dml_uint_t ViewportHeightY[], + dml_uint_t BytesPerPixelY[], + dml_uint_t ViewportWidthC[], + dml_uint_t ViewportHeightC[], + dml_uint_t BytesPerPixelC[], + dml_uint_t SurfaceWidthY[], + dml_uint_t SurfaceWidthC[], + dml_uint_t SurfaceHeightY[], + dml_uint_t SurfaceHeightC[], + dml_uint_t Read256BytesBlockWidthY[], + dml_uint_t Read256BytesBlockWidthC[], + dml_uint_t Read256BytesBlockHeightY[], + dml_uint_t Read256BytesBlockHeightC[], + dml_uint_t ReadBlockWidthY[], + dml_uint_t ReadBlockWidthC[], + dml_uint_t ReadBlockHeightY[], + dml_uint_t ReadBlockHeightC[], + + // Output + dml_uint_t SurfaceSizeInMALL[], + dml_bool_t *ExceededMALLSize) +{ + dml_uint_t TotalSurfaceSizeInMALL = 0; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (ViewportStationary[k]) { + SurfaceSizeInMALL[k] = (dml_uint_t)(dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], ReadBlockWidthY[k])) * + dml_min(dml_ceil(SurfaceHeightY[k], ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * + BytesPerPixelY[k]); + + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * + 
dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * BytesPerPixelC[k]); + } + if (DCCEnable[k] == true) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) * + dml_min(dml_ceil(SurfaceHeightY[k], 8 * Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256); + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_min(dml_ceil(SurfaceWidthC[k], 8 * Read256BytesBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], 8 * Read256BytesBlockWidthC[k])) * + dml_min(dml_ceil(SurfaceHeightC[k], 8 * Read256BytesBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256); + } + } + } else { + SurfaceSizeInMALL[k] = (dml_uint_t)(dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]); + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * + dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]); + } + if (DCCEnable[k] == true) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) * + dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256); + + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) * + dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256); + } + } + } + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; + } + *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); +} // CalculateSurfaceSizeInMall + +static void CalculateDETBufferSize( + dml_uint_t DETSizeOverride[], + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + dml_bool_t ForceSingleDPP, + dml_uint_t NumberOfActiveSurfaces, + dml_bool_t UnboundedRequestEnabled, + dml_uint_t nomDETInKByte, + dml_uint_t MaxTotalDETInKByte, + 
dml_uint_t ConfigReturnBufferSizeInKByte, + dml_uint_t MinCompressedBufferSizeInKByte, + dml_uint_t ConfigReturnBufferSegmentSizeInkByte, + dml_uint_t CompressedBufferSegmentSizeInkByteFinal, + enum dml_source_format_class SourcePixelFormat[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_uint_t RoundedUpMaxSwathSizeBytesY[], + dml_uint_t RoundedUpMaxSwathSizeBytesC[], + dml_uint_t DPPPerSurface[], + // Output + dml_uint_t DETBufferSizeInKByte[], + dml_uint_t *CompressedBufferSizeInkByte) +{ + dml_uint_t DETBufferSizePoolInKByte; + dml_uint_t NextDETBufferPieceInKByte; + dml_bool_t DETPieceAssignedToThisSurfaceAlready[__DML_NUM_PLANES__]; + dml_bool_t NextPotentialSurfaceToAssignDETPieceFound; + dml_uint_t NextSurfaceToAssignDETPiece; + dml_float_t TotalBandwidth; + dml_float_t BandwidthOfSurfacesNotAssignedDETPiece; + dml_uint_t max_minDET; + dml_uint_t minDET; + dml_uint_t minDET_pipe; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); + dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); + dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); + dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %u\n", __func__, CompressedBufferSegmentSizeInkByteFinal); +#endif + + // Note: Will use default det size if that fits 2 swaths + if (UnboundedRequestEnabled) { + if (DETSizeOverride[0] > 0) { + DETBufferSizeInKByte[0] = DETSizeOverride[0]; + } else { + DETBufferSizeInKByte[0] = (dml_uint_t) dml_max(128.0, dml_ceil(2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[0] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)); + } + *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; + } else { + DETBufferSizePoolInKByte = MaxTotalDETInKByte; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + DETBufferSizeInKByte[k] = 0; + if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) { + max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte; + } else { + max_minDET = nomDETInKByte; + } + minDET = 128; + minDET_pipe = 0; + + // add DET resource until can hold 2 full swaths + while (minDET <= max_minDET && minDET_pipe == 0) { + if (2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) + minDET_pipe = minDET; + minDET = minDET + ConfigReturnBufferSegmentSizeInkByte; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u minDET = %u\n", __func__, k, minDET); + dml_print("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET); + dml_print("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe); + dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]); + dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]); +#endif + + if (minDET_pipe == 0) { + minDET_pipe = (dml_uint_t)(dml_max(128, dml_ceil(((dml_float_t)RoundedUpMaxSwathSizeBytesY[k] + 
(dml_float_t)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte))); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, minDET_pipe); +#endif + } + + if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) { + DETBufferSizeInKByte[k] = 0; + } else if (DETSizeOverride[k] > 0) { + DETBufferSizeInKByte[k] = DETSizeOverride[k]; + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; + } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { + DETBufferSizeInKByte[k] = minDET_pipe; + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); + dml_print("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, DETSizeOverride[k]); + dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + dml_print("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte); +#endif + } + + TotalBandwidth = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) + TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + } + dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); +#endif + dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); + BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + + if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + } else if (DETSizeOverride[k] > 0 || (((dml_float_t) (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * (dml_float_t) DETBufferSizeInKByte[k] / (dml_float_t) MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; + } else { + DETPieceAssignedToThisSurfaceAlready[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); + dml_print("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece); +#endif + } + + for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) { + NextPotentialSurfaceToAssignDETPieceFound = false; + NextSurfaceToAssignDETPiece = 0; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); + dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); + dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece); +#endif + if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || + ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { + NextSurfaceToAssignDETPiece = k; + NextPotentialSurfaceToAssignDETPieceFound = true; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + dml_print("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); +#endif + } + + if (NextPotentialSurfaceToAssignDETPieceFound) { + // Note: To show the banker's rounding behavior in VBA and also the fact that the DET buffer size varies due to precision issue + // + //dml_float_t tmp1 = ((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / + // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); + //dml_float_t tmp2 = dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / + // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); + // + //dml_print("DML::%s: j=%u, tmp1 = %f\n", __func__, j, tmp1); + //dml_print("DML::%s: j=%u, tmp2 = %f\n", __func__, j, tmp2); + + NextDETBufferPieceInKByte = (dml_uint_t)(dml_min( + dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / + ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte), true) + * (ForceSingleDPP ? 
1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte, + dml_floor((dml_float_t) DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte); + dml_print("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece); + dml_print("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); + dml_print("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte); + dml_print("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); +#endif + + DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); +#ifdef __DML_VBA_DEBUG__ + dml_print("to %u\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); +#endif + + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; + DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; + BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + } + } + *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; + } + *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / ConfigReturnBufferSegmentSizeInkByte; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); + dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + } +#endif +} // CalculateDETBufferSize + + +/// @brief Calculate the bound for return buffer sizing +static void CalculateMaxDETAndMinCompressedBufferSize( + dml_uint_t ConfigReturnBufferSizeInKByte, + dml_uint_t ConfigReturnBufferSegmentSizeInKByte, + dml_uint_t ROBBufferSizeInKByte, + dml_uint_t MaxNumDPP, + dml_bool_t nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size + dml_uint_t nomDETInKByteOverrideValue, // VBA_DELTA + + // Output + dml_uint_t *MaxTotalDETInKByte, + dml_uint_t *nomDETInKByte, + dml_uint_t *MinCompressedBufferSizeInKByte) +{ + *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; + *nomDETInKByte = (dml_uint_t)(dml_floor((dml_float_t) *MaxTotalDETInKByte / (dml_float_t) MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); + *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + 
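/* Worked example (illustrative numbers only, not taken from any particular ASIC):
+  * with ConfigReturnBufferSizeInKByte = 1280, ConfigReturnBufferSegmentSizeInKByte = 64
+  * and MaxNumDPP = 4, the computation above gives MaxTotalDETInKByte = 1280 - 64 = 1216,
+  * nomDETInKByte = dml_floor(1216 / 4, 64) = 256 and
+  * MinCompressedBufferSizeInKByte = 1280 - 1216 = 64. */
+ 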
dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); + dml_print("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); + dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); + dml_print("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); + dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); +#endif + + if (nomDETInKByteOverrideEnable) { + *nomDETInKByte = nomDETInKByteOverrideValue; + dml_print("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); + } +} // CalculateMaxDETAndMinCompressedBufferSize + +/// @brief Calculate all the RQ request attributes, like row height and # swath +static void CalculateVMRowAndSwath(struct display_mode_lib_scratch_st *scratch, + struct CalculateVMRowAndSwath_params_st *p) +{ + struct CalculateVMRowAndSwath_locals_st *s = &scratch->CalculateVMRowAndSwath_locals; + + s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->GPUVMEnable, p->HostVMEnable, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels); + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->HostVMEnable == true) { + p->vm_group_bytes[k] = 512; + p->dpte_group_bytes[k] = 512; + } else if (p->GPUVMEnable == true) { + p->vm_group_bytes[k] = 2048; + if (p->GPUVMMinPageSizeKBytes[k] >= 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan)) { + p->dpte_group_bytes[k] = 512; + } else { + p->dpte_group_bytes[k] = 2048; + } + } else { + p->vm_group_bytes[k] = 0; + p->dpte_group_bytes[k] = 0; + } + + if (p->myPipe[k].SourcePixelFormat == dml_420_8 || p->myPipe[k].SourcePixelFormat == dml_420_10 || + p->myPipe[k].SourcePixelFormat == dml_420_12 || p->myPipe[k].SourcePixelFormat == dml_rgbe_alpha) { + if ((p->myPipe[k].SourcePixelFormat == dml_420_10 || p->myPipe[k].SourcePixelFormat == dml_420_12) && !dml_is_vertical_rotation(p->myPipe[k].SourceScan)) { + s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2; + s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k]; + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma; + s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma; + } + + s->PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( + p->myPipe[k].ViewportStationary, + p->myPipe[k].DCCEnable, + p->myPipe[k].DPPPerSurface, + p->myPipe[k].BlockHeight256BytesC, + p->myPipe[k].BlockWidth256BytesC, + p->myPipe[k].SourcePixelFormat, + p->myPipe[k].SurfaceTiling, + p->myPipe[k].BytePerPixelC, + p->myPipe[k].SourceScan, + p->SwathWidthC[k], + p->myPipe[k].ViewportHeightChroma, + p->myPipe[k].ViewportXStartC, + p->myPipe[k].ViewportYStartC, + p->GPUVMEnable, + p->GPUVMMaxPageTableLevels, + p->GPUVMMinPageSizeKBytes[k], + s->PTEBufferSizeInRequestsForChroma[k], + p->myPipe[k].PitchC, + p->myPipe[k].DCCMetaPitchC, + p->myPipe[k].BlockWidthC, + p->myPipe[k].BlockHeightC, + + // Output + &s->MetaRowByteC[k], + &s->PixelPTEBytesPerRowC[k], + &s->PixelPTEBytesPerRowStorageC[k], + &p->dpte_row_width_chroma_ub[k], + &p->dpte_row_height_chroma[k], + &p->dpte_row_height_linear_chroma[k], + &s->PixelPTEBytesPerRowC_one_row_per_frame[k], + &s->dpte_row_width_chroma_ub_one_row_per_frame[k], + &s->dpte_row_height_chroma_one_row_per_frame[k], + &p->meta_req_width_chroma[k], + &p->meta_req_height_chroma[k], + &p->meta_row_width_chroma[k], + &p->meta_row_height_chroma[k], + &p->PixelPTEReqWidthC[k], + &p->PixelPTEReqHeightC[k], + 
&p->PTERequestSizeC[k], + &p->dpde0_bytes_per_frame_ub_c[k], + &p->meta_pte_bytes_per_frame_ub_c[k]); + + p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines ( + p->myPipe[k].VRatioChroma, + p->myPipe[k].VTapsChroma, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightC, + p->myPipe[k].SourceScan, + p->myPipe[k].ViewportStationary, + p->SwathWidthC[k], + p->myPipe[k].ViewportHeightChroma, + p->myPipe[k].ViewportXStartC, + p->myPipe[k].ViewportYStartC, + + // Output + &p->VInitPreFillC[k], + &p->MaxNumSwathC[k]); + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; + s->PTEBufferSizeInRequestsForChroma[k] = 0; + s->PixelPTEBytesPerRowC[k] = 0; + s->PixelPTEBytesPerRowStorageC[k] = 0; + s->PDEAndMetaPTEBytesFrameC = 0; + s->MetaRowByteC[k] = 0; + p->MaxNumSwathC[k] = 0; + p->PrefetchSourceLinesC[k] = 0; + s->dpte_row_height_chroma_one_row_per_frame[k] = 0; + s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + } + + s->PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( + p->myPipe[k].ViewportStationary, + p->myPipe[k].DCCEnable, + p->myPipe[k].DPPPerSurface, + p->myPipe[k].BlockHeight256BytesY, + p->myPipe[k].BlockWidth256BytesY, + p->myPipe[k].SourcePixelFormat, + p->myPipe[k].SurfaceTiling, + p->myPipe[k].BytePerPixelY, + p->myPipe[k].SourceScan, + p->SwathWidthY[k], + p->myPipe[k].ViewportHeight, + p->myPipe[k].ViewportXStart, + p->myPipe[k].ViewportYStart, + p->GPUVMEnable, + p->GPUVMMaxPageTableLevels, + p->GPUVMMinPageSizeKBytes[k], + s->PTEBufferSizeInRequestsForLuma[k], + p->myPipe[k].PitchY, + p->myPipe[k].DCCMetaPitchY, + p->myPipe[k].BlockWidthY, + p->myPipe[k].BlockHeightY, + + // Output + &s->MetaRowByteY[k], + &s->PixelPTEBytesPerRowY[k], + &s->PixelPTEBytesPerRowStorageY[k], + &p->dpte_row_width_luma_ub[k], + &p->dpte_row_height_luma[k], + &p->dpte_row_height_linear_luma[k], + &s->PixelPTEBytesPerRowY_one_row_per_frame[k], + &s->dpte_row_width_luma_ub_one_row_per_frame[k], + &s->dpte_row_height_luma_one_row_per_frame[k], + &p->meta_req_width[k], + &p->meta_req_height[k], + &p->meta_row_width[k], + &p->meta_row_height[k], + &p->PixelPTEReqWidthY[k], + &p->PixelPTEReqHeightY[k], + &p->PTERequestSizeY[k], + &p->dpde0_bytes_per_frame_ub_l[k], + &p->meta_pte_bytes_per_frame_ub_l[k]); + + p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatio, + p->myPipe[k].VTaps, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightY, + p->myPipe[k].SourceScan, + p->myPipe[k].ViewportStationary, + p->SwathWidthY[k], + p->myPipe[k].ViewportHeight, + p->myPipe[k].ViewportXStart, + p->myPipe[k].ViewportYStart, + + // Output + &p->VInitPreFillY[k], + &p->MaxNumSwathY[k]); + + p->PDEAndMetaPTEBytesFrame[k] = (s->PDEAndMetaPTEBytesFrameY + s->PDEAndMetaPTEBytesFrameC) * (1 + 8 * s->HostVMDynamicLevels); + p->MetaRowByte[k] = s->MetaRowByteY[k] + s->MetaRowByteC[k]; + + if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { + p->PTEBufferSizeNotExceeded[k] = true; + } else { + p->PTEBufferSizeNotExceeded[k] = false; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, 
s->PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
+#endif
+ }
+ s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
+ s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrame = %u\n", __func__, k, p->PDEAndMetaPTEBytesFrame[k]);
+ dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameY = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameY);
+ dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameC = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameC);
+ dml_print("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
+ dml_print("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
+#endif
+ }
+
+ CalculateMALLUseForStaticScreen(
+ p->NumberOfActiveSurfaces,
+ p->MALLAllocatedForDCN,
+ p->UseMALLForStaticScreen, // mode
+ p->SurfaceSizeInMALL,
+ s->one_row_per_frame_fits_in_buffer,
+ // Output
+ p->UsesMALLForStaticScreen); // boolean
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->PTEBufferModeOverrideEn[k] == 1) {
+ p->PTE_BUFFER_MODE[k] = p->PTEBufferModeOverrideVal[k];
+ }
+ p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
+ (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64);
+ p->BIGK_FRAGMENT_SIZE[k] = (dml_uint_t)(dml_log2(p->GPUVMMinPageSizeKBytes[k] * 1024) - 12);
+ }
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
+ dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, p->UsesMALLForStaticScreen[k]);
+#endif
+ p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
+ (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan));
+
+ p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame);
+
+ if (p->use_one_row_for_frame[k]) {
+ p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
+ p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowY[k] = 
s->PixelPTEBytesPerRowY_one_row_per_frame[k]; + p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k]; + p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k]; + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k]; + p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k]; + } + + if (p->MetaRowByte[k] <= p->DCCMetaBufferSizeBytes) { + p->DCCMetaBufferSizeNotExceeded[k] = true; + } else { + p->DCCMetaBufferSizeNotExceeded[k] = false; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, MetaRowByte = %u\n", __func__, k, p->MetaRowByte[k]); + dml_print("DML::%s: k=%u, DCCMetaBufferSizeBytes = %u\n", __func__, k, p->DCCMetaBufferSizeBytes); + dml_print("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); +#endif + } + s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels); + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels); + p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k]; + if (p->use_one_row_for_frame[k]) + p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2; + + CalculateRowBandwidth( + p->GPUVMEnable, + p->myPipe[k].SourcePixelFormat, + p->myPipe[k].VRatio, + p->myPipe[k].VRatioChroma, + p->myPipe[k].DCCEnable, + p->myPipe[k].HTotal / p->myPipe[k].PixelClock, + s->MetaRowByteY[k], + s->MetaRowByteC[k], + p->meta_row_height[k], + p->meta_row_height_chroma[k], + s->PixelPTEBytesPerRowY[k], + s->PixelPTEBytesPerRowC[k], + p->dpte_row_height_luma[k], + p->dpte_row_height_chroma[k], + + // Output + &p->meta_row_bw[k], + &p->dpte_row_bw[k]); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + dml_print("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); + dml_print("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->UseMALLForPStateChange[k]); + dml_print("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); + dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); + dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); + dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + dml_print("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); + dml_print("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); +#endif + } +} + +static void CalculateOutputLink( + dml_float_t PHYCLKPerState, + dml_float_t PHYCLKD18PerState, + dml_float_t PHYCLKD32PerState, + dml_float_t Downspreading, + dml_bool_t IsMainSurfaceUsingTheIndicatedTiming, + enum dml_output_encoder_class Output, + enum dml_output_format_class OutputFormat, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_float_t PixelClockBackEnd, + dml_float_t ForcedOutputLinkBPP, + dml_uint_t DSCInputBitPerComponent, + dml_uint_t 
NumberOfDSCSlices, + dml_float_t AudioSampleRate, + dml_uint_t AudioSampleLayout, + enum dml_odm_mode ODMModeNoDSC, + enum dml_odm_mode ODMModeDSC, + enum dml_dsc_enable DSCEnable, + dml_uint_t OutputLinkDPLanes, + enum dml_output_link_dp_rate OutputLinkDPRate, + + // Output + dml_bool_t *RequiresDSC, + dml_bool_t *RequiresFEC, + dml_float_t *OutBpp, + enum dml_output_type_and_rate__type *OutputType, + enum dml_output_type_and_rate__rate *OutputRate, + dml_uint_t *RequiredSlots) +{ + dml_bool_t LinkDSCEnable; + dml_uint_t dummy; + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = 0; + + *OutputType = dml_output_type_unknown; + *OutputRate = dml_output_rate_unknown; + + if (IsMainSurfaceUsingTheIndicatedTiming) { + if (Output == dml_hdmi) { + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = "HDMI"; + *OutputType = dml_output_type_hdmi; + + } else if (Output == dml_dp || Output == dml_dp2p0 || Output == dml_edp) { + if (DSCEnable == dml_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml_dp || Output == dml_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + if (Output == dml_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } + if (Output == dml_dp2p0) { + *OutBpp = 0; + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr10) && PHYCLKD32PerState >= 10000 / 32.0) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32.0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR10"; + *OutputType = dml_output_type_dp2p0; + *OutputRate = dml_output_rate_dp_rate_uhbr10; + } + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32.0) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, 
(dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR13p5"; + *OutputType = dml_output_type_dp2p0; + *OutputRate = dml_output_rate_dp_rate_uhbr13p5; + } + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR20"; + *OutputType = dml_output_type_dp2p0; + *OutputRate = dml_output_rate_dp_rate_uhbr20; + } + } else { // output is dp or edp + *OutBpp = 0; + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr) && PHYCLKPerState >= 270) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR"; + *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp; + *OutputRate = dml_output_rate_dp_rate_hbr; + } + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr2) && *OutBpp == 0 && PHYCLKPerState >= 540) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR2"; + *OutputType = (Output == dml_dp) ? 
dml_output_type_dp : dml_output_type_edp; + *OutputRate = dml_output_rate_dp_rate_hbr2; + } + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR3"; + *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp; + *OutputRate = dml_output_rate_dp_rate_hbr3; + } + } + } + } +} + +/// @brief Determine the ODM mode and number of DPP used per plane based on dispclk, dsc usage, odm usage policy +static void CalculateODMMode( + dml_uint_t MaximumPixelsPerLinePerDSCUnit, + dml_uint_t HActive, + enum dml_output_encoder_class Output, + enum dml_output_format_class OutputFormat, + enum dml_odm_use_policy ODMUse, + dml_float_t StateDispclk, + dml_float_t MaxDispclk, + dml_bool_t DSCEnable, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t MaxNumDPP, + dml_float_t PixelClock, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKRampingMargin, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_uint_t NumberOfDSCSlices, + + // Output + dml_bool_t *TotalAvailablePipesSupport, + dml_uint_t *NumberOfDPP, + enum dml_odm_mode *ODMMode, + dml_float_t *RequiredDISPCLKPerSurface) +{ + + dml_float_t SurfaceRequiredDISPCLKWithoutODMCombine; + dml_float_t SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + dml_float_t SurfaceRequiredDISPCLKWithODMCombineFourToOne; + + SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml_odm_mode_bypass, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk); + SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml_odm_mode_combine_2to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk); + SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml_odm_mode_combine_4to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk); + *TotalAvailablePipesSupport = true; + + if (OutputFormat == dml_420) { + if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH) + *TotalAvailablePipesSupport = false; + else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) + ODMUse = dml_odm_use_policy_combine_4to1; + else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH && ODMUse != dml_odm_use_policy_combine_4to1) + ODMUse = dml_odm_use_policy_combine_2to1; + if (Output == dml_hdmi && ODMUse == dml_odm_use_policy_combine_2to1) + *TotalAvailablePipesSupport = false; + if ((Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && ODMUse == dml_odm_use_policy_combine_4to1) + *TotalAvailablePipesSupport = false; + } + + if (ODMUse 
== dml_odm_use_policy_bypass || ODMUse == dml_odm_use_policy_combine_as_needed) + *ODMMode = dml_odm_mode_bypass; + else if (ODMUse == dml_odm_use_policy_combine_2to1) + *ODMMode = dml_odm_mode_combine_2to1; + else if (ODMUse == dml_odm_use_policy_combine_4to1) + *ODMMode = dml_odm_mode_combine_4to1; + else if (ODMUse == dml_odm_use_policy_split_1to2) + *ODMMode = dml_odm_mode_split_1to2; + else if (ODMUse == dml_odm_use_policy_mso_1to2) + *ODMMode = dml_odm_mode_mso_1to2; + else if (ODMUse == dml_odm_use_policy_mso_1to4) + *ODMMode = dml_odm_mode_mso_1to4; + + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; + *NumberOfDPP = 0; + + if (!(Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && (ODMUse == dml_odm_use_policy_combine_4to1 || (ODMUse == dml_odm_use_policy_combine_as_needed && + (SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) || NumberOfDSCSlices > 8)))) { + if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { + *ODMMode = dml_odm_mode_combine_4to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; + *NumberOfDPP = 4; + } else { + *TotalAvailablePipesSupport = false; + } + } else if (Output != dml_hdmi && (ODMUse == dml_odm_use_policy_combine_2to1 || (ODMUse == dml_odm_use_policy_combine_as_needed && + ((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || + (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { + if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { + *ODMMode = dml_odm_mode_combine_2to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + *NumberOfDPP = 2; + } else { + *TotalAvailablePipesSupport = false; + } + } else { + if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) { + *NumberOfDPP = 1; + } else { + *TotalAvailablePipesSupport = false; + } + } +} + +/// @brief Calculate the required DISPCLK given the odm mode and pixclk +static dml_float_t CalculateRequiredDispclk( + enum dml_odm_mode ODMMode, + dml_float_t PixelClock, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKRampingMargin, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_float_t MaxDispclk) +{ + dml_float_t RequiredDispclk = 0.; + dml_float_t PixelClockAfterODM; + + dml_float_t DISPCLKWithRampingRoundedToDFSGranularity; + dml_float_t DISPCLKWithoutRampingRoundedToDFSGranularity; + dml_float_t MaxDispclkRoundedDownToDFSGranularity; + + if (ODMMode == dml_odm_mode_combine_4to1) { + PixelClockAfterODM = PixelClock / 4; + } else if (ODMMode == dml_odm_mode_combine_2to1) { + PixelClockAfterODM = PixelClock / 2; + } else { + PixelClockAfterODM = PixelClock; + } + + DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + DISPCLKRampingMargin / 100.0), 1, DISPCLKDPPCLKVCOSpeed); + DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0), 1, DISPCLKDPPCLKVCOSpeed); + MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); + + if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) { + RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; + } else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) { + RequiredDispclk = 
MaxDispclkRoundedDownToDFSGranularity;
+ } else {
+ RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
+ }
+
+ return RequiredDispclk;
+}
+
+/// @brief Determine DPPCLK if there is only one DPP per plane; the main factors are the pixel rate and the DPP scaling parameters
+static void CalculateSinglePipeDPPCLKAndSCLThroughput(
+ dml_float_t HRatio,
+ dml_float_t HRatioChroma,
+ dml_float_t VRatio,
+ dml_float_t VRatioChroma,
+ dml_float_t MaxDCHUBToPSCLThroughput,
+ dml_float_t MaxPSCLToLBThroughput,
+ dml_float_t PixelClock,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t HTaps,
+ dml_uint_t HTapsChroma,
+ dml_uint_t VTaps,
+ dml_uint_t VTapsChroma,
+
+ // Output
+ dml_float_t *PSCL_THROUGHPUT,
+ dml_float_t *PSCL_THROUGHPUT_CHROMA,
+ dml_float_t *DPPCLKUsingSingleDPP)
+{
+ dml_float_t DPPCLKUsingSingleDPPLuma;
+ dml_float_t DPPCLKUsingSingleDPPChroma;
+
+ if (HRatio > 1) {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / dml_ceil((dml_float_t) HTaps / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+
+ DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
+
+ if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
+
+ if ((SourcePixelFormat != dml_420_8 && SourcePixelFormat != dml_420_10 && SourcePixelFormat != dml_420_12 && SourcePixelFormat != dml_rgbe_alpha)) {
+ *PSCL_THROUGHPUT_CHROMA = 0;
+ *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
+ } else {
+ if (HRatioChroma > 1) {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / dml_ceil((dml_float_t) HTapsChroma / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+ DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
+ HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+ if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
+ *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
+ }
+}
+
+/// @brief Calculate the actual dppclk freq
+/// @param DPPCLKUsingSingleDPP DppClk freq required if there is only 1 DPP per plane
+/// @param DPPPerSurface Number of DPP for each plane
+static void CalculateDPPCLK(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_float_t DPPCLKUsingSingleDPP[],
+ dml_uint_t DPPPerSurface[],
+
+ // Output
+ dml_float_t *GlobalDPPCLK,
+ dml_float_t Dppclk[])
+{
+ *GlobalDPPCLK = 0;
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
+ }
+ *GlobalDPPCLK = RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
+
+ dml_print("DML::%s: GlobalDPPCLK = %f\n", __func__, *GlobalDPPCLK);
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ Dppclk[k] = *GlobalDPPCLK / 255.0 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
+ dml_print("DML::%s: Dppclk[%0d] = %f\n", __func__, k, Dppclk[k]);
+ }
+}
+
+static void CalculateMALLUseForStaticScreen(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t MALLAllocatedForDCNFinal,
+ enum 
dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen, + dml_uint_t SurfaceSizeInMALL[], + dml_bool_t one_row_per_frame_fits_in_buffer[], + + // Output + dml_bool_t UsesMALLForStaticScreen[]) +{ + + dml_uint_t SurfaceToAddToMALL; + dml_bool_t CanAddAnotherSurfaceToMALL; + dml_uint_t TotalSurfaceSizeInMALL; + + TotalSurfaceSizeInMALL = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable); + if (UsesMALLForStaticScreen[k]) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, UsesMALLForStaticScreen[k]); + dml_print("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); +#endif + } + + SurfaceToAddToMALL = 0; + CanAddAnotherSurfaceToMALL = true; + while (CanAddAnotherSurfaceToMALL) { + CanAddAnotherSurfaceToMALL = false; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && + !UsesMALLForStaticScreen[k] && UseMALLForStaticScreen[k] != dml_use_mall_static_screen_disable && one_row_per_frame_fits_in_buffer[k] && + (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { + CanAddAnotherSurfaceToMALL = true; + SurfaceToAddToMALL = k; + dml_print("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, UseMALLForStaticScreen[k]); + } + } + if (CanAddAnotherSurfaceToMALL) { + UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); + dml_print("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); +#endif + } + } +} + +// @brief Calculate return bw for VM only traffic +dml_float_t dml_get_return_bw_mbps_vm_only( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DCFCLK, + dml_float_t FabricClock, + dml_float_t DRAMSpeed) +{ + dml_float_t VMDataOnlyReturnBW = + dml_min3(soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, + FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes * soc->pct_ideal_sdp_bw_after_urgent / 100.0, + DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes * + ((use_ideal_dram_bw_strobe && !HostVMEnable) ? 
soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_vm_only) / 100.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe); + dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable); + dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); + dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); + dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); +#endif + return VMDataOnlyReturnBW; +} + +// Function: dml_get_return_bw_mbps +// Megabyte per second +dml_float_t dml_get_return_bw_mbps( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DCFCLK, + dml_float_t FabricClock, + dml_float_t DRAMSpeed) +{ + dml_float_t ReturnBW = 0.; + dml_float_t IdealSDPPortBandwidth = soc->return_bus_width_bytes * DCFCLK; + dml_float_t IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; + dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; + dml_float_t PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, + IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, + IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100); + dml_float_t PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, + IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, + IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100); + + if (HostVMEnable != true) { + ReturnBW = PixelDataOnlyReturnBW; + } else { + ReturnBW = PixelMixedWithVMDataReturnBW; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe); + dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable); + dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); + dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); + dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); + dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); + dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); + dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); + dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); + dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); +#endif + return ReturnBW; +} + +// Function: dml_get_return_dram_bw_mbps +// Megabyte per second +static dml_float_t dml_get_return_dram_bw_mbps( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DRAMSpeed) +{ + dml_float_t ReturnDRAMBW = 0.; + dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; + dml_float_t PixelDataOnlyReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? 
soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100; + dml_float_t PixelMixedWithVMDataReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100; + + if (HostVMEnable != true) { + ReturnDRAMBW = PixelDataOnlyReturnBW; + } else { + ReturnDRAMBW = PixelMixedWithVMDataReturnBW; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe); + dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable); + dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); + dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); + dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); + dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); + dml_print("DML::%s: ReturnDRAMBW = %f MBps\n", __func__, ReturnDRAMBW); +#endif + return ReturnDRAMBW; +} + +/// @brief BACKEND +static dml_uint_t DSCDelayRequirement( + dml_bool_t DSCEnabled, + enum dml_odm_mode ODMMode, + dml_uint_t DSCInputBitPerComponent, + dml_float_t OutputBpp, + dml_uint_t HActive, + dml_uint_t HTotal, + dml_uint_t NumberOfDSCSlices, + enum dml_output_format_class OutputFormat, + enum dml_output_encoder_class Output, + dml_float_t PixelClock, + dml_float_t PixelClockBackEnd) +{ + dml_uint_t DSCDelayRequirement_val = 0; + + if (DSCEnabled == true && OutputBpp != 0) { + if (ODMMode == dml_odm_mode_combine_4to1) { + DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)), + (dml_uint_t) (NumberOfDSCSlices / 4.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output); + } else if (ODMMode == dml_odm_mode_combine_2to1) { + DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)), + (dml_uint_t) (NumberOfDSCSlices / 2.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output); + } else { + DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)((dml_float_t) dml_ceil(HActive / (dml_float_t) NumberOfDSCSlices, 1.0)), + NumberOfDSCSlices, OutputFormat, Output) + dscComputeDelay(OutputFormat, Output); + } + DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val + (HTotal - HActive) * dml_ceil((dml_float_t) DSCDelayRequirement_val / (dml_float_t) HActive, 1.0)); + DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd); + + } else { + DSCDelayRequirement_val = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DSCEnabled = %u\n", __func__, DSCEnabled); + dml_print("DML::%s: ODMMode = %u\n", __func__, ODMMode); + dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + dml_print("DML::%s: HActive = %u\n", __func__, HActive); + dml_print("DML::%s: HTotal = %u\n", __func__, HTotal); + dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); + dml_print("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + dml_print("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); + dml_print("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); + dml_print("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); + dml_print("DML::%s: 
DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); +#endif + + return DSCDelayRequirement_val; +} + +static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + dml_bool_t NotUrgentLatencyHiding[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t meta_row_bandwidth[], + dml_float_t dpte_row_bandwidth[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[]) +{ + dml_bool_t NotEnoughUrgentLatencyHiding = false; + dml_bool_t CalculateVActiveBandwithSupport_val = false; + dml_float_t VActiveBandwith = 0; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (NotUrgentLatencyHiding[k]) { + NotEnoughUrgentLatencyHiding = true; + } + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; + } + + CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %u\n", __func__, NotEnoughUrgentLatencyHiding); + dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); + dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); + dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %u\n", __func__, CalculateVActiveBandwithSupport_val); +#endif + return CalculateVActiveBandwithSupport_val; +} + +static void CalculatePrefetchBandwithSupport( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + dml_bool_t NotUrgentLatencyHiding[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t PrefetchBandwidthLuma[], + dml_float_t PrefetchBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t meta_row_bandwidth[], + dml_float_t dpte_row_bandwidth[], + dml_float_t cursor_bw_pre[], + dml_float_t prefetch_vmrow_bw[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[], + dml_float_t UrgentBurstFactorLumaPre[], + dml_float_t UrgentBurstFactorChromaPre[], + dml_float_t UrgentBurstFactorCursorPre[], + + // Output + dml_float_t *PrefetchBandwidth, + dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch, + dml_float_t *FractionOfUrgentBandwidth, + dml_bool_t *PrefetchBandwidthSupport) +{ + dml_bool_t NotEnoughUrgentLatencyHiding = false; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (NotUrgentLatencyHiding[k]) { + NotEnoughUrgentLatencyHiding = true; + } + } + + *PrefetchBandwidth = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * 
UrgentBurstFactorCursorPre[k]); + } + + *PrefetchBandwidthNotIncludingMALLPrefetch = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) + *PrefetchBandwidthNotIncludingMALLPrefetch = *PrefetchBandwidthNotIncludingMALLPrefetch + + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + + cursor_bw[k] * UrgentBurstFactorCursor[k] + + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } + + *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; + *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); + dml_print("DML::%s: PrefetchBandwidth = %f\n", __func__, *PrefetchBandwidth); + dml_print("DML::%s: FractionOfUrgentBandwidth = %f\n", __func__, *FractionOfUrgentBandwidth); + dml_print("DML::%s: PrefetchBandwidthSupport = %u\n", __func__, *PrefetchBandwidthSupport); +#endif +} + +static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t PrefetchBandwidthLuma[], + dml_float_t PrefetchBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t cursor_bw_pre[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[], + dml_float_t UrgentBurstFactorLumaPre[], + dml_float_t UrgentBurstFactorChromaPre[], + dml_float_t UrgentBurstFactorCursorPre[]) +{ + dml_float_t ret_val = ReturnBW; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + ret_val = ret_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u\n", __func__, k); + dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]); + dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]); + dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]); + dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]); + dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]); + dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]); + dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]); + + dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]); + dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]); + dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]); + dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]); + dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]); + dml_print("DML::%s: 
UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]); + dml_print("DML::%s: ret_val = %f\n", __func__, ret_val); +#endif + } + + return ret_val; +} + +static void CalculateImmediateFlipBandwithSupport( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + enum dml_immediate_flip_requirement ImmediateFlipRequirement[], + dml_float_t final_flip_bw[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t PrefetchBandwidthLuma[], + dml_float_t PrefetchBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t meta_row_bandwidth[], + dml_float_t dpte_row_bandwidth[], + dml_float_t cursor_bw_pre[], + dml_float_t prefetch_vmrow_bw[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[], + dml_float_t UrgentBurstFactorLumaPre[], + dml_float_t UrgentBurstFactorChromaPre[], + dml_float_t UrgentBurstFactorCursorPre[], + + // Output + dml_float_t *TotalBandwidth, + dml_float_t *TotalBandwidthNotIncludingMALLPrefetch, + dml_float_t *FractionOfUrgentBandwidth, + dml_bool_t *ImmediateFlipBandwidthSupport) +{ + *TotalBandwidth = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) { + + + + *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } else { + *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k = %u\n", __func__, k); + dml_print("DML::%s: ImmediateFlipRequirement = %u\n", __func__, ImmediateFlipRequirement[k]); + dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth); + dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]); + dml_print("DML::%s: prefetch_vmrow_bw = %f\n", __func__, prefetch_vmrow_bw[k]); + dml_print("DML::%s: final_flip_bw = %f\n", __func__, final_flip_bw[k]); + dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]); + dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]); + dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]); + dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]); + dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]); + dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]); + dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]); + dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]); 
+ dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]); + dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]); + dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]); + dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]); + dml_print("DML::%s: meta_row_bandwidth = %f\n", __func__, meta_row_bandwidth[k]); + dml_print("DML::%s: dpte_row_bandwidth = %f\n", __func__, dpte_row_bandwidth[k]); +#endif + } + + *TotalBandwidthNotIncludingMALLPrefetch = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { + if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) + *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + else + *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } + } + + *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); + *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); + dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth); + dml_print("DML::%s: ImmediateFlipBandwidthSupport = %u\n", __func__, *ImmediateFlipBandwidthSupport); +#endif +} + +static dml_uint_t MicroSecToVertLines(dml_uint_t num_us, dml_uint_t h_total, dml_float_t pixel_clock) +{ + dml_uint_t lines_time_in_ns = 1000.0 * (h_total * 1000.0) / (pixel_clock * 1000.0); + + return dml_ceil(1000.0 * num_us / lines_time_in_ns, 1.0); +} + +/// @brief Calculate the maximum vstartup for mode support and mode programming consideration +/// Bounded by min of actual vblank and input vblank_nom, dont want vstartup/ready to start too early if actual vbllank is huge +static dml_uint_t CalculateMaxVStartup( + dml_uint_t plane_idx, + dml_bool_t ptoi_supported, + dml_uint_t vblank_nom_default_us, + struct dml_timing_cfg_st *timing, + dml_float_t write_back_delay_us) +{ + dml_uint_t vblank_size = 0; + dml_uint_t max_vstartup_lines = 0; + const dml_uint_t max_allowed_vblank_nom = 1023; + + dml_float_t line_time_us = (dml_float_t) timing->HTotal[plane_idx] / timing->PixelClock[plane_idx]; + dml_uint_t vblank_actual = timing->VTotal[plane_idx] - timing->VActive[plane_idx]; + + dml_uint_t vblank_nom_default_in_line = MicroSecToVertLines(vblank_nom_default_us, timing->HTotal[plane_idx], + timing->PixelClock[plane_idx]); + dml_uint_t vblank_nom_input = (dml_uint_t)dml_min(vblank_actual, vblank_nom_default_in_line); + + // vblank_nom 
should not be smaller than (VSync (VTotal - VActive - VFrontPorch) + 2) + // + 2 is because + // 1 -> VStartup_start should be 1 line before VSync + // 1 -> always reserve 1 line between start of VBlank to VStartup signal + dml_uint_t vblank_nom_vsync_capped = dml_max(vblank_nom_input, + timing->VTotal[plane_idx] - timing->VActive[plane_idx] - timing->VFrontPorch[plane_idx] + 2); + dml_uint_t vblank_nom_max_allowed_capped = dml_min(vblank_nom_vsync_capped, max_allowed_vblank_nom); + dml_uint_t vblank_avail = (vblank_nom_max_allowed_capped == 0) ? + vblank_nom_default_in_line : vblank_nom_max_allowed_capped; + + vblank_size = (dml_uint_t) dml_min(vblank_actual, vblank_avail); + + if (timing->Interlace[plane_idx] && !ptoi_supported) + max_vstartup_lines = (dml_uint_t) (dml_floor(vblank_size/2.0, 1.0)); + else + max_vstartup_lines = vblank_size - (dml_uint_t) dml_max(1.0, dml_ceil(write_back_delay_us/line_time_us, 1.0)); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: plane_idx = %u\n", __func__, plane_idx); + dml_print("DML::%s: VBlankNom = %u\n", __func__, timing->VBlankNom[plane_idx]); + dml_print("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); + dml_print("DML::%s: line_time_us = %f\n", __func__, line_time_us); + dml_print("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); + dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); + dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); +#endif + max_vstartup_lines = (dml_uint_t) dml_min(max_vstartup_lines, DML_MAX_VSTARTUP_START); + return max_vstartup_lines; +} + +static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, + struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params, + dml_uint_t j, + dml_uint_t k) +{ + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k]; + CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]); + CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k]; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + 
CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k]; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k]; + CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait; + CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; +} + +static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib) +{ + struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct DmlPipe *myPipe; + dml_uint_t j, k; + + for (j = 0; j < 2; ++j) { + mode_lib->ms.TimeCalc = 24 / 
mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; + mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k]; + mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k]; + mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k]; + mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k]; + mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k]; + mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k]; + mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j]; + mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j]; + mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k]; + mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k]; + mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k]; + } + + mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.NotUrgentLatencyHiding, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.meta_row_bandwidth_this_state, + mode_lib->ms.dpte_row_bandwidth_this_state, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j]); + + s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only( + &mode_lib->ms.soc, + mode_lib->ms.state.use_ideal_dram_bw_strobe, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.DCFCLKState[j], + mode_lib->ms.state.fabricclk_mhz, + mode_lib->ms.state.dram_speed_mts); + + s->HostVMInefficiencyFactor = 1; + if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable) + s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState; + + mode_lib->ms.ExtraLatency = CalculateExtraLatency( + mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles, + s->ReorderingBytes, + mode_lib->ms.DCFCLKState[j], + mode_lib->ms.TotalNumberOfActiveDPP[j], + mode_lib->ms.ip.pixel_chunk_size_kbytes, + mode_lib->ms.TotalNumberOfDCCActiveDPP[j], + mode_lib->ms.ip.meta_chunk_size_kbytes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.dpte_group_bytes, + s->HostVMInefficiencyFactor, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); + + s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; + s->MaxVStartup = 0; + s->AllPrefetchModeTested = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]); + s->NextPrefetchMode[k] = s->MinPrefetchMode[k]; + } + + do { + s->MaxVStartup = s->NextMaxVStartup; + s->AllPrefetchModeTested = true; + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.PrefetchMode[k] = 
s->NextPrefetchMode[k]; + mode_lib->ms.TWait = CalculateTWait( + mode_lib->ms.PrefetchMode[k], + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k], + mode_lib->ms.state.dram_clock_change_latency_us, + mode_lib->ms.state.fclk_change_latency_us, + mode_lib->ms.UrgLatency, + mode_lib->ms.state.sr_enter_plus_exit_time_us); + + myPipe = &s->myPipe; + myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k]; + myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j]; + myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; + myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; + myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k]; + myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k]; + myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; + myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; + myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k]; + myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]; + myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; + myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k]; + myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; + myPipe->ODMMode = mode_lib->ms.ODMModePerState[k]; + myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; + myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k); + dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]); + dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup); + dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]); + dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]); + dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]); +#endif + + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k])); + CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k]; + CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; + CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; + CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; + 
CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + + set_calculate_prefetch_schedule_params(mode_lib, CalculatePrefetchSchedule_params, j, k); + + mode_lib->ms.support.NoTimeForPrefetch[j][k] = + CalculatePrefetchSchedule(&mode_lib->scratch, + CalculatePrefetchSchedule_params); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateUrgentBurstFactor( + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + mode_lib->ms.swath_width_luma_ub_this_state[k], + mode_lib->ms.swath_width_chroma_ub_this_state[k], + mode_lib->ms.SwathHeightYThisState[k], + mode_lib->ms.SwathHeightCThisState[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.UrgLatency, + mode_lib->ms.ip.cursor_buffer_size, + mode_lib->ms.cache_display_cfg.plane.CursorWidth[k], + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k], + mode_lib->ms.VRatioPreY[j][k], + mode_lib->ms.VRatioPreC[j][k], + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeYThisState[k], + mode_lib->ms.DETBufferSizeCThisState[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorCursorPre[k], + &mode_lib->ms.UrgentBurstFactorLumaPre[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], + &mode_lib->ms.NotUrgentLatencyHidingPre[k]); + + mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / + mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k]; + } + + { + CalculatePrefetchBandwithSupport( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + mode_lib->ms.NotUrgentLatencyHidingPre, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.meta_row_bandwidth_this_state, + mode_lib->ms.dpte_row_bandwidth_this_state, + mode_lib->ms.cursor_bw_pre, + mode_lib->ms.prefetch_vmrow_bw, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre, + + /* output */ + &s->dummy_single[0], // dml_float_t *PrefetchBandwidth + &s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch + &mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth + &mode_lib->ms.support.PrefetchSupported[j]); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0 + || mode_lib->ms.LinesForMetaPTE[k] >= 32.0 + || mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0 + || mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) { + mode_lib->ms.support.PrefetchSupported[j] = false; + } + } + + mode_lib->ms.support.DynamicMetadataSupported[j] = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) { + mode_lib->ms.support.DynamicMetadataSupported[j] = false; + } + } + + 
mode_lib->ms.support.VRatioInPrefetchSupported[j] = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true || + mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + ((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) && + (mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) { + mode_lib->ms.support.VRatioInPrefetchSupported[j] = false; + } + } + + s->AnyLinesForVMOrRowTooLarge = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) { + s->AnyLinesForVMOrRowTooLarge = true; + } + } + + if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.cursor_bw_pre, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre); + + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) { + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]); + if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) { + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]); + } else { + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k]; + } + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateFlipSchedule( + s->HostVMInefficiencyFactor, + mode_lib->ms.ExtraLatency, + mode_lib->ms.UrgLatency, + mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], + mode_lib->ms.MetaRowBytes[j][k], + mode_lib->ms.DPTEBytesPerRow[j][k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + mode_lib->ms.Tno_bw[k], + mode_lib->ms.cache_display_cfg.surface.DCCEnable[k], + mode_lib->ms.dpte_row_height[k], + 
mode_lib->ms.meta_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.meta_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24 + + /* Output */ + &mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + { + CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + mode_lib->ms.policy.ImmediateFlipRequirement, + mode_lib->ms.final_flip_bw, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.meta_row_bandwidth_this_state, + mode_lib->ms.dpte_row_bandwidth_this_state, + mode_lib->ms.cursor_bw_pre, + mode_lib->ms.prefetch_vmrow_bw, + mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre, + + /* output */ + &s->dummy_single[0], // dml_float_t *TotalBandwidth + &s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch + &s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth + &mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)) + mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false; + } + + } else { // if prefetch not support, assume iflip not supported + mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false; + } + + if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) { + s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1; + + if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k]) + s->AllPrefetchModeTested = false; + } + } else { + s->NextMaxVStartup = s->NextMaxVStartup - 1; + } + } while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true && + mode_lib->ms.support.VRatioInPrefetchSupported[j] == true && + // consider flip support is okay if when there is no hostvm and the user does't require a iflip OR the flip bw is ok + // If there is hostvm, DCN needs to support iflip for invalidation + ((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) || + (s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested))); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k]; + } + + s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; + s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; + s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us; + s->mSOCParameters.DRAMClockChangeLatency = 
mode_lib->ms.state.dram_clock_change_latency_us; + s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us; + s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us; + s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us; + s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us; + s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us; + s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us; + s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us; + + CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal; + CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j]; + CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j]; + CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal; + CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height; + CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma; + CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState; + CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState; + CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState; + CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel; + CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState; + CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState; + CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; + CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; + CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps; + CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma; + CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio; + CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma; + CalculateWatermarks_params->HTotal = 
mode_lib->ms.cache_display_cfg.timing.HTotal; + CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; + CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; + CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; + CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; + CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable; + CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat; + CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth; + CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight; + CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState; + + // Output + CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j]; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j]; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j]; + CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, + CalculateWatermarks_params); + + } // for j +} + +static noinline_for_stack void set_vm_row_and_swath_parameters(struct display_mode_lib_st *mode_lib) +{ + struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; + + CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateVMRowAndSwath_params->myPipe = s->SurfParameters; + CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma; + 
CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes; + CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen; + CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes; + CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState; + CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState; + CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; + CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; + CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceededPerState; + CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[0]; + CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[1]; + CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height; + CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma; + CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[2]; // VBA_DELTA + CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[3]; // VBA_DELTA + CalculateVMRowAndSwath_params->meta_req_width = s->dummy_integer_array[4]; + CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[5]; + CalculateVMRowAndSwath_params->meta_req_height = s->dummy_integer_array[6]; + CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[7]; + CalculateVMRowAndSwath_params->meta_row_width = s->dummy_integer_array[8]; + CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[9]; + CalculateVMRowAndSwath_params->meta_row_height = mode_lib->ms.meta_row_height; + CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma; + CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[10]; + CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[11]; + CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[12]; + CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[13]; + CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[14]; + CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[15]; + 
CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[16]; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[17]; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[18]; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[19]; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[20]; + CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesYThisState; + CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesCThisState; + CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY; + CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC; + CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY; + CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC; + CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bandwidth_this_state; + CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bandwidth_this_state; + CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRowThisState; + CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState; + CalculateVMRowAndSwath_params->MetaRowByte = mode_lib->ms.MetaRowBytesThisState; + CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame_this_state; + CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip_this_state; + CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = s->dummy_boolean_array[0]; + CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1]; + CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[21]; +} + +/// @brief The Mode Support function. 
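+/// Checks the cached display configuration against IP and SOC limits (scale ratio and taps, source format, read/write bandwidth, writeback, DSC slice count, swath and DET sizing) and reports whether the mode can be supported.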
+dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) +{ + struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct UseMinimumDCFCLK_params_st *UseMinimumDCFCLK_params = &mode_lib->scratch.UseMinimumDCFCLK_params; + struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + + dml_uint_t j, k, m; + + mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg); + dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ms.ip.config_return_buffer_size_in_kbytes, + mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ms.ip.rob_buffer_size_kbytes, + mode_lib->ms.ip.max_num_dpp, + mode_lib->ms.policy.NomDETInKByteOverrideEnable, // VBA_DELTA + mode_lib->ms.policy.NomDETInKByteOverrideValue, // VBA_DELTA + + /* Output */ + &mode_lib->ms.MaxTotalDETInKByte, + &mode_lib->ms.NomDETInKByte, + &mode_lib->ms.MinCompressedBufferSizeInKByte); + + PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported); + + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + mode_lib->ms.support.ScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k] == false + && ((mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) + || mode_lib->ms.cache_display_cfg.plane.HRatio[k] != 1.0 + || mode_lib->ms.cache_display_cfg.plane.HTaps[k] != 1.0 + || mode_lib->ms.cache_display_cfg.plane.VRatio[k] != 1.0 + || mode_lib->ms.cache_display_cfg.plane.VTaps[k] != 1.0)) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } else if (mode_lib->ms.cache_display_cfg.plane.VTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] > 8.0 + || mode_lib->ms.cache_display_cfg.plane.HTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 8.0 + || (mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 1.0 && (mode_lib->ms.cache_display_cfg.plane.HTaps[k] % 2) == 1) + || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.ip.max_hscl_ratio + || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.ip.max_vscl_ratio + || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.cache_display_cfg.plane.HTaps[k] + || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.cache_display_cfg.plane.VTaps[k] + || (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 + && 
mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe + && (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] > 8 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 8 || + (mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 1 && mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] % 2 == 1) || + mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.ip.max_hscl_ratio || + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.ip.max_vscl_ratio || + mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] || + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]))) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } + } + + /*Source Format, Pixel Format and Scan Support Check*/ + mode_lib->ms.support.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear && (!(!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) || mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true)) { + mode_lib->ms.support.SourceFormatPixelAndScanSupport = false; + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateBytePerPixelAndBlockSizes( + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k], + + /* Output */ + &mode_lib->ms.BytePerPixelY[k], + &mode_lib->ms.BytePerPixelC[k], + &mode_lib->ms.BytePerPixelInDETY[k], + &mode_lib->ms.BytePerPixelInDETC[k], + &mode_lib->ms.Read256BlockHeightY[k], + &mode_lib->ms.Read256BlockHeightC[k], + &mode_lib->ms.Read256BlockWidthY[k], + &mode_lib->ms.Read256BlockWidthC[k], + &mode_lib->ms.MacroTileHeightY[k], + &mode_lib->ms.MacroTileHeightC[k], + &mode_lib->ms.MacroTileWidthY[k], + &mode_lib->ms.MacroTileWidthC[k]); + } + + /*Bandwidth Support Check*/ + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) { + mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k]; + mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k]; + } else { + mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k]; + mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k]; + } + } + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.ReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * dml_ceil(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k]; + mode_lib->ms.ReadBandwidthChroma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * dml_ceil(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k] / 2.0; + } + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; 
k++) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true + && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_64) { + mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] + * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] + / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] + * mode_lib->ms.cache_display_cfg.timing.HTotal[k] + / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8.0; + } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] + * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] + / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] + * mode_lib->ms.cache_display_cfg.timing.HTotal[k] + / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4.0; + } else { + mode_lib->ms.WriteBandwidth[k] = 0.0; + } + } + + /*Writeback Latency support check*/ + mode_lib->ms.support.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ms.ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->ms.state.writeback_latency_us)) { + mode_lib->ms.support.WritebackLatencySupport = false; + } + } + + /*Writeback Mode Support Check*/ + s->TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= (dml_uint_t) mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1; + } + } + + mode_lib->ms.support.EnoughWritebackUnits = 1; + if (s->TotalNumberOfActiveWriteback > (dml_uint_t) mode_lib->ms.ip.max_num_wb) { + mode_lib->ms.support.EnoughWritebackUnits = false; + } + + /*Writeback Scale Ratio and Taps Support Check*/ + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > mode_lib->ms.ip.writeback_max_hscl_ratio + || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > mode_lib->ms.ip.writeback_max_vscl_ratio + || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] < mode_lib->ms.ip.writeback_min_hscl_ratio + || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] < mode_lib->ms.ip.writeback_min_vscl_ratio + || mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_hscl_taps + || mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_vscl_taps + || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] + || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] + || (mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > 2.0 && ((mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] % 2) == 1))) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * 
(mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] - 1) * 57 > mode_lib->ms.ip.writeback_line_buffer_buffer_size) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateSinglePipeDPPCLKAndSCLThroughput( + mode_lib->ms.cache_display_cfg.plane.HRatio[k], + mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk, + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + mode_lib->ms.cache_display_cfg.plane.HTaps[k], + mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k], + mode_lib->ms.cache_display_cfg.plane.VTaps[k], + mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k], + /* Output */ + &mode_lib->ms.PSCL_FACTOR[k], + &mode_lib->ms.PSCL_FACTOR_CHROMA[k], + &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) { + s->MaximumSwathWidthSupportLuma = 8192; + } else if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) { + s->MaximumSwathWidthSupportLuma = 7680; + } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) { + s->MaximumSwathWidthSupportLuma = 4320; + } else if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha) { + s->MaximumSwathWidthSupportLuma = 3840; + } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelY[k] == 8 && mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) { + s->MaximumSwathWidthSupportLuma = 3072; + } else { + s->MaximumSwathWidthSupportLuma = 6144; + } + + if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12) { + s->MaximumSwathWidthSupportChroma = (dml_uint_t)(s->MaximumSwathWidthSupportLuma / 2.0); + } else { + s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma; + } + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ms.ip.line_buffer_size_bits * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatio[k], 1.0) / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] / + (mode_lib->ms.cache_display_cfg.plane.VTaps[k] + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatio[k], 1.0) - 2, 0.0)); + if (mode_lib->ms.BytePerPixelC[k] == 0.0) { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; + } else { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = + mode_lib->ms.ip.line_buffer_size_bits + * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], 1.0) + / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] + / (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] + + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], 1.0) - 2, 0.0)); + } + mode_lib->ms.MaximumSwathWidthLuma[k] = dml_min(s->MaximumSwathWidthSupportLuma, 
mode_lib->ms.MaximumSwathWidthInLineBufferLuma); + mode_lib->ms.MaximumSwathWidthChroma[k] = dml_min(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); + } + + /*Number Of DSC Slices*/ + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && + mode_lib->ms.cache_display_cfg.output.DSCEnable[k] != dml_dsc_disable) { + mode_lib->ms.support.NumberOfDSCSlices[k] = mode_lib->ms.cache_display_cfg.output.DSCSlices[k]; + + if (mode_lib->ms.support.NumberOfDSCSlices[k] == 0) { + if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 4800) { + mode_lib->ms.support.NumberOfDSCSlices[k] = (dml_uint_t)(dml_ceil(mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 600, 4)); + } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 2400) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 8; + } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 1200) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 4; + } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 340) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 2; + } else { + mode_lib->ms.support.NumberOfDSCSlices[k] = 1; + } + } + } else { + mode_lib->ms.support.NumberOfDSCSlices[k] = 1; + } + } + + CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride; + CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; + CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; + CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan; + CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary; + 
CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat; + CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling; + CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth; + CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight; + CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart; + CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart; + CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC; + CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC; + CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY; + CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC; + CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY; + CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode; + CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive; + CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; + CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[0]; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[1]; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[2]; + CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[3]; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[4]; + CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[5]; + CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[6]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[7]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + 
CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0]; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2]; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1]; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1]; + + CalculateSwathAndDETConfiguration(&mode_lib->scratch, + CalculateSwathAndDETConfiguration_params); /* dml_bool_t *ViewportSizeSupport */ + + s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; + s->MPCCombineMethodAsPossible = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_needed_for_pstate_and_voltage) + s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; + if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_possible) + s->MPCCombineMethodAsPossible = true; + } + mode_lib->ms.support.MPCCombineMethodIncompatible = s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && s->MPCCombineMethodAsPossible; + + for (j = 0; j < 2; j++) { + mode_lib->ms.TotalNumberOfActiveDPP[j] = 0; + mode_lib->ms.support.TotalAvailablePipesSupport[j] = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + CalculateODMMode( + mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit, + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.policy.ODMUse[k], + mode_lib->ms.state.dispclk_mhz, + mode_lib->ms.max_state.dispclk_mhz, + false, // DSCEnable + mode_lib->ms.TotalNumberOfActiveDPP[j], + mode_lib->ms.ip.max_num_dpp, + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.soc.dcn_downspread_percent, + mode_lib->ms.ip.dispclk_ramp_margin_percent, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, + mode_lib->ms.support.NumberOfDSCSlices[k], + + /* Output */ + &s->TotalAvailablePipesSupportNoDSC, + &s->NumberOfDPPNoDSC, + &s->ODMModeNoDSC, + &s->RequiredDISPCLKPerSurfaceNoDSC); + + CalculateODMMode( + mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit, + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.policy.ODMUse[k], + mode_lib->ms.state.dispclk_mhz, + mode_lib->ms.max_state.dispclk_mhz, + true, // DSCEnable + mode_lib->ms.TotalNumberOfActiveDPP[j], + mode_lib->ms.ip.max_num_dpp, + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.soc.dcn_downspread_percent, + mode_lib->ms.ip.dispclk_ramp_margin_percent, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, + mode_lib->ms.support.NumberOfDSCSlices[k], + + /* Output */ + &s->TotalAvailablePipesSupportDSC, + &s->NumberOfDPPDSC, + &s->ODMModeDSC, + &s->RequiredDISPCLKPerSurfaceDSC); + + CalculateOutputLink( + mode_lib->ms.state.phyclk_mhz, + mode_lib->ms.state.phyclk_d18_mhz, + mode_lib->ms.state.phyclk_d32_mhz, + mode_lib->ms.soc.phy_downspread_percent, + (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k), + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + 
mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k], + mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k], + mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k], + mode_lib->ms.support.NumberOfDSCSlices[k], + mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k], + mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k], + s->ODMModeNoDSC, + s->ODMModeDSC, + mode_lib->ms.cache_display_cfg.output.DSCEnable[k], + mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k], + mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k], + + /* Output */ + &mode_lib->ms.RequiresDSC[k], + &mode_lib->ms.RequiresFEC[k], + &mode_lib->ms.OutputBppPerState[k], + &mode_lib->ms.OutputTypePerState[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on string + &mode_lib->ms.OutputRatePerState[k], + &mode_lib->ms.RequiredSlots[k]); + + if (mode_lib->ms.RequiresDSC[k] == false) { + mode_lib->ms.ODMModePerState[k] = s->ODMModeNoDSC; + mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceNoDSC; + if (!s->TotalAvailablePipesSupportNoDSC) + mode_lib->ms.support.TotalAvailablePipesSupport[j] = false; + mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPNoDSC; + } else { + mode_lib->ms.ODMModePerState[k] = s->ODMModeDSC; + mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceDSC; + if (!s->TotalAvailablePipesSupportDSC) + mode_lib->ms.support.TotalAvailablePipesSupport[j] = false; + mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPDSC; + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 4; + } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 2; + } else if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_disabled) { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 1; + } else if (RoundToDFSGranularity(mode_lib->ms.MinDPPCLKUsingSingleDPP[k] * (1 + mode_lib->ms.soc.dcn_downspread_percent / 100), + 1, mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz) <= mode_lib->ms.state.dppclk_mhz && + mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k] == true) { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 1; + } else if (mode_lib->ms.TotalNumberOfActiveDPP[j] < (dml_uint_t) mode_lib->ms.ip.max_num_dpp) { + mode_lib->ms.MPCCombine[j][k] = true; + mode_lib->ms.NoOfDPP[j][k] = 2; + mode_lib->ms.TotalNumberOfActiveDPP[j] = (dml_uint_t) mode_lib->ms.TotalNumberOfActiveDPP[j] + 1; + } else { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 1; + mode_lib->ms.support.TotalAvailablePipesSupport[j] = false; + } + } + + mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = 0; + s->NoChromaOrLinear = true; + for (k = 0; k < (dml_uint_t) mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoOfDPP[j][k] == 1) + mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] + 1; + if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 + || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 + || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12 + || 
mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha + || mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) { + s->NoChromaOrLinear = false; + } + } + + if (j == 1 && !UnboundedRequest(mode_lib->ms.policy.UseUnboundedRequesting, + mode_lib->ms.TotalNumberOfActiveDPP[j], s->NoChromaOrLinear, + mode_lib->ms.cache_display_cfg.output.OutputEncoder[0])) { + while (!(mode_lib->ms.TotalNumberOfActiveDPP[j] >= (dml_uint_t) mode_lib->ms.ip.max_num_dpp || mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] == 0)) { + s->BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; + s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_disabled && mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_as_needed_for_voltage && + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k] > s->BWOfNonCombinedSurfaceOfMaximumBandwidth && + (mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_2to1 && mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_4to1) && + mode_lib->ms.MPCCombine[j][k] == false) { + s->BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k]; + s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = k; + } + } + mode_lib->ms.MPCCombine[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = true; + mode_lib->ms.NoOfDPP[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2; + mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + 1; + mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] - 1; + } + } + + //DISPCLK/DPPCLK + mode_lib->ms.WritebackRequiredDISPCLK = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) { + mode_lib->ms.WritebackRequiredDISPCLK = dml_max(mode_lib->ms.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK(mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k], + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + mode_lib->ms.ip.writeback_line_buffer_buffer_size, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz)); + } + } + + mode_lib->ms.RequiredDISPCLK[j] = mode_lib->ms.WritebackRequiredDISPCLK; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDISPCLK[j] = dml_max(mode_lib->ms.RequiredDISPCLK[j], mode_lib->ms.RequiredDISPCLKPerSurface[j][k]); + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; + } + + CalculateDPPCLK(mode_lib->ms.num_active_planes, + mode_lib->ms.soc.dcn_downspread_percent, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, + mode_lib->ms.MinDPPCLKUsingSingleDPP, + mode_lib->ms.NoOfDPPThisState, + /* Output */ + &mode_lib->ms.GlobalDPPCLK, + mode_lib->ms.RequiredDPPCLKThisState); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDPPCLKPerSurface[j][k] = mode_lib->ms.RequiredDPPCLKThisState[k]; + } + + 
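/* DISPCLK/DPPCLK support check: this state passes only if the required DISPCLK and the global DPPCLK fit within the state's dispclk/dppclk limits */ + 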
mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] = !((mode_lib->ms.RequiredDISPCLK[j] > mode_lib->ms.state.dispclk_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.state.dppclk_mhz)); + + if (mode_lib->ms.TotalNumberOfActiveDPP[j] > (dml_uint_t) mode_lib->ms.ip.max_num_dpp) { + mode_lib->ms.support.TotalAvailablePipesSupport[j] = false; + } + } // j + + /* Total Available OTG, HDMIFRL, DP Support Check */ + s->TotalNumberOfActiveOTG = 0; + s->TotalNumberOfActiveHDMIFRL = 0; + s->TotalNumberOfActiveDP2p0 = 0; + s->TotalNumberOfActiveDP2p0Outputs = 0; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { + s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1; + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) + s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1; + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0) { + s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1; + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k || mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == false) { + s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1; + } + } + } + } + + mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (dml_uint_t) mode_lib->ms.ip.max_num_otg); + mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (dml_uint_t) mode_lib->ms.ip.max_num_hdmi_frl_outputs); + mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_outputs); + + /* Display IO and DSC Support Check */ + mode_lib->ms.support.NonsupportedDSCInputBPC = false; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false && + !(mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 12.0 + || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 10.0 + || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 8.0 + || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] > (dml_uint_t) mode_lib->ms.ip.maximum_dsc_bits_per_component + )) { + mode_lib->ms.support.NonsupportedDSCInputBPC = true; + } + } + + mode_lib->ms.support.ExceededMultistreamSlots = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) { + s->TotalSlots = mode_lib->ms.RequiredSlots[k]; + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[j] == k) + s->TotalSlots = s->TotalSlots + mode_lib->ms.RequiredSlots[j]; + } + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp && s->TotalSlots > 63) + mode_lib->ms.support.ExceededMultistreamSlots = true; + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 && s->TotalSlots > 64) + mode_lib->ms.support.ExceededMultistreamSlots = true; + } + } + mode_lib->ms.support.LinkCapacitySupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false && + mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == 
dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) && mode_lib->ms.OutputBppPerState[k] == 0) { + mode_lib->ms.support.LinkCapacitySupport = false; + } + } + + mode_lib->ms.support.P2IWith420 = false; + mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false; + mode_lib->ms.support.DSC422NativeNotSupported = false; + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false; + mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false; + mode_lib->ms.support.BPPForMultistreamNotIndicated = false; + mode_lib->ms.support.MultistreamWithHDMIOreDP = false; + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false; + mode_lib->ms.support.NotEnoughLanesForMSO = false; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) { + if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420 && mode_lib->ms.cache_display_cfg.timing.Interlace[k] == 1 && mode_lib->ms.ip.ptoi_supported == true) + mode_lib->ms.support.P2IWith420 = true; + + if (mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] != 0) + mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true; + if ((mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable || mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary) && mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 && !mode_lib->ms.ip.dsc422_native_support) + mode_lib->ms.support.DSC422NativeNotSupported = true; + + if (((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr2 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr3) && + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_edp) || + ((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr10 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr13p5 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr20) && + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp2p0)) + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true; + + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_na) + mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true; + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0) + mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j && 
mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0) + mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + } + } + + if ((mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) + mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j) + mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + } + } + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_split_1to2 || + mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 || mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4)) + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true; + + if ((mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 2) || + (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 4)) + mode_lib->ms.support.NotEnoughLanesForMSO = true; + } + } + + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl && + RequiredDTBCLK( + mode_lib->ms.RequiresDSC[k], + mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.OutputBppPerState[k], + mode_lib->ms.support.NumberOfDSCSlices[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k], + mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k]) > mode_lib->ms.state.dtbclk_mhz) { + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; + } + } + + mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = true; + mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1 && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi) { + mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = false; + } + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1 && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi)) { + mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = false; + } + } + + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == 
dml_dp2p0 || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) { + if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) { + s->DSCFormatFactor = 2; + } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) { + s->DSCFormatFactor = 1; + } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) { + s->DSCFormatFactor = 2; + } else { + s->DSCFormatFactor = 1; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); +#endif + if (mode_lib->ms.RequiresDSC[k] == true) { + if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) { + if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]); + dml_print("DML::%s: k=%u, DSCCLKPerState = %f\n", __func__, k, mode_lib->ms.state.dscclk_mhz); + dml_print("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); +#endif + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; + } + } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) { + if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) { + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; + } + } else { + if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) { + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; + } + } + } + } + } + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DSCCLKRequiredMoreThanSupported = %u\n", __func__, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); +#endif + + /* Check DSC Unit and Slices Support */ + mode_lib->ms.support.NotEnoughDSCUnits = false; + mode_lib->ms.support.NotEnoughDSCSlices = false; + s->TotalDSCUnitsRequired = 0; + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.RequiresDSC[k] == true) { + if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) { + if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 4 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit) + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; + s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 4; + if (mode_lib->ms.support.NumberOfDSCSlices[k] > 16) + mode_lib->ms.support.NotEnoughDSCSlices = true; + } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) { + if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 2 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit) + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; + s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 2; + if (mode_lib->ms.support.NumberOfDSCSlices[k] > 8) + mode_lib->ms.support.NotEnoughDSCSlices = true; + } else { + if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > (dml_uint_t) 
mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit) + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; + s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 1; + if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4) + mode_lib->ms.support.NotEnoughDSCSlices = true; + } + } + } + if (s->TotalDSCUnitsRequired > (dml_uint_t) mode_lib->ms.ip.num_dsc) { + mode_lib->ms.support.NotEnoughDSCUnits = true; + } + + /*DSC Delay per state*/ + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.DSCDelayPerState[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], + mode_lib->ms.ODMModePerState[k], + mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k], + mode_lib->ms.OutputBppPerState[k], + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + mode_lib->ms.support.NumberOfDSCSlices[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + for (j = 0; j <= mode_lib->ms.num_active_planes - 1; j++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m && mode_lib->ms.RequiresDSC[m] == true) { + mode_lib->ms.DSCDelayPerState[k] = mode_lib->ms.DSCDelayPerState[m]; + } + } + } + } + + //Calculate Swath, DET Configuration, DCFCLKDeepSleep + // + for (j = 0; j < 2; ++j) { + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k]; + mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; + mode_lib->ms.ODMModeThisState[k] = mode_lib->ms.ODMModePerState[k]; + } + + CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride; + CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; + CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma; + 
CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; + CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan; + CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary; + CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat; + CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling; + CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth; + CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight; + CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart; + CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart; + CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC; + CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC; + CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY; + CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC; + CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY; + CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMModeThisState; + CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive; + CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; + CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state; + 
CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthYThisState; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthCThisState; + CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState; + CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByteThisState; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabledThisState; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2]; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1]; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByteThisState; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport[j]; + + CalculateSwathAndDETConfiguration(&mode_lib->scratch, + CalculateSwathAndDETConfiguration_params); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.swath_width_luma_ub_all_states[j][k] = mode_lib->ms.swath_width_luma_ub_this_state[k]; + mode_lib->ms.swath_width_chroma_ub_all_states[j][k] = mode_lib->ms.swath_width_chroma_ub_this_state[k]; + mode_lib->ms.SwathWidthYAllStates[j][k] = mode_lib->ms.SwathWidthYThisState[k]; + mode_lib->ms.SwathWidthCAllStates[j][k] = mode_lib->ms.SwathWidthCThisState[k]; + mode_lib->ms.SwathHeightYAllStates[j][k] = mode_lib->ms.SwathHeightYThisState[k]; + mode_lib->ms.SwathHeightCAllStates[j][k] = mode_lib->ms.SwathHeightCThisState[k]; + mode_lib->ms.UnboundedRequestEnabledAllStates[j] = mode_lib->ms.UnboundedRequestEnabledThisState; + mode_lib->ms.CompressedBufferSizeInkByteAllStates[j] = mode_lib->ms.CompressedBufferSizeInkByteThisState; + mode_lib->ms.DETBufferSizeInKByteAllStates[j][k] = mode_lib->ms.DETBufferSizeInKByteThisState[k]; + mode_lib->ms.DETBufferSizeYAllStates[j][k] = mode_lib->ms.DETBufferSizeYThisState[k]; + mode_lib->ms.DETBufferSizeCAllStates[j][k] = mode_lib->ms.DETBufferSizeCThisState[k]; + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k]; + } + + CalculateSurfaceSizeInMall( + mode_lib->ms.num_active_planes, + mode_lib->ms.soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen, + mode_lib->ms.cache_display_cfg.surface.DCCEnable, + mode_lib->ms.cache_display_cfg.plane.ViewportStationary, + mode_lib->ms.cache_display_cfg.plane.ViewportXStart, + mode_lib->ms.cache_display_cfg.plane.ViewportYStart, + mode_lib->ms.cache_display_cfg.plane.ViewportXStartC, + mode_lib->ms.cache_display_cfg.plane.ViewportYStartC, + mode_lib->ms.cache_display_cfg.plane.ViewportWidth, + 
mode_lib->ms.cache_display_cfg.plane.ViewportHeight, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma, + mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY, + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC, + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY, + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC, + mode_lib->ms.Read256BlockWidthY, + mode_lib->ms.Read256BlockWidthC, + mode_lib->ms.Read256BlockHeightY, + mode_lib->ms.Read256BlockHeightC, + mode_lib->ms.MacroTileWidthY, + mode_lib->ms.MacroTileWidthC, + mode_lib->ms.MacroTileHeightY, + mode_lib->ms.MacroTileHeightC, + + /* Output */ + mode_lib->ms.SurfaceSizeInMALL, + &mode_lib->ms.support.ExceededMALLSize); + + for (j = 0; j < 2; j++) { + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k]; + mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k]; + mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k]; + mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k]; + mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k]; + mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k]; + mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k]; + mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k]; + mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k]; + mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k]; + mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; + } + + mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) { + mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = mode_lib->ms.TotalNumberOfDCCActiveDPP[j] + mode_lib->ms.NoOfDPP[j][k]; + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->SurfParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; + s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[j][k]; + s->SurfParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; + s->SurfParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k]; + s->SurfParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k]; + s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k]; + s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k]; + s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k]; + s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k]; + s->SurfParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; + s->SurfParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; + s->SurfParameters[k].DCCEnable = 
mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; + s->SurfParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; + s->SurfParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k]; + s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + s->SurfParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k]; + s->SurfParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k]; + s->SurfParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k]; + s->SurfParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]; + s->SurfParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k]; + s->SurfParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k]; + s->SurfParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k]; + s->SurfParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]; + s->SurfParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k]; + s->SurfParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k]; + s->SurfParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k]; + s->SurfParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k]; + s->SurfParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k]; + s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k]; + s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightYThisState[k]; + s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightCThisState[k]; + } + + set_vm_row_and_swath_parameters(mode_lib); + + CalculateVMRowAndSwath(&mode_lib->scratch, + CalculateVMRowAndSwath_params); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.PrefetchLinesY[j][k] = mode_lib->ms.PrefetchLinesYThisState[k]; + mode_lib->ms.PrefetchLinesC[j][k] = mode_lib->ms.PrefetchLinesCThisState[k]; + mode_lib->ms.meta_row_bandwidth[j][k] = mode_lib->ms.meta_row_bandwidth_this_state[k]; + mode_lib->ms.dpte_row_bandwidth[j][k] = mode_lib->ms.dpte_row_bandwidth_this_state[k]; + mode_lib->ms.DPTEBytesPerRow[j][k] = mode_lib->ms.DPTEBytesPerRowThisState[k]; + mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState[k]; + mode_lib->ms.MetaRowBytes[j][k] = mode_lib->ms.MetaRowBytesThisState[k]; + mode_lib->ms.use_one_row_for_frame[j][k] = mode_lib->ms.use_one_row_for_frame_this_state[k]; + mode_lib->ms.use_one_row_for_frame_flip[j][k] = mode_lib->ms.use_one_row_for_frame_flip_this_state[k]; + } + + mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.PTEBufferSizeNotExceededPerState[k] == false) + mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = false; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%u k=%u, PTEBufferSizeNotExceededPerState[%u] = %u\n", __func__, j, k, k, mode_lib->ms.PTEBufferSizeNotExceededPerState[k]); +#endif + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PTEBufferSizeNotExceeded[%u] = %u\n", __func__, j, mode_lib->ms.support.PTEBufferSizeNotExceeded[j]); +#endif + + 
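/* DCC meta buffer size check: the state fails if any active plane exceeded its DCC meta buffer */ + 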
mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.DCCMetaBufferSizeNotExceededPerState[k] == false) + mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = false; + } + + mode_lib->ms.UrgLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us, + mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us, + mode_lib->ms.state.urgent_latency_vm_data_only_us, + mode_lib->ms.soc.do_urgent_latency_adjustment, + mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us, + mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz, + mode_lib->ms.state.fabricclk_mhz); + + /* Getter functions work at mp interface so copy the urgent latency to mp*/ + mode_lib->mp.UrgentLatency = mode_lib->ms.UrgLatency; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + CalculateUrgentBurstFactor( + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + mode_lib->ms.swath_width_luma_ub_this_state[k], + mode_lib->ms.swath_width_chroma_ub_this_state[k], + mode_lib->ms.SwathHeightYThisState[k], + mode_lib->ms.SwathHeightCThisState[k], + (dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.UrgLatency, + mode_lib->ms.ip.cursor_buffer_size, + mode_lib->ms.cache_display_cfg.plane.CursorWidth[k], + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k], + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeYThisState[k], + mode_lib->ms.DETBufferSizeCThisState[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorCursor[j][k], + &mode_lib->ms.UrgentBurstFactorLuma[j][k], + &mode_lib->ms.UrgentBurstFactorChroma[j][k], + &mode_lib->ms.NotUrgentLatencyHiding[k]); + } + + CalculateDCFCLKDeepSleep( + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.cache_display_cfg.plane.VRatio, + mode_lib->ms.cache_display_cfg.plane.VRatioChroma, + mode_lib->ms.SwathWidthYThisState, + mode_lib->ms.SwathWidthCThisState, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.cache_display_cfg.plane.HRatio, + mode_lib->ms.cache_display_cfg.plane.HRatioChroma, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLKThisState, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.soc.return_bus_width_bytes, + + /* Output */ + &mode_lib->ms.ProjectedDCFCLKDeepSleep[j]); + } + + //Calculate Return BW + for (j = 0; j < 2; ++j) { + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay( + mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k], + 
mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / mode_lib->ms.RequiredDISPCLK[j]; + } else { + mode_lib->ms.WritebackDelayTime[k] = 0.0; + } + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[m] == k && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[m] == true) { + mode_lib->ms.WritebackDelayTime[k] = dml_max(mode_lib->ms.WritebackDelayTime[k], + mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay( + mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[m], + mode_lib->ms.cache_display_cfg.timing.HTotal[m]) / mode_lib->ms.RequiredDISPCLK[j]); + } + } + } + } + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m) { + mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m]; + } + } + } + s->MaxVStartupAllPlanes[j] = 0; // max vstartup among all planes + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + s->MaximumVStartup[j][k] = CalculateMaxVStartup(k, + mode_lib->ms.ip.ptoi_supported, + mode_lib->ms.ip.vblank_nom_default_us, + &mode_lib->ms.cache_display_cfg.timing, + mode_lib->ms.WritebackDelayTime[k]); + + s->MaxVStartupAllPlanes[j] = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes[j], s->MaximumVStartup[j][k])); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, MaxVStartupAllPlanes[%u] = %u\n", __func__, k, j, s->MaxVStartupAllPlanes[j]); + dml_print("DML::%s: k=%u, MaximumVStartup[%u][%u] = %u\n", __func__, k, j, k, s->MaximumVStartup[j][k]); +#endif + } + } + + s->ReorderingBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes)); + + for (j = 0; j < 2; ++j) { + mode_lib->ms.DCFCLKState[j] = mode_lib->ms.state.dcfclk_mhz; + } + + /* Immediate Flip and MALL parameters */ + s->ImmediateFlipRequiredFinal = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->ImmediateFlipRequiredFinal = s->ImmediateFlipRequiredFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required); + } + + mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified || + ((mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_required) && + (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required)); + } + mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified && 
s->ImmediateFlipRequiredFinal; + + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == true || mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) && + (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame || mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)); + } + + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen || + ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)) || + ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)); + } + + s->FullFrameMALLPStateMethod = false; + s->SubViewportMALLPStateMethod = false; + s->PhantomPipeMALLPStateMethod = false; + s->SubViewportMALLRefreshGreaterThan120Hz = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame) + s->FullFrameMALLPStateMethod = true; + if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) { + s->SubViewportMALLPStateMethod = true; + if (mode_lib->ms.cache_display_cfg.timing.RefreshRate[k] > 120) + s->SubViewportMALLRefreshGreaterThan120Hz = true; + } + if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) + s->PhantomPipeMALLPStateMethod = true; + } + mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) + || (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; + + if (mode_lib->ms.policy.UseMinimumRequiredDCFCLK == true) { + UseMinimumDCFCLK_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + UseMinimumDCFCLK_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay; + UseMinimumDCFCLK_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal; + UseMinimumDCFCLK_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; + UseMinimumDCFCLK_params->MaxPrefetchMode = dml_prefetch_support_stutter; + UseMinimumDCFCLK_params->DRAMClockChangeLatencyFinal = mode_lib->ms.state.dram_clock_change_latency_us; + UseMinimumDCFCLK_params->FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us; + 
UseMinimumDCFCLK_params->SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us; + UseMinimumDCFCLK_params->ReturnBusWidth = mode_lib->ms.soc.return_bus_width_bytes; + UseMinimumDCFCLK_params->RoundTripPingLatencyCycles = mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles; + UseMinimumDCFCLK_params->ReorderingBytes = s->ReorderingBytes; + UseMinimumDCFCLK_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; + UseMinimumDCFCLK_params->MetaChunkSize = mode_lib->ms.ip.meta_chunk_size_kbytes; + UseMinimumDCFCLK_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; + UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; + UseMinimumDCFCLK_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + UseMinimumDCFCLK_params->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation = mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent; + UseMinimumDCFCLK_params->PercentOfIdealSDPPortBWReceivedAfterUrgLatency = mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent; + UseMinimumDCFCLK_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; + UseMinimumDCFCLK_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; + UseMinimumDCFCLK_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes; + UseMinimumDCFCLK_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired; + UseMinimumDCFCLK_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace; + UseMinimumDCFCLK_params->RequiredDPPCLKPerSurface = mode_lib->ms.RequiredDPPCLKPerSurface; + UseMinimumDCFCLK_params->RequiredDISPCLK = mode_lib->ms.RequiredDISPCLK; + UseMinimumDCFCLK_params->UrgLatency = mode_lib->ms.UrgLatency; + UseMinimumDCFCLK_params->NoOfDPP = mode_lib->ms.NoOfDPP; + UseMinimumDCFCLK_params->ProjectedDCFCLKDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep; + UseMinimumDCFCLK_params->MaximumVStartup = s->MaximumVStartup; + UseMinimumDCFCLK_params->TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP; + UseMinimumDCFCLK_params->TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP; + UseMinimumDCFCLK_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + UseMinimumDCFCLK_params->PrefetchLinesY = mode_lib->ms.PrefetchLinesY; + UseMinimumDCFCLK_params->PrefetchLinesC = mode_lib->ms.PrefetchLinesC; + UseMinimumDCFCLK_params->swath_width_luma_ub_all_states = mode_lib->ms.swath_width_luma_ub_all_states; + UseMinimumDCFCLK_params->swath_width_chroma_ub_all_states = mode_lib->ms.swath_width_chroma_ub_all_states; + UseMinimumDCFCLK_params->BytePerPixelY = mode_lib->ms.BytePerPixelY; + UseMinimumDCFCLK_params->BytePerPixelC = mode_lib->ms.BytePerPixelC; + UseMinimumDCFCLK_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal; + 
UseMinimumDCFCLK_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; + UseMinimumDCFCLK_params->PDEAndMetaPTEBytesPerFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame; + UseMinimumDCFCLK_params->DPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow; + UseMinimumDCFCLK_params->MetaRowBytes = mode_lib->ms.MetaRowBytes; + UseMinimumDCFCLK_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable; + UseMinimumDCFCLK_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma; + UseMinimumDCFCLK_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma; + UseMinimumDCFCLK_params->DCFCLKPerState = mode_lib->ms.state.dcfclk_mhz; + UseMinimumDCFCLK_params->DCFCLKState = mode_lib->ms.DCFCLKState; + + UseMinimumDCFCLK(&mode_lib->scratch, + UseMinimumDCFCLK_params); + + } // UseMinimumRequiredDCFCLK == true + + for (j = 0; j < 2; ++j) { + mode_lib->ms.ReturnBWPerState[j] = dml_get_return_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.DCFCLKState[j], mode_lib->ms.state.fabricclk_mhz, + mode_lib->ms.state.dram_speed_mts); + mode_lib->ms.ReturnDRAMBWPerState[j] = dml_get_return_dram_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.state.dram_speed_mts); + } + + //Re-ordering Buffer Support Check + for (j = 0; j < 2; ++j) { + if ((mode_lib->ms.ip.rob_buffer_size_kbytes - mode_lib->ms.ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.ReturnBWPerState[j] > + (mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles + 32) / mode_lib->ms.DCFCLKState[j] + s->ReorderingBytes / mode_lib->ms.ReturnBWPerState[j]) { + mode_lib->ms.support.ROBSupport[j] = true; + } else { + mode_lib->ms.support.ROBSupport[j] = false; + } + dml_print("DML::%s: DEBUG ROBSupport[%u] = %u (%u)\n", __func__, j, mode_lib->ms.support.ROBSupport[j], __LINE__); + } + + //Vertical Active BW support check + s->MaxTotalVActiveRDBandwidth = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->MaxTotalVActiveRDBandwidth = s->MaxTotalVActiveRDBandwidth + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k]; + } + + for (j = 0; j < 2; ++j) { + mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j] = dml_min3(mode_lib->ms.soc.return_bus_width_bytes * mode_lib->ms.DCFCLKState[j] * mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent / 100.0, + mode_lib->ms.state.fabricclk_mhz * mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes * mode_lib->ms.soc.max_avg_fabric_bw_use_normal_percent / 100.0, + mode_lib->ms.state.dram_speed_mts * mode_lib->ms.soc.num_chans * mode_lib->ms.soc.dram_channel_width_bytes * + ((mode_lib->ms.state.use_ideal_dram_bw_strobe && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable) ? 
+ mode_lib->ms.soc.max_avg_dram_bw_use_normal_strobe_percent : mode_lib->ms.soc.max_avg_dram_bw_use_normal_percent) / 100.0); + + if (s->MaxTotalVActiveRDBandwidth <= mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j]) { + mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = true; + } else { + mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = false; + } + } + + /* Prefetch Check */ + dml_prefetch_check(mode_lib); + + // End of Prefetch Check + dml_print("DML::%s: Done prefetch calculation\n", __func__); + + /*Cursor Support Check*/ + mode_lib->ms.support.CursorSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] > 0.0) { + if (mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] == 64 && mode_lib->ms.ip.cursor_64bpp_support == false) { + mode_lib->ms.support.CursorSupport = false; + } + } + } + + /*Valid Pitch Check*/ + mode_lib->ms.support.PitchSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.support.AlignedYPitch[k] = dml_ceil( + dml_max(mode_lib->ms.cache_display_cfg.surface.PitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), + mode_lib->ms.MacroTileWidthY[k]); + if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) { + mode_lib->ms.support.AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), 64.0 * mode_lib->ms.Read256BlockWidthY[k]); + } else { + mode_lib->ms.support.AlignedDCCMetaPitchY[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k]; + } + if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8) { + mode_lib->ms.support.AlignedCPitch[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.PitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), mode_lib->ms.MacroTileWidthC[k]); + if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) { + mode_lib->ms.support.AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), 64.0 * mode_lib->ms.Read256BlockWidthC[k]); + } else { + mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]; + } + } else { + mode_lib->ms.support.AlignedCPitch[k] = mode_lib->ms.cache_display_cfg.surface.PitchC[k]; + mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]; + } + if (mode_lib->ms.support.AlignedYPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchY[k] || mode_lib->ms.support.AlignedCPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchC[k] || + mode_lib->ms.support.AlignedDCCMetaPitchY[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k] || mode_lib->ms.support.AlignedDCCMetaPitchC[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]) { + mode_lib->ms.support.PitchSupport = false; + } + } + + mode_lib->ms.support.ViewportExceedsSurface = false; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 
+ if (mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k]) { + mode_lib->ms.support.ViewportExceedsSurface = true; + if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 && + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_8 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe) { + if (mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k]) { + mode_lib->ms.support.ViewportExceedsSurface = true; + } + } + } + } + + /*Mode Support, Voltage State and SOC Configuration*/ + for (j = 0; j < 2; j++) { // j iterator is for the combine mode off or on + dml_print("DML::%s: checking support for j=%u\n", __func__, j); + dml_print("DML::%s: state_idx=%0d max_state_idx=%0d\n", __func__, mode_lib->ms.state_idx, mode_lib->ms.max_state_idx); + + s->is_max_pwr_state = (mode_lib->ms.max_state_idx == mode_lib->ms.state_idx); + s->is_max_dram_pwr_state = (mode_lib->ms.max_state.dram_speed_mts == mode_lib->ms.state.dram_speed_mts); + + s->dram_clock_change_support = (!mode_lib->ms.policy.DRAMClockChangeRequirementFinal || + (s->is_max_dram_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported) || + mode_lib->ms.support.DRAMClockChangeSupport[j] != dml_dram_clock_change_unsupported); + s->f_clock_change_support = (!mode_lib->ms.policy.FCLKChangeRequirementFinal || + (s->is_max_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported) || + mode_lib->ms.support.FCLKChangeSupport[j] != dml_fclock_change_unsupported); + + if (mode_lib->ms.support.ScaleRatioAndTapsSupport == true + && mode_lib->ms.support.SourceFormatPixelAndScanSupport == true + && mode_lib->ms.support.ViewportSizeSupport[j] == true + && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion + && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated + && !mode_lib->ms.support.BPPForMultistreamNotIndicated + && !mode_lib->ms.support.MultistreamWithHDMIOreDP + && !mode_lib->ms.support.ExceededMultistreamSlots + && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink + && !mode_lib->ms.support.NotEnoughLanesForMSO + && mode_lib->ms.support.LinkCapacitySupport == true + && !mode_lib->ms.support.P2IWith420 + && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP + && !mode_lib->ms.support.DSC422NativeNotSupported + && !mode_lib->ms.support.MPCCombineMethodIncompatible + && mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true + && mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true + && mode_lib->ms.support.NotEnoughDSCUnits == false + && !mode_lib->ms.support.NotEnoughDSCSlices + && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false + && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport + && mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false + && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState + && 
!mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified + && mode_lib->ms.support.ROBSupport[j] == true + && mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true + && mode_lib->ms.support.TotalAvailablePipesSupport[j] == true + && mode_lib->ms.support.NumberOfOTGSupport == true + && mode_lib->ms.support.NumberOfHDMIFRLSupport == true + && mode_lib->ms.support.NumberOfDP2p0Support == true + && mode_lib->ms.support.EnoughWritebackUnits == true + && mode_lib->ms.support.WritebackLatencySupport == true + && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true + && mode_lib->ms.support.CursorSupport == true + && mode_lib->ms.support.PitchSupport == true + && mode_lib->ms.support.ViewportExceedsSurface == false + && mode_lib->ms.support.PrefetchSupported[j] == true + && mode_lib->ms.support.VActiveBandwithSupport[j] == true + && mode_lib->ms.support.DynamicMetadataSupported[j] == true + && mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true + && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true + && mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true + && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true + && mode_lib->ms.support.NonsupportedDSCInputBPC == false + && !mode_lib->ms.support.ExceededMALLSize + && ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j]) + && s->dram_clock_change_support == true + && s->f_clock_change_support == true + && (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j])) { + dml_print("DML::%s: mode is supported\n", __func__); + mode_lib->ms.support.ModeSupport[j] = true; + } else { + dml_print("DML::%s: mode is NOT supported\n", __func__); + mode_lib->ms.support.ModeSupport[j] = false; + dml_print_mode_support(mode_lib, j); + } + } + + mode_lib->ms.support.MaximumMPCCombine = 0; + mode_lib->ms.support.ModeIsSupported = 0; + if (mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true) { // if the mode is supported by either no combine or mpccombine + mode_lib->ms.support.ModeIsSupported = mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true; + + // Determine if MPC combine is necessary, depends on if using MPC combine will help dram clock change or fclk change, etc. 
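The condition below is dense, so a rough standalone sketch of the same decision may help. Everything in this sketch (the enum, struct, and helper names) is hypothetical and deliberately simplified; in particular it collapses the several vactive/vblank/DRR DRAM-clock-change variants into coarse support classes rather than reproducing the exact enum comparisons used here.

#include <stdbool.h>

/* Illustrative stand-ins for the per-configuration results being compared;
 * index 0 is the no-combine pass, index 1 the MPC-combine pass. */
enum pstate_class { PSTATE_UNSUPPORTED = 0, PSTATE_VBLANK = 1, PSTATE_VACTIVE = 2 };

struct combine_result {
	bool mode_supported;              /* stands in for ModeSupport[j] */
	enum pstate_class dram_pstate;    /* stands in for DRAMClockChangeSupport[j] */
	enum pstate_class fclk_pstate;    /* stands in for FCLKChangeSupport[j] */
};

/* Return 1 when the MPC-combine result should be selected, 0 otherwise. */
static int pick_maximum_mpc_combine(const struct combine_result res[2],
				    bool combine_whenever_possible,
				    bool combine_as_needed,
				    bool dram_pstate_required,
				    bool fclk_pstate_required)
{
	/* Only the combined configuration passes mode support at all. */
	if (!res[0].mode_supported && res[1].mode_supported)
		return 1;

	/* Policy asks for MPC combine whenever it is possible. */
	if (combine_whenever_possible)
		return 1;

	/* Combining upgrades the DRAM clock change support class. */
	if (combine_as_needed && dram_pstate_required &&
	    res[1].dram_pstate > res[0].dram_pstate)
		return 1;

	/* Combining upgrades the FCLK change support class. */
	if (combine_as_needed && fclk_pstate_required &&
	    res[1].fclk_pstate > res[0].fclk_pstate)
		return 1;

	return 0;
}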
+ if ((mode_lib->ms.support.ModeSupport[0] == false && mode_lib->ms.support.ModeSupport[1] == true) || s->MPCCombineMethodAsPossible || + (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.DRAMClockChangeRequirementFinal && + (((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_sub_vp) && + !(mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_sub_vp)) || + ((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr + || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_full_frame + || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_sub_vp || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_sub_vp + ) && + mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_unsupported))) + || (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.FCLKChangeRequirementFinal && + ((mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vactive && mode_lib->ms.support.FCLKChangeSupport[0] != dml_fclock_change_vactive) || + (mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vblank && mode_lib->ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)))) { + mode_lib->ms.support.MaximumMPCCombine = 1; + } else { + mode_lib->ms.support.MaximumMPCCombine = 0; + } + } + + // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0). 
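The assignments that follow simply collapse the two-entry (no-combine vs. MPC-combine) result arrays down to the column selected by MaximumMPCCombine. A minimal sketch of that selection pattern, with hypothetical array shapes and names, looks like:

#define NUM_COMBINE_CFGS 2U   /* index 0: no MPC combine, index 1: MPC combine */
#define MAX_PLANES_SKETCH 8U  /* illustrative bound, not the DML limit */

/* Copy the per-plane results of the winning configuration into the flat
 * arrays consumed by mode programming; 'winner' plays the role of
 * MaximumMPCCombine (0 or 1). */
static void select_winning_combine_cfg(unsigned int winner,
				       unsigned int num_planes,
				       const unsigned int dpp_per_surface[NUM_COMBINE_CFGS][MAX_PLANES_SKETCH],
				       const unsigned int det_size_kb[NUM_COMBINE_CFGS][MAX_PLANES_SKETCH],
				       unsigned int out_dpp[MAX_PLANES_SKETCH],
				       unsigned int out_det_size_kb[MAX_PLANES_SKETCH])
{
	unsigned int k;

	for (k = 0; k < num_planes && k < MAX_PLANES_SKETCH; k++) {
		out_dpp[k] = dpp_per_surface[winner][k];
		out_det_size_kb[k] = det_size_kb[winner][k];
	}
}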
+ mode_lib->ms.support.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupportedForState[mode_lib->ms.support.MaximumMPCCombine]; // Consider flip support if max combine support imm flip + mode_lib->ms.support.UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational + mode_lib->ms.support.CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational + + dml_print("DML::%s: ModeIsSupported = %u\n", __func__, mode_lib->ms.support.ModeIsSupported); + dml_print("DML::%s: MaximumMPCCombine = %u\n", __func__, mode_lib->ms.support.MaximumMPCCombine); + dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); + dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, mode_lib->ms.support.UnboundedRequestEnabled); + dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, mode_lib->ms.support.CompressedBufferSizeInkByte); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[mode_lib->ms.support.MaximumMPCCombine][k]; + mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[mode_lib->ms.support.MaximumMPCCombine][k]; + mode_lib->ms.SwathHeightY[k] = mode_lib->ms.SwathHeightYAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; + mode_lib->ms.SwathHeightC[k] = mode_lib->ms.SwathHeightCAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; + mode_lib->ms.DETBufferSizeInKByte[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; + mode_lib->ms.DETBufferSizeY[k] = mode_lib->ms.DETBufferSizeYAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; + mode_lib->ms.DETBufferSizeC[k] = mode_lib->ms.DETBufferSizeCAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; + } + + mode_lib->ms.DRAMSpeed = mode_lib->ms.state.dram_speed_mts; + mode_lib->ms.FabricClock = mode_lib->ms.state.fabricclk_mhz; + mode_lib->ms.SOCCLK = mode_lib->ms.state.socclk_mhz; + mode_lib->ms.DCFCLK = mode_lib->ms.DCFCLKState[mode_lib->ms.support.MaximumMPCCombine]; + mode_lib->ms.ReturnBW = mode_lib->ms.ReturnBWPerState[mode_lib->ms.support.MaximumMPCCombine]; + mode_lib->ms.ReturnDRAMBW = mode_lib->ms.ReturnDRAMBWPerState[mode_lib->ms.support.MaximumMPCCombine]; + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { + mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMModePerState[k]; + } else { + mode_lib->ms.support.ODMMode[k] = dml_odm_mode_bypass; + } + + mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k]; + mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k]; + mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBppPerState[k]; + mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputTypePerState[k]; + mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRatePerState[k]; + mode_lib->ms.support.SubViewportLinesNeededInMALL[k] = mode_lib->ms.SubViewportLinesNeededInMALL[k]; + } + + return mode_lib->ms.support.ModeIsSupported; +} // dml_core_mode_support + +/// @brief This function calculates some parameters thats are needed ahead of the mode programming function all +void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib) +{ + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ms.ip.config_return_buffer_size_in_kbytes, + 
mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ms.ip.rob_buffer_size_kbytes, + mode_lib->ms.ip.max_num_dpp, + mode_lib->ms.policy.NomDETInKByteOverrideEnable, + mode_lib->ms.policy.NomDETInKByteOverrideValue, + + /* Output */ + &mode_lib->ms.MaxTotalDETInKByte, + &mode_lib->ms.NomDETInKByte, + &mode_lib->ms.MinCompressedBufferSizeInKByte); + + PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported); + + mode_lib->ms.ReturnBW = dml_get_return_bw_mbps(&mode_lib->ms.soc, + mode_lib->ms.state.use_ideal_dram_bw_strobe, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.DCFCLK, + mode_lib->ms.FabricClock, + mode_lib->ms.DRAMSpeed); + dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW); + +} // dml_core_mode_support_partial + +/// @brief This is the mode programming function. It is assumed the display cfg is support at the given power state +void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg) +{ + struct dml_core_mode_programming_locals_st *s = &mode_lib->scratch.dml_core_mode_programming_locals; + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct CalculateStutterEfficiency_params_st *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; + struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + + struct mode_program_st *locals = &mode_lib->mp; + struct DmlPipe *myPipe; + dml_uint_t j = 0, k = 0; + dml_float_t TWait; + dml_bool_t isInterlaceTiming; + + mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg); + mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg); + dml_calc_pipe_plane_mapping(&mode_lib->ms.cache_display_cfg.hw, mode_lib->mp.pipe_plane); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- START --- \n", __func__); + dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); + dml_print("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); +#endif + + s->DSCFormatFactor = 0; + + // Unlike dppclk and dispclk which can be calculated in mode_programming + // DCFCLK is calculated in mode_support (which is the state bbox dcfclk or min dcfclk if min dcfclk option is used in mode support calculation) + if (clk_cfg->dcfclk_option != dml_use_override_freq) + locals->Dcfclk = mode_lib->ms.DCFCLK; + else + locals->Dcfclk = clk_cfg->dcfclk_mhz; + +#ifdef __DML_VBA_DEBUG__ + dml_print_dml_policy(&mode_lib->ms.policy); + dml_print_soc_state_bounding_box(&mode_lib->ms.state); + dml_print_soc_bounding_box(&mode_lib->ms.soc); + dml_print_clk_cfg(clk_cfg); + + dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); + dml_print("DML::%s: Using DCFCLK = %f\n", __func__, locals->Dcfclk); + dml_print("DML::%s: Using SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); +#endif + + locals->WritebackDISPCLK = 0.0; + locals->GlobalDPPCLK = 
0.0; + + // DISPCLK and DPPCLK Calculation + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) { + locals->WritebackDISPCLK = + dml_max( + locals->WritebackDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k], + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + mode_lib->ms.ip.writeback_line_buffer_buffer_size, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz)); + } + } + + locals->Dispclk_calculated = locals->WritebackDISPCLK; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { + locals->Dispclk_calculated = dml_max(locals->Dispclk_calculated, CalculateRequiredDispclk( + mode_lib->ms.cache_display_cfg.hw.ODMMode[k], + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.soc.dcn_downspread_percent, + mode_lib->ms.ip.dispclk_ramp_margin_percent, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, + mode_lib->ms.max_state.dispclk_mhz)); + } + } + if (clk_cfg->dispclk_option == dml_use_required_freq) + locals->Dispclk = locals->Dispclk_calculated; + else if (clk_cfg->dispclk_option == dml_use_override_freq) + locals->Dispclk = clk_cfg->dispclk_mhz; + else + locals->Dispclk = mode_lib->ms.state.dispclk_mhz; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Using Dispclk = %f\n", __func__, locals->Dispclk); +#endif + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + CalculateSinglePipeDPPCLKAndSCLThroughput( + mode_lib->ms.cache_display_cfg.plane.HRatio[k], + mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk, + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + mode_lib->ms.cache_display_cfg.plane.HTaps[k], + mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k], + mode_lib->ms.cache_display_cfg.plane.VTaps[k], + mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k], + + /* Output */ + &locals->PSCL_THROUGHPUT[k], + &locals->PSCL_THROUGHPUT_CHROMA[k], + &locals->DPPCLKUsingSingleDPP[k]); + } + + CalculateDPPCLK(mode_lib->ms.num_active_planes, + mode_lib->ms.soc.dcn_downspread_percent, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, + locals->DPPCLKUsingSingleDPP, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + /* Output */ + &locals->GlobalDPPCLK, + locals->Dppclk_calculated); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (clk_cfg->dppclk_option[k] == dml_use_required_freq) + locals->Dppclk[k] = locals->Dppclk_calculated[k]; + else if (clk_cfg->dppclk_option[k] == dml_use_override_freq) + locals->Dppclk[k] = clk_cfg->dppclk_mhz[k]; + else + locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Using Dppclk[%0d] = %f\n", __func__, k, locals->Dppclk[k]); +#endif + } + + for (k = 0; k < 
mode_lib->ms.num_active_planes; ++k) { + CalculateBytePerPixelAndBlockSizes( + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k], + + /* Output */ + &locals->BytePerPixelY[k], + &locals->BytePerPixelC[k], + &locals->BytePerPixelDETY[k], + &locals->BytePerPixelDETC[k], + &locals->BlockHeight256BytesY[k], + &locals->BlockHeight256BytesC[k], + &locals->BlockWidth256BytesY[k], + &locals->BlockWidth256BytesC[k], + &locals->BlockHeightY[k], + &locals->BlockHeightC[k], + &locals->BlockWidthY[k], + &locals->BlockWidthC[k]); + } + + + dml_print("DML::%s: %u\n", __func__, __LINE__); + CalculateSwathWidth( + false, // ForceSingleDPP + mode_lib->ms.num_active_planes, + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat, + mode_lib->ms.cache_display_cfg.plane.SourceScan, + mode_lib->ms.cache_display_cfg.plane.ViewportStationary, + mode_lib->ms.cache_display_cfg.plane.ViewportWidth, + mode_lib->ms.cache_display_cfg.plane.ViewportHeight, + mode_lib->ms.cache_display_cfg.plane.ViewportXStart, + mode_lib->ms.cache_display_cfg.plane.ViewportYStart, + mode_lib->ms.cache_display_cfg.plane.ViewportXStartC, + mode_lib->ms.cache_display_cfg.plane.ViewportYStartC, + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY, + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC, + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY, + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC, + mode_lib->ms.cache_display_cfg.hw.ODMMode, + locals->BytePerPixelY, + locals->BytePerPixelC, + locals->BlockHeight256BytesY, + locals->BlockHeight256BytesC, + locals->BlockWidth256BytesY, + locals->BlockWidth256BytesC, + mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming, + mode_lib->ms.cache_display_cfg.timing.HActive, + mode_lib->ms.cache_display_cfg.plane.HRatio, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + + /* Output */ + locals->SwathWidthSingleDPPY, + locals->SwathWidthSingleDPPC, + locals->SwathWidthY, + locals->SwathWidthC, + s->dummy_integer_array[0], // dml_uint_t MaximumSwathHeightY[] + s->dummy_integer_array[1], // dml_uint_t MaximumSwathHeightC[] + locals->swath_width_luma_ub, + locals->swath_width_chroma_ub); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + locals->ReadBandwidthSurfaceLuma[k] = locals->SwathWidthSingleDPPY[k] * locals->BytePerPixelY[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k]; + locals->ReadBandwidthSurfaceChroma[k] = locals->SwathWidthSingleDPPC[k] * locals->BytePerPixelC[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k]; + dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]); + } + + CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride; + CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes; + 
CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; + CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->Output = s->dummy_output_encoder_array; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = locals->ReadBandwidthSurfaceLuma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = locals->ReadBandwidthSurfaceChroma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0]; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1]; + CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan; + CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary; + CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat; + CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling; + CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth; + CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight; + CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart; + CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart; + CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC; + CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC; + CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY; + CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC; + CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY; + CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = locals->BlockHeight256BytesY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = locals->BlockHeight256BytesC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = locals->BlockWidth256BytesY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = locals->BlockWidth256BytesC; + CalculateSwathAndDETConfiguration_params->ODMMode = 
mode_lib->ms.cache_display_cfg.hw.ODMMode; + CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateSwathAndDETConfiguration_params->BytePerPixY = locals->BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = locals->BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = locals->BytePerPixelDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = locals->BytePerPixelDETC; + CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive; + CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; + CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0]; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1]; + CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2]; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3]; + CalculateSwathAndDETConfiguration_params->SwathHeightY = locals->SwathHeightY; + CalculateSwathAndDETConfiguration_params->SwathHeightC = locals->SwathHeightC; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = locals->DETBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = locals->DETBufferSizeY; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = locals->DETBufferSizeC; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &locals->UnboundedRequestEnabled; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &locals->compbuf_reserved_space_64b; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &locals->compbuf_reserved_space_zs; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &locals->CompressedBufferSizeInkByte; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0]; + + // VBA_DELTA + // Calculate DET size, swath height here. 
In VBA, they are calculated in mode check stage + CalculateSwathAndDETConfiguration(&mode_lib->scratch, + CalculateSwathAndDETConfiguration_params); + + // DCFCLK Deep Sleep + CalculateDCFCLKDeepSleep( + mode_lib->ms.num_active_planes, + locals->BytePerPixelY, + locals->BytePerPixelC, + mode_lib->ms.cache_display_cfg.plane.VRatio, + mode_lib->ms.cache_display_cfg.plane.VRatioChroma, + locals->SwathWidthY, + locals->SwathWidthC, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + mode_lib->ms.cache_display_cfg.plane.HRatio, + mode_lib->ms.cache_display_cfg.plane.HRatioChroma, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + locals->PSCL_THROUGHPUT, + locals->PSCL_THROUGHPUT_CHROMA, + locals->Dppclk, + locals->ReadBandwidthSurfaceLuma, + locals->ReadBandwidthSurfaceChroma, + mode_lib->ms.soc.return_bus_width_bytes, + + /* Output */ + &locals->DCFCLKDeepSleep); + + // DSCCLK + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) { + locals->DSCCLK_calculated[k] = 0.0; + } else { + if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) + s->DSCFormatFactor = 2; + else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) + s->DSCFormatFactor = 1; + else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) + s->DSCFormatFactor = 2; + else + s->DSCFormatFactor = 1; + if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1) + locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100); + else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1) + locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100); + else + locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100); + } + } + + // DSC Delay + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + locals->DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k], + mode_lib->ms.cache_display_cfg.hw.ODMMode[k], + mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k], + mode_lib->ms.cache_display_cfg.output.OutputBpp[k], + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + mode_lib->ms.cache_display_cfg.hw.NumberOfDSCSlices[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]); + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) // NumberOfSurfaces + if (j != k && mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j && mode_lib->ms.cache_display_cfg.hw.DSCEnabled[j]) + locals->DSCDelay[k] = locals->DSCDelay[j]; + + // Prefetch + CalculateSurfaceSizeInMall( + mode_lib->ms.num_active_planes, + mode_lib->ms.soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen, + mode_lib->ms.cache_display_cfg.surface.DCCEnable, + 
mode_lib->ms.cache_display_cfg.plane.ViewportStationary, + mode_lib->ms.cache_display_cfg.plane.ViewportXStart, + mode_lib->ms.cache_display_cfg.plane.ViewportYStart, + mode_lib->ms.cache_display_cfg.plane.ViewportXStartC, + mode_lib->ms.cache_display_cfg.plane.ViewportYStartC, + mode_lib->ms.cache_display_cfg.plane.ViewportWidth, + mode_lib->ms.cache_display_cfg.plane.ViewportHeight, + locals->BytePerPixelY, + mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma, + mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma, + locals->BytePerPixelC, + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY, + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC, + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY, + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC, + locals->BlockWidth256BytesY, + locals->BlockWidth256BytesC, + locals->BlockHeight256BytesY, + locals->BlockHeight256BytesC, + locals->BlockWidthY, + locals->BlockWidthC, + locals->BlockHeightY, + locals->BlockHeightC, + + /* Output */ + locals->SurfaceSizeInTheMALL, + &s->dummy_boolean[0]); /* dml_bool_t *ExceededMALLSize */ + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->SurfaceParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; + s->SurfaceParameters[k].DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]; + s->SurfaceParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; + s->SurfaceParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k]; + s->SurfaceParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k]; + s->SurfaceParameters[k].BlockWidth256BytesY = locals->BlockWidth256BytesY[k]; + s->SurfaceParameters[k].BlockHeight256BytesY = locals->BlockHeight256BytesY[k]; + s->SurfaceParameters[k].BlockWidth256BytesC = locals->BlockWidth256BytesC[k]; + s->SurfaceParameters[k].BlockHeight256BytesC = locals->BlockHeight256BytesC[k]; + s->SurfaceParameters[k].BlockWidthY = locals->BlockWidthY[k]; + s->SurfaceParameters[k].BlockHeightY = locals->BlockHeightY[k]; + s->SurfaceParameters[k].BlockWidthC = locals->BlockWidthC[k]; + s->SurfaceParameters[k].BlockHeightC = locals->BlockHeightC[k]; + s->SurfaceParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; + s->SurfaceParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; + s->SurfaceParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; + s->SurfaceParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; + s->SurfaceParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k]; + s->SurfaceParameters[k].BytePerPixelY = locals->BytePerPixelY[k]; + s->SurfaceParameters[k].BytePerPixelC = locals->BytePerPixelC[k]; + s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + s->SurfaceParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k]; + s->SurfaceParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k]; + s->SurfaceParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k]; + s->SurfaceParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]; + s->SurfaceParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k]; + s->SurfaceParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k]; + s->SurfaceParameters[k].PitchC = 
mode_lib->ms.cache_display_cfg.surface.PitchC[k]; + s->SurfaceParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]; + s->SurfaceParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k]; + s->SurfaceParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k]; + s->SurfaceParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k]; + s->SurfaceParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k]; + s->SurfaceParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k]; + s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k]; + s->SurfaceParameters[k].SwathHeightY = locals->SwathHeightY[k]; + s->SurfaceParameters[k].SwathHeightC = locals->SwathHeightC[k]; + } + + CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters; + CalculateVMRowAndSwath_params->SurfaceSizeInMALL = locals->SurfaceSizeInTheMALL; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes; + CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen; + CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes; + CalculateVMRowAndSwath_params->SwathWidthY = locals->SwathWidthY; + CalculateVMRowAndSwath_params->SwathWidthC = locals->SwathWidthC; + CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; + CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; + CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1]; + CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = locals->dpte_row_width_luma_ub; + CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = locals->dpte_row_width_chroma_ub; + CalculateVMRowAndSwath_params->dpte_row_height_luma = locals->dpte_row_height; + CalculateVMRowAndSwath_params->dpte_row_height_chroma = locals->dpte_row_height_chroma; + CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = locals->dpte_row_height_linear; + CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = 
locals->dpte_row_height_linear_chroma; + CalculateVMRowAndSwath_params->meta_req_width = locals->meta_req_width; + CalculateVMRowAndSwath_params->meta_req_width_chroma = locals->meta_req_width_chroma; + CalculateVMRowAndSwath_params->meta_req_height = locals->meta_req_height; + CalculateVMRowAndSwath_params->meta_req_height_chroma = locals->meta_req_height_chroma; + CalculateVMRowAndSwath_params->meta_row_width = locals->meta_row_width; + CalculateVMRowAndSwath_params->meta_row_width_chroma = locals->meta_row_width_chroma; + CalculateVMRowAndSwath_params->meta_row_height = locals->meta_row_height; + CalculateVMRowAndSwath_params->meta_row_height_chroma = locals->meta_row_height_chroma; + CalculateVMRowAndSwath_params->vm_group_bytes = locals->vm_group_bytes; + CalculateVMRowAndSwath_params->dpte_group_bytes = locals->dpte_group_bytes; + CalculateVMRowAndSwath_params->PixelPTEReqWidthY = locals->PixelPTEReqWidthY; + CalculateVMRowAndSwath_params->PixelPTEReqHeightY = locals->PixelPTEReqHeightY; + CalculateVMRowAndSwath_params->PTERequestSizeY = locals->PTERequestSizeY; + CalculateVMRowAndSwath_params->PixelPTEReqWidthC = locals->PixelPTEReqWidthC; + CalculateVMRowAndSwath_params->PixelPTEReqHeightC = locals->PixelPTEReqHeightC; + CalculateVMRowAndSwath_params->PTERequestSizeC = locals->PTERequestSizeC; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = locals->dpde0_bytes_per_frame_ub_l; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = locals->meta_pte_bytes_per_frame_ub_l; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = locals->dpde0_bytes_per_frame_ub_c; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = locals->meta_pte_bytes_per_frame_ub_c; + CalculateVMRowAndSwath_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY; + CalculateVMRowAndSwath_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC; + CalculateVMRowAndSwath_params->VInitPreFillY = locals->VInitPreFillY; + CalculateVMRowAndSwath_params->VInitPreFillC = locals->VInitPreFillC; + CalculateVMRowAndSwath_params->MaxNumSwathY = locals->MaxNumSwathY; + CalculateVMRowAndSwath_params->MaxNumSwathC = locals->MaxNumSwathC; + CalculateVMRowAndSwath_params->meta_row_bw = locals->meta_row_bw; + CalculateVMRowAndSwath_params->dpte_row_bw = locals->dpte_row_bw; + CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow; + CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame; + CalculateVMRowAndSwath_params->MetaRowByte = locals->MetaRowByte; + CalculateVMRowAndSwath_params->use_one_row_for_frame = locals->use_one_row_for_frame; + CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = locals->use_one_row_for_frame_flip; + CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = locals->UsesMALLForStaticScreen; + CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = locals->PTE_BUFFER_MODE; + CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = locals->BIGK_FRAGMENT_SIZE; + + CalculateVMRowAndSwath(&mode_lib->scratch, + CalculateVMRowAndSwath_params); + + s->ReorderBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3( + mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes)); + + s->VMDataOnlyReturnBW = dml_get_return_bw_mbps_vm_only(&mode_lib->ms.soc, + mode_lib->ms.state.use_ideal_dram_bw_strobe, + 
mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + locals->Dcfclk, + mode_lib->ms.FabricClock, + mode_lib->ms.DRAMSpeed); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: locals->Dcfclk = %f\n", __func__, locals->Dcfclk); + dml_print("DML::%s: mode_lib->ms.soc.return_bus_width_bytes = %u\n", __func__, mode_lib->ms.soc.return_bus_width_bytes); + dml_print("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + dml_print("DML::%s: mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes = %u\n", __func__, mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes); + dml_print("DML::%s: mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent = %f\n", __func__, mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent); + dml_print("DML::%s: mode_lib->ms.DRAMSpeed = %f\n", __func__, mode_lib->ms.DRAMSpeed); + dml_print("DML::%s: mode_lib->ms.soc.num_chans = %u\n", __func__, mode_lib->ms.soc.num_chans); + dml_print("DML::%s: mode_lib->ms.soc.dram_channel_width_bytes = %u\n", __func__, mode_lib->ms.soc.dram_channel_width_bytes); + dml_print("DML::%s: mode_lib->ms.state_idx = %u\n", __func__, mode_lib->ms.state_idx); + dml_print("DML::%s: mode_lib->ms.max_state_idx = %u\n", __func__, mode_lib->ms.max_state_idx); + dml_print("DML::%s: mode_lib->ms.state.use_ideal_dram_bw_strobe = %u\n", __func__, mode_lib->ms.state.use_ideal_dram_bw_strobe); + dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, s->VMDataOnlyReturnBW); + dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW); +#endif + + s->HostVMInefficiencyFactor = 1.0; + if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable) + s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBW / s->VMDataOnlyReturnBW; + + s->TotalDCCActiveDPP = 0; + s->TotalActiveDPP = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]; + if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]) + s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]; + } + + locals->UrgentExtraLatency = CalculateExtraLatency( + mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles, + s->ReorderBytes, + locals->Dcfclk, + s->TotalActiveDPP, + mode_lib->ms.ip.pixel_chunk_size_kbytes, + s->TotalDCCActiveDPP, + mode_lib->ms.ip.meta_chunk_size_kbytes, + mode_lib->ms.ReturnBW, + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.num_active_planes, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + locals->dpte_group_bytes, + s->HostVMInefficiencyFactor, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); + + locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + locals->WritebackDelay[k] = + mode_lib->ms.state.writeback_latency_us + + CalculateWriteBackDelay( + mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k], + 
mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk; + } else + locals->WritebackDelay[k] = 0; + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[j] == k + && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[j] == true) { + locals->WritebackDelay[k] = + dml_max( + locals->WritebackDelay[k], + mode_lib->ms.state.writeback_latency_us + + CalculateWriteBackDelay( + mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[j], + mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[j], + mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[j], + mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[j], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[j], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[j], + mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[j], + mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk); + } + } + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j) + locals->WritebackDelay[k] = locals->WritebackDelay[j]; + + locals->UrgentLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us, + mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us, + mode_lib->ms.state.urgent_latency_vm_data_only_us, + mode_lib->ms.soc.do_urgent_latency_adjustment, + mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us, + mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz, + mode_lib->ms.FabricClock); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + CalculateUrgentBurstFactor(mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + locals->swath_width_luma_ub[k], + locals->swath_width_chroma_ub[k], + locals->SwathHeightY[k], + locals->SwathHeightC[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + locals->UrgentLatency, + mode_lib->ms.ip.cursor_buffer_size, + mode_lib->ms.cache_display_cfg.plane.CursorWidth[k], + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k], + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + locals->BytePerPixelDETY[k], + locals->BytePerPixelDETC[k], + locals->DETBufferSizeY[k], + locals->DETBufferSizeC[k], + + /* output */ + &locals->UrgBurstFactorCursor[k], + &locals->UrgBurstFactorLuma[k], + &locals->UrgBurstFactorChroma[k], + &locals->NoUrgentLatencyHiding[k]); + + locals->cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / + ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k]; + } + + s->VStartupLines = __DML_VBA_MIN_VSTARTUP__; + s->MaxVStartupAllPlanes = 0; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->MaxVStartupLines[k] = CalculateMaxVStartup(k, + mode_lib->ms.ip.ptoi_supported, + mode_lib->ms.ip.vblank_nom_default_us, + &mode_lib->ms.cache_display_cfg.timing, + locals->WritebackDelay[k]); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u 
MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + dml_print("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, locals->WritebackDelay[k]); +#endif + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) + s->MaxVStartupAllPlanes = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes, s->MaxVStartupLines[k])); + + s->ImmediateFlipRequirementFinal = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->ImmediateFlipRequirementFinal = s->ImmediateFlipRequirementFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required); + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ImmediateFlipRequirementFinal = %u\n", __func__, s->ImmediateFlipRequirementFinal); +#endif + + // The prefetch scheduling should only be calculated once as per AllowForPStateChangeOrStutterInVBlank requirement + // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving feature + // if possible, then will try to program for the best power saving features in order of difficulty (dram, fclk, stutter) + s->iteration = 0; + s->MaxTotalRDBandwidth = 0; + s->AllPrefetchModeTested = false; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]); + s->NextPrefetchMode[k] = s->MinPrefetchMode[k]; + } + + do { + s->MaxTotalRDBandwidthNoUrgentBurst = 0.0; + s->DestinationLineTimesForPrefetchLessThan2 = false; + s->VRatioPrefetchMoreThanMax = false; + + dml_print("DML::%s: Start one iteration: VStartupLines = %u\n", __func__, s->VStartupLines); + + s->AllPrefetchModeTested = true; + s->MaxTotalRDBandwidth = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + locals->PrefetchMode[k] = s->NextPrefetchMode[k]; + TWait = CalculateTWait( + locals->PrefetchMode[k], + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k], + mode_lib->ms.state.dram_clock_change_latency_us, + mode_lib->ms.state.fclk_change_latency_us, + locals->UrgentLatency, + mode_lib->ms.state.sr_enter_plus_exit_time_us); + + myPipe = &s->myPipe; + myPipe->Dppclk = locals->Dppclk[k]; + myPipe->Dispclk = locals->Dispclk; + myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; + myPipe->DCFClkDeepSleep = locals->DCFCLKDeepSleep; + myPipe->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]; + myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k]; + myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; + myPipe->BlockWidth256BytesY = locals->BlockWidth256BytesY[k]; + myPipe->BlockHeight256BytesY = locals->BlockHeight256BytesY[k]; + myPipe->BlockWidth256BytesC = locals->BlockWidth256BytesC[k]; + myPipe->BlockHeight256BytesC = locals->BlockHeight256BytesC[k]; + myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; + myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k]; + myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]; + myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; + myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k]; + myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; + myPipe->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode[k]; + 
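// The remaining myPipe fields describe the surface format of plane k; + // CalculatePrefetchSchedule below uses them to derive this plane's prefetch schedule and bandwidth. + 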
myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; + myPipe->BytePerPixelY = locals->BytePerPixelY[k]; + myPipe->BytePerPixelC = locals->BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]); + dml_print("DML::%s: PrefetchMode[k] = %u (Min=%u Max=%u)\n", __func__, locals->PrefetchMode[k], s->MinPrefetchMode[k], s->MaxPrefetchMode[k]); +#endif + + CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = locals->DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(locals->SwathWidthY[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]); + CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k]; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k])); + CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k]; + CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k]; + CalculatePrefetchSchedule_params->UrgentLatency = locals->UrgentLatency; + CalculatePrefetchSchedule_params->UrgentExtraLatency = locals->UrgentExtraLatency; + CalculatePrefetchSchedule_params->TCalc = locals->TCalc; + CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame[k]; + 
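// Remaining inputs are the per-plane row byte counts and swath geometry computed earlier; + // the output pointers further below write the resulting schedule back into the locals arrays for plane k. + 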
CalculatePrefetchSchedule_params->MetaRowByte = locals->MetaRowByte[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = locals->VInitPreFillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = locals->MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = locals->VInitPreFillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = locals->MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = locals->swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = locals->swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = locals->SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = locals->SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = TWait; + CalculatePrefetchSchedule_params->DSTXAfterScaler = &locals->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &locals->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &locals->DestinationLinesForPrefetch[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &locals->DestinationLinesToRequestVMInVBlank[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &locals->DestinationLinesToRequestRowInVBlank[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &locals->VRatioPrefetchY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &locals->VRatioPrefetchC[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &locals->RequiredPrefetchPixDataBWLuma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &locals->RequiredPrefetchPixDataBWChroma[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &locals->NotEnoughTimeForDynamicMetadata[k]; + CalculatePrefetchSchedule_params->Tno_bw = &locals->Tno_bw[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &locals->prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &locals->Tdmdl_vm[k]; + CalculatePrefetchSchedule_params->Tdmdl = &locals->Tdmdl[k]; + CalculatePrefetchSchedule_params->TSetup = &locals->TSetup[k]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &locals->VUpdateOffsetPix[k]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &locals->VUpdateWidthPix[k]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &locals->VReadyOffsetPix[k]; + + locals->NoTimeToPrefetch[k] = + CalculatePrefetchSchedule(&mode_lib->scratch, + CalculatePrefetchSchedule_params); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, locals->NoTimeToPrefetch[k]); +#endif + locals->VStartup[k] = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k])); + locals->VStartupMin[k] = locals->VStartup[k]; + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + CalculateUrgentBurstFactor( + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + locals->swath_width_luma_ub[k], + locals->swath_width_chroma_ub[k], + locals->SwathHeightY[k], + locals->SwathHeightC[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + locals->UrgentLatency, + mode_lib->ms.ip.cursor_buffer_size, + 
mode_lib->ms.cache_display_cfg.plane.CursorWidth[k], + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k], + locals->VRatioPrefetchY[k], + locals->VRatioPrefetchC[k], + locals->BytePerPixelDETY[k], + locals->BytePerPixelDETC[k], + locals->DETBufferSizeY[k], + locals->DETBufferSizeC[k], + /* Output */ + &locals->UrgBurstFactorCursorPre[k], + &locals->UrgBurstFactorLumaPre[k], + &locals->UrgBurstFactorChromaPre[k], + &locals->NoUrgentLatencyHidingPre[k]); + + locals->cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * locals->VRatioPrefetchY[k]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]); + dml_print("DML::%s: k=%0u UrgBurstFactorLuma=%f\n", __func__, k, locals->UrgBurstFactorLuma[k]); + dml_print("DML::%s: k=%0u UrgBurstFactorChroma=%f\n", __func__, k, locals->UrgBurstFactorChroma[k]); + dml_print("DML::%s: k=%0u UrgBurstFactorLumaPre=%f\n", __func__, k, locals->UrgBurstFactorLumaPre[k]); + dml_print("DML::%s: k=%0u UrgBurstFactorChromaPre=%f\n", __func__, k, locals->UrgBurstFactorChromaPre[k]); + + dml_print("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, locals->VRatioPrefetchY[k]); + dml_print("DML::%s: k=%0u VRatioY=%f\n", __func__, k, mode_lib->ms.cache_display_cfg.plane.VRatio[k]); + + dml_print("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, locals->prefetch_vmrow_bw[k]); + dml_print("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]); + dml_print("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, locals->cursor_bw[k]); + dml_print("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, locals->meta_row_bw[k]); + dml_print("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, locals->dpte_row_bw[k]); + dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWLuma[k]); + dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWChroma[k]); + dml_print("DML::%s: k=%0u cursor_bw_pre=%f\n", __func__, k, locals->cursor_bw_pre[k]); + dml_print("DML::%s: k=%0u MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, s->MaxTotalRDBandwidthNoUrgentBurst); +#endif + if (locals->DestinationLinesForPrefetch[k] < 2) + s->DestinationLineTimesForPrefetchLessThan2 = true; + + if (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + ((s->VStartupLines < s->MaxVStartupLines[k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) && + (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__ || locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__))) + s->VRatioPrefetchMoreThanMax = true; + + //dml_bool_t DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false; + //dml_bool_t DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false; + //if (locals->DestinationLinesToRequestVMInVBlank[k] >= 32) { + // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true; + //} + + //if (locals->DestinationLinesToRequestRowInVBlank[k] >= 16) { + // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true; 
+ //} + } + + locals->FractionOfUrgentBandwidth = s->MaxTotalRDBandwidthNoUrgentBurst / mode_lib->ms.ReturnBW; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, s->MaxTotalRDBandwidthNoUrgentBurst); + dml_print("DML::%s: ReturnBW=%f \n", __func__, mode_lib->ms.ReturnBW); + dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, locals->FractionOfUrgentBandwidth); +#endif + + CalculatePrefetchBandwithSupport( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBW, + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + locals->NoUrgentLatencyHidingPre, + locals->ReadBandwidthSurfaceLuma, + locals->ReadBandwidthSurfaceChroma, + locals->RequiredPrefetchPixDataBWLuma, + locals->RequiredPrefetchPixDataBWChroma, + locals->cursor_bw, + locals->meta_row_bw, + locals->dpte_row_bw, + locals->cursor_bw_pre, + locals->prefetch_vmrow_bw, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + locals->UrgBurstFactorLuma, + locals->UrgBurstFactorChroma, + locals->UrgBurstFactorCursor, + locals->UrgBurstFactorLumaPre, + locals->UrgBurstFactorChromaPre, + locals->UrgBurstFactorCursorPre, + + /* output */ + &s->MaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth + &s->MaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch + &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth + &locals->PrefetchModeSupported); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) + s->dummy_unit_vector[k] = 1.0; + + CalculatePrefetchBandwithSupport(mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBW, + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + locals->NoUrgentLatencyHidingPre, + locals->ReadBandwidthSurfaceLuma, + locals->ReadBandwidthSurfaceChroma, + locals->RequiredPrefetchPixDataBWLuma, + locals->RequiredPrefetchPixDataBWChroma, + locals->cursor_bw, + locals->meta_row_bw, + locals->dpte_row_bw, + locals->cursor_bw_pre, + locals->prefetch_vmrow_bw, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + s->dummy_unit_vector, + s->dummy_unit_vector, + s->dummy_unit_vector, + s->dummy_unit_vector, + s->dummy_unit_vector, + s->dummy_unit_vector, + + /* output */ + &s->NonUrgentMaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth + &s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch + &locals->FractionOfUrgentBandwidth, + &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport + + + + if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) { + dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); + locals->PrefetchModeSupported = false; + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (locals->NoTimeToPrefetch[k] == true || locals->NotEnoughTimeForDynamicMetadata[k]) { + dml_print("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, locals->NoTimeToPrefetch[k]); + dml_print("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, locals->NotEnoughTimeForDynamicMetadata[k]); + locals->PrefetchModeSupported = false; + } + } + + + if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) { + locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( + mode_lib->ms.num_active_planes, + 
mode_lib->ms.ReturnBW, + locals->ReadBandwidthSurfaceLuma, + locals->ReadBandwidthSurfaceChroma, + locals->RequiredPrefetchPixDataBWLuma, + locals->RequiredPrefetchPixDataBWChroma, + locals->cursor_bw, + locals->cursor_bw_pre, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + locals->UrgBurstFactorLuma, + locals->UrgBurstFactorChroma, + locals->UrgBurstFactorCursor, + locals->UrgBurstFactorLumaPre, + locals->UrgBurstFactorChromaPre, + locals->UrgBurstFactorCursorPre); + + locals->TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) { + locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k]); + if (locals->use_one_row_for_frame_flip[k]) { + locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (2 * locals->PixelPTEBytesPerRow[k]); + } else { + locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * locals->PixelPTEBytesPerRow[k]; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k = %u\n", __func__, k); + dml_print("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, locals->PDEAndMetaPTEBytesFrame[k]); + dml_print("DML::%s: MetaRowByte = %u\n", __func__, locals->MetaRowByte[k]); + dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, locals->PixelPTEBytesPerRow[k]); + dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, locals->TotImmediateFlipBytes); +#endif + } + } + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + CalculateFlipSchedule( + s->HostVMInefficiencyFactor, + locals->UrgentExtraLatency, + locals->UrgentLatency, + mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + locals->PDEAndMetaPTEBytesFrame[k], + locals->MetaRowByte[k], + locals->PixelPTEBytesPerRow[k], + locals->BandwidthAvailableForImmediateFlip, + locals->TotImmediateFlipBytes, + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + locals->Tno_bw[k], + mode_lib->ms.cache_display_cfg.surface.DCCEnable[k], + locals->dpte_row_height[k], + locals->meta_row_height[k], + locals->dpte_row_height_chroma[k], + locals->meta_row_height_chroma[k], + locals->use_one_row_for_frame_flip[k], + + /* Output */ + &locals->DestinationLinesToRequestVMInImmediateFlip[k], + &locals->DestinationLinesToRequestRowInImmediateFlip[k], + &locals->final_flip_bw[k], + &locals->ImmediateFlipSupportedForPipe[k]); + } + + CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBW, + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + mode_lib->ms.policy.ImmediateFlipRequirement, + locals->final_flip_bw, + locals->ReadBandwidthSurfaceLuma, + locals->ReadBandwidthSurfaceChroma, + locals->RequiredPrefetchPixDataBWLuma, + locals->RequiredPrefetchPixDataBWChroma, + 
locals->cursor_bw, + locals->meta_row_bw, + locals->dpte_row_bw, + locals->cursor_bw_pre, + locals->prefetch_vmrow_bw, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + locals->UrgBurstFactorLuma, + locals->UrgBurstFactorChroma, + locals->UrgBurstFactorCursor, + locals->UrgBurstFactorLumaPre, + locals->UrgBurstFactorChromaPre, + locals->UrgBurstFactorCursorPre, + + /* output */ + &locals->total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth + &locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch + &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth + &locals->ImmediateFlipSupported); // dml_bool_t *ImmediateFlipBandwidthSupport + + CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBW, + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + mode_lib->ms.policy.ImmediateFlipRequirement, + locals->final_flip_bw, + locals->ReadBandwidthSurfaceLuma, + locals->ReadBandwidthSurfaceChroma, + locals->RequiredPrefetchPixDataBWLuma, + locals->RequiredPrefetchPixDataBWChroma, + locals->cursor_bw, + locals->meta_row_bw, + locals->dpte_row_bw, + locals->cursor_bw_pre, + locals->prefetch_vmrow_bw, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + s->dummy_unit_vector, + s->dummy_unit_vector, + s->dummy_unit_vector, + s->dummy_unit_vector, + s->dummy_unit_vector, + s->dummy_unit_vector, + + /* output */ + &locals->non_urgent_total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth + &locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch + &locals->FractionOfUrgentBandwidthImmediateFlip, // dml_float_t *FractionOfUrgentBandwidth + &s->dummy_boolean[0]); // dml_bool_t *ImmediateFlipBandwidthSupport + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required && locals->ImmediateFlipSupportedForPipe[k] == false) { + locals->ImmediateFlipSupported = false; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); +#endif + } + } + } else { + locals->ImmediateFlipSupported = false; + locals->total_dcn_read_bw_with_flip = s->MaxTotalRDBandwidth; + locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->MaxTotalRDBandwidthNotIncludingMALLPrefetch; + locals->non_urgent_total_dcn_read_bw_with_flip = s->NonUrgentMaxTotalRDBandwidth; + locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch; + } + + /* consider flip support is okay if the flip bw is ok or (when user does't require a iflip and there is no host vm) */ + locals->PrefetchAndImmediateFlipSupported = (locals->PrefetchModeSupported == true && + ((!mode_lib->ms.support.ImmediateFlipSupport && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable && !s->ImmediateFlipRequirementFinal) || + locals->ImmediateFlipSupported)) ? 
true : false; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PrefetchModeSupported = %u\n", __func__, locals->PrefetchModeSupported); + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) + dml_print("DML::%s: ImmediateFlipRequirement[%u] = %u\n", __func__, k, mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required); + dml_print("DML::%s: HostVMEnable = %u\n", __func__, mode_lib->ms.cache_display_cfg.plane.HostVMEnable); + dml_print("DML::%s: ImmediateFlipSupport = %u (from mode_support)\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); + dml_print("DML::%s: ImmediateFlipSupported = %u\n", __func__, locals->ImmediateFlipSupported); + dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, locals->PrefetchAndImmediateFlipSupported); +#endif + dml_print("DML::%s: Done one iteration: VStartupLines=%u, MaxVStartupAllPlanes=%u\n", __func__, s->VStartupLines, s->MaxVStartupAllPlanes); + + s->VStartupLines = s->VStartupLines + 1; + + if (s->VStartupLines > s->MaxVStartupAllPlanes) { + s->VStartupLines = __DML_VBA_MIN_VSTARTUP__; + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1; + + if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k]) + s->AllPrefetchModeTested = false; + dml_print("DML::%s: VStartupLines=%u, reaches max vstartup, try next prefetch mode=%u\n", __func__, s->VStartupLines-1, s->AllPrefetchModeTested); + } + } else { + s->AllPrefetchModeTested = false; + } + s->iteration++; + if (s->iteration > 2500) { + dml_print("ERROR: DML::%s: Too many errors, exit now\n", __func__); + ASSERT(0); + } + } while (!(locals->PrefetchAndImmediateFlipSupported || s->AllPrefetchModeTested)); + + if (locals->PrefetchAndImmediateFlipSupported) { + dml_print("DML::%s: Good, Prefetch and flip scheduling solution found at VStartupLines=%u (MaxVStartupAllPlanes=%u)\n", __func__, s->VStartupLines-1, s->MaxVStartupAllPlanes); + } else { + dml_print("DML::%s: Bad, Prefetch and flip scheduling solution did NOT find solution! 
(MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes); + } + + //Watermarks and NB P-State/DRAM Clock Change Support + { + s->mmSOCParameters.UrgentLatency = locals->UrgentLatency; + s->mmSOCParameters.ExtraLatency = locals->UrgentExtraLatency; + s->mmSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us; + s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us; + s->mmSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us; + s->mmSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us; + s->mmSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us; + s->mmSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us; + s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us; + s->mmSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us; + s->mmSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us; + + CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal; + CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateWatermarks_params->PrefetchMode = locals->PrefetchMode; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = locals->Dcfclk; + CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBW; + CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal; + CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay; + CalculateWatermarks_params->dpte_group_bytes = locals->dpte_group_bytes; + CalculateWatermarks_params->meta_row_height = locals->meta_row_height; + CalculateWatermarks_params->meta_row_height_chroma = locals->meta_row_height_chroma; + CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = locals->DCFCLKDeepSleep; + CalculateWatermarks_params->DETBufferSizeY = locals->DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = locals->DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = locals->SwathHeightY; + CalculateWatermarks_params->SwathHeightC = locals->SwathHeightC; + CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel; + CalculateWatermarks_params->SwathWidthY = locals->SwathWidthY; + CalculateWatermarks_params->SwathWidthC = locals->SwathWidthC; + CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; + CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; + CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps; + CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma; + 
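// The per-plane ratios, timings and buffer sizes below feed CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, + // which reports the watermarks plus DRAM and FCLK clock change support. + 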
CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio; + CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma; + CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal; + CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; + CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; + CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; + CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface; + CalculateWatermarks_params->BytePerPixelDETY = locals->BytePerPixelDETY; + CalculateWatermarks_params->BytePerPixelDETC = locals->BytePerPixelDETC; + CalculateWatermarks_params->DSTXAfterScaler = locals->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = locals->DSTYAfterScaler; + CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable; + CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat; + CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth; + CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight; + CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight; + CalculateWatermarks_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte; + + // Output + CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + &mode_lib->scratch, + CalculateWatermarks_params); + + /* Copy the calculated watermarks to mp.Watermark as the getter functions are + * implemented by the DML team to copy the calculated values from the mp.Watermark interface. + * &mode_lib->mp.Watermark and &locals->Watermark are the same address, memcpy may lead to + * unexpected behavior. memmove should be used. 
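+	 * memmove is specified to handle overlapping source and destination buffers, whereas +	 * memcpy on overlapping (here identical) regions is undefined behavior in C.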
+ */ + memmove(&mode_lib->mp.Watermark, CalculateWatermarks_params->Watermark, sizeof(struct Watermarks)); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + locals->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / + mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackDRAMClockChangeWatermark); + locals->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / + mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackFCLKChangeWatermark); + } else { + locals->WritebackAllowDRAMClockChangeEndPosition[k] = 0; + locals->WritebackAllowFCLKChangeEndPosition[k] = 0; + } + } + } + + //Display Pipeline Delivery Time in Prefetch, Groups + CalculatePixelDeliveryTimes( + mode_lib->ms.num_active_planes, + mode_lib->ms.cache_display_cfg.plane.VRatio, + mode_lib->ms.cache_display_cfg.plane.VRatioChroma, + locals->VRatioPrefetchY, + locals->VRatioPrefetchC, + locals->swath_width_luma_ub, + locals->swath_width_chroma_ub, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + mode_lib->ms.cache_display_cfg.plane.HRatio, + mode_lib->ms.cache_display_cfg.plane.HRatioChroma, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + locals->PSCL_THROUGHPUT, + locals->PSCL_THROUGHPUT_CHROMA, + locals->Dppclk, + locals->BytePerPixelC, + mode_lib->ms.cache_display_cfg.plane.SourceScan, + mode_lib->ms.cache_display_cfg.plane.NumberOfCursors, + mode_lib->ms.cache_display_cfg.plane.CursorWidth, + mode_lib->ms.cache_display_cfg.plane.CursorBPP, + locals->BlockWidth256BytesY, + locals->BlockHeight256BytesY, + locals->BlockWidth256BytesC, + locals->BlockHeight256BytesC, + + /* Output */ + locals->DisplayPipeLineDeliveryTimeLuma, + locals->DisplayPipeLineDeliveryTimeChroma, + locals->DisplayPipeLineDeliveryTimeLumaPrefetch, + locals->DisplayPipeLineDeliveryTimeChromaPrefetch, + locals->DisplayPipeRequestDeliveryTimeLuma, + locals->DisplayPipeRequestDeliveryTimeChroma, + locals->DisplayPipeRequestDeliveryTimeLumaPrefetch, + locals->DisplayPipeRequestDeliveryTimeChromaPrefetch, + locals->CursorRequestDeliveryTime, + locals->CursorRequestDeliveryTimePrefetch); + + CalculateMetaAndPTETimes( + locals->use_one_row_for_frame, + mode_lib->ms.num_active_planes, + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.ip.meta_chunk_size_kbytes, + mode_lib->ms.ip.min_meta_chunk_size_bytes, + mode_lib->ms.cache_display_cfg.timing.HTotal, + mode_lib->ms.cache_display_cfg.plane.VRatio, + mode_lib->ms.cache_display_cfg.plane.VRatioChroma, + locals->DestinationLinesToRequestRowInVBlank, + locals->DestinationLinesToRequestRowInImmediateFlip, + mode_lib->ms.cache_display_cfg.surface.DCCEnable, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + locals->BytePerPixelY, + locals->BytePerPixelC, + mode_lib->ms.cache_display_cfg.plane.SourceScan, + locals->dpte_row_height, + locals->dpte_row_height_chroma, + locals->meta_row_width, + locals->meta_row_width_chroma, + locals->meta_row_height, + locals->meta_row_height_chroma, + locals->meta_req_width, + locals->meta_req_width_chroma, + locals->meta_req_height, + locals->meta_req_height_chroma, + locals->dpte_group_bytes, + locals->PTERequestSizeY, + locals->PTERequestSizeC, + locals->PixelPTEReqWidthY, + locals->PixelPTEReqHeightY, + locals->PixelPTEReqWidthC, + locals->PixelPTEReqHeightC, + 
locals->dpte_row_width_luma_ub, + locals->dpte_row_width_chroma_ub, + + /* Output */ + locals->DST_Y_PER_PTE_ROW_NOM_L, + locals->DST_Y_PER_PTE_ROW_NOM_C, + locals->DST_Y_PER_META_ROW_NOM_L, + locals->DST_Y_PER_META_ROW_NOM_C, + locals->TimePerMetaChunkNominal, + locals->TimePerChromaMetaChunkNominal, + locals->TimePerMetaChunkVBlank, + locals->TimePerChromaMetaChunkVBlank, + locals->TimePerMetaChunkFlip, + locals->TimePerChromaMetaChunkFlip, + locals->time_per_pte_group_nom_luma, + locals->time_per_pte_group_vblank_luma, + locals->time_per_pte_group_flip_luma, + locals->time_per_pte_group_nom_chroma, + locals->time_per_pte_group_vblank_chroma, + locals->time_per_pte_group_flip_chroma); + + CalculateVMGroupAndRequestTimes( + mode_lib->ms.num_active_planes, + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.timing.HTotal, + locals->BytePerPixelC, + locals->DestinationLinesToRequestVMInVBlank, + locals->DestinationLinesToRequestVMInImmediateFlip, + mode_lib->ms.cache_display_cfg.surface.DCCEnable, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + locals->dpte_row_width_luma_ub, + locals->dpte_row_width_chroma_ub, + locals->vm_group_bytes, + locals->dpde0_bytes_per_frame_ub_l, + locals->dpde0_bytes_per_frame_ub_c, + locals->meta_pte_bytes_per_frame_ub_l, + locals->meta_pte_bytes_per_frame_ub_c, + + /* Output */ + locals->TimePerVMGroupVBlank, + locals->TimePerVMGroupFlip, + locals->TimePerVMRequestVBlank, + locals->TimePerVMRequestFlip); + + // Min TTUVBlank + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (locals->PrefetchMode[k] == 0) { + locals->MinTTUVBlank[k] = dml_max4( + locals->Watermark.DRAMClockChangeWatermark, + locals->Watermark.FCLKChangeWatermark, + locals->Watermark.StutterEnterPlusExitWatermark, + locals->Watermark.UrgentWatermark); + } else if (locals->PrefetchMode[k] == 1) { + locals->MinTTUVBlank[k] = dml_max3( + locals->Watermark.FCLKChangeWatermark, + locals->Watermark.StutterEnterPlusExitWatermark, + locals->Watermark.UrgentWatermark); + } else if (locals->PrefetchMode[k] == 2) { + locals->MinTTUVBlank[k] = dml_max( + locals->Watermark.StutterEnterPlusExitWatermark, + locals->Watermark.UrgentWatermark); + } else { + locals->MinTTUVBlank[k] = locals->Watermark.UrgentWatermark; + } + if (!mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]) + locals->MinTTUVBlank[k] = locals->TCalc + locals->MinTTUVBlank[k]; + } + + // DCC Configuration + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); +#endif + CalculateDCCConfiguration( + mode_lib->ms.cache_display_cfg.surface.DCCEnable[k], + mode_lib->ms.policy.DCCProgrammingAssumesScanDirectionUnknownFinal, + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k], + mode_lib->ms.NomDETInKByte, + locals->BlockHeight256BytesY[k], + locals->BlockHeight256BytesC[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k], + locals->BytePerPixelY[k], + locals->BytePerPixelC[k], + locals->BytePerPixelDETY[k], + locals->BytePerPixelDETC[k], + mode_lib->ms.cache_display_cfg.plane.SourceScan[k], + /* Output */ + &locals->DCCYMaxUncompressedBlock[k], + 
&locals->DCCCMaxUncompressedBlock[k], + &locals->DCCYMaxCompressedBlock[k], + &locals->DCCCMaxCompressedBlock[k], + &locals->DCCYIndependentBlock[k], + &locals->DCCCIndependentBlock[k]); + } + + // VStartup Adjustment + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->Tvstartup_margin = (s->MaxVStartupLines[k] - locals->VStartupMin[k]) * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, locals->MinTTUVBlank[k]); +#endif + + locals->MinTTUVBlank[k] = locals->MinTTUVBlank[k] + s->Tvstartup_margin; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); + dml_print("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + dml_print("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, locals->MinTTUVBlank[k]); +#endif + + locals->Tdmdl[k] = locals->Tdmdl[k] + s->Tvstartup_margin; + if (mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k] && mode_lib->ms.ip.dynamic_metadata_vm_enabled) { + locals->Tdmdl_vm[k] = locals->Tdmdl_vm[k] + s->Tvstartup_margin; + } + + isInterlaceTiming = (mode_lib->ms.cache_display_cfg.timing.Interlace[k] && !mode_lib->ms.ip.ptoi_supported); + + // The actual positioning of the vstartup + locals->VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]); + + s->dlg_vblank_start = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) : + mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]); + s->LSetup = dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0; + s->blank_lines_remaining = (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]) - locals->VStartup[k]; + + if (s->blank_lines_remaining < 0) { + dml_print("ERROR: Vstartup is larger than vblank!?\n"); + s->blank_lines_remaining = 0; + ASSERT(0); + } + locals->MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; + + // debug only + s->old_MIN_DST_Y_NEXT_START = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) : + mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) + + dml_max(1.0, dml_ceil((dml_float_t) locals->WritebackDelay[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0)) + + dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0; + + if (((locals->VUpdateOffsetPix[k] + locals->VUpdateWidthPix[k] + locals->VReadyOffsetPix[k]) / (double) mode_lib->ms.cache_display_cfg.timing.HTotal[k]) <= + (isInterlaceTiming ? 
+ dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]) / 2.0, 1.0) : + (int) (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]))) { + locals->VREADY_AT_OR_AFTER_VSYNC[k] = true; + } else { + locals->VREADY_AT_OR_AFTER_VSYNC[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, locals->VStartup[k]); + dml_print("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, locals->VStartupMin[k]); + dml_print("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, locals->VUpdateOffsetPix[k]); + dml_print("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, locals->VUpdateWidthPix[k]); + dml_print("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, locals->VReadyOffsetPix[k]); + dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.HTotal[k]); + dml_print("DML::%s: k=%u, VTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VTotal[k]); + dml_print("DML::%s: k=%u, VActive = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VActive[k]); + dml_print("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]); + dml_print("DML::%s: k=%u, TSetup = %f\n", __func__, k, locals->TSetup[k]); + dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, locals->MIN_DST_Y_NEXT_START[k]); + dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f (old)\n", __func__, k, s->old_MIN_DST_Y_NEXT_START); + dml_print("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, locals->VREADY_AT_OR_AFTER_VSYNC[k]); +#endif + } + + //Maximum Bandwidth Used + s->TotalWRBandwidth = 0; + s->WRBandwidth = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_32) { + s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] / + (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4; + } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] / + (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8; + } + s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth; + } + + locals->TotalDataReadBandwidth = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + locals->TotalDataReadBandwidth = locals->TotalDataReadBandwidth + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, locals->TotalDataReadBandwidth); + dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, 
locals->ReadBandwidthSurfaceChroma[k]); +#endif + } + + locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { + locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = locals->TotalDataReadBandwidthNotIncludingMALLPrefetch + + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k]; + } + } + + CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte; + CalculateStutterEfficiency_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateStutterEfficiency_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled; + CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ms.ip.meta_fifo_size_in_kentries; + CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ms.ip.zero_size_buffer_entries; + CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; + CalculateStutterEfficiency_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes; + CalculateStutterEfficiency_params->TotalDataReadBandwidth = locals->TotalDataReadBandwidth; + CalculateStutterEfficiency_params->DCFCLK = locals->Dcfclk; + CalculateStutterEfficiency_params->ReturnBW = mode_lib->ms.ReturnBW; + CalculateStutterEfficiency_params->CompbufReservedSpace64B = locals->compbuf_reserved_space_64b; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = locals->compbuf_reserved_space_zs; + CalculateStutterEfficiency_params->SRExitTime = mode_lib->ms.state.sr_exit_time_us; + CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us; + CalculateStutterEfficiency_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal; + CalculateStutterEfficiency_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = locals->Watermark.StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = locals->Watermark.Z8StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + CalculateStutterEfficiency_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace; + CalculateStutterEfficiency_params->MinTTUVBlank = locals->MinTTUVBlank; + CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface; + CalculateStutterEfficiency_params->DETBufferSizeY = locals->DETBufferSizeY; + CalculateStutterEfficiency_params->BytePerPixelY = locals->BytePerPixelY; + CalculateStutterEfficiency_params->BytePerPixelDETY = locals->BytePerPixelDETY; + CalculateStutterEfficiency_params->SwathWidthY = locals->SwathWidthY; + CalculateStutterEfficiency_params->SwathHeightY = locals->SwathHeightY; + CalculateStutterEfficiency_params->SwathHeightC = locals->SwathHeightC; + CalculateStutterEfficiency_params->NetDCCRateLuma = mode_lib->ms.cache_display_cfg.surface.DCCRateLuma; + CalculateStutterEfficiency_params->NetDCCRateChroma = mode_lib->ms.cache_display_cfg.surface.DCCRateChroma; + CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsLuma = 
mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma; + CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsChroma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma; + CalculateStutterEfficiency_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal; + CalculateStutterEfficiency_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; + CalculateStutterEfficiency_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; + CalculateStutterEfficiency_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio; + CalculateStutterEfficiency_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan; + CalculateStutterEfficiency_params->BlockHeight256BytesY = locals->BlockHeight256BytesY; + CalculateStutterEfficiency_params->BlockWidth256BytesY = locals->BlockWidth256BytesY; + CalculateStutterEfficiency_params->BlockHeight256BytesC = locals->BlockHeight256BytesC; + CalculateStutterEfficiency_params->BlockWidth256BytesC = locals->BlockWidth256BytesC; + CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = locals->DCCYMaxUncompressedBlock; + CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = locals->DCCCMaxUncompressedBlock; + CalculateStutterEfficiency_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; + CalculateStutterEfficiency_params->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable; + CalculateStutterEfficiency_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = locals->ReadBandwidthSurfaceLuma; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = locals->ReadBandwidthSurfaceChroma; + CalculateStutterEfficiency_params->meta_row_bw = locals->meta_row_bw; + CalculateStutterEfficiency_params->dpte_row_bw = locals->dpte_row_bw; + CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &locals->StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->StutterEfficiency = &locals->StutterEfficiency; + CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &locals->NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &locals->Z8StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &locals->Z8StutterEfficiency; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &locals->Z8NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->StutterPeriod = &locals->StutterPeriod; + CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &locals->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, + CalculateStutterEfficiency_params); + +#ifdef __DML_VBA_ALLOW_DELTA__ + { + dml_float_t dummy_single[2]; + dml_uint_t dummy_integer[1]; + dml_bool_t dummy_boolean[1]; + + // Calculate z8 stutter eff assuming 0 reserved space + CalculateStutterEfficiency( + locals->CompressedBufferSizeInkByte, + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + locals->UnboundedRequestEnabled, + mode_lib->ms.ip.meta_fifo_size_in_kentries, + mode_lib->ms.ip.zero_size_buffer_entries, + mode_lib->ms.ip.pixel_chunk_size_kbytes, + mode_lib->ms.num_active_planes, + mode_lib->ms.ip.rob_buffer_size_kbytes, + locals->TotalDataReadBandwidth, + locals->Dcfclk, + mode_lib->ms.ReturnBW, + 0, //mode_lib->ms.ip.compbuf_reserved_space_64b, + 0, 
//mode_lib->ms.ip.compbuf_reserved_space_zs, + mode_lib->ms.state.sr_exit_time_us, + mode_lib->ms.state.sr_exit_z8_time_us, + mode_lib->ms.policy.SynchronizeTimingsFinal, + mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming, + locals->Watermark.StutterEnterPlusExitWatermark, + locals->Watermark.Z8StutterEnterPlusExitWatermark, + mode_lib->ms.ip.ptoi_supported, + mode_lib->ms.cache_display_cfg.timing.Interlace, + locals->MinTTUVBlank, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + mode_lib->ms.DETBufferSizeY, + locals->BytePerPixelY, + locals->BytePerPixelDETY, + locals->SwathWidthY, + mode_lib->ms.SwathHeightY, + mode_lib->ms.SwathHeightC, + mode_lib->ms.cache_display_cfg.surface.DCCRateLuma, + mode_lib->ms.cache_display_cfg.surface.DCCRateChroma, + mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma, + mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma, + mode_lib->ms.cache_display_cfg.timing.HTotal, + mode_lib->ms.cache_display_cfg.timing.VTotal, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + mode_lib->ms.cache_display_cfg.plane.VRatio, + mode_lib->ms.cache_display_cfg.plane.SourceScan, + locals->BlockHeight256BytesY, + locals->BlockWidth256BytesY, + locals->BlockHeight256BytesC, + locals->BlockWidth256BytesC, + locals->DCCYMaxUncompressedBlock, + locals->DCCCMaxUncompressedBlock, + mode_lib->ms.cache_display_cfg.timing.VActive, + mode_lib->ms.cache_display_cfg.surface.DCCEnable, + mode_lib->ms.cache_display_cfg.writeback.WritebackEnable, + locals->ReadBandwidthSurfaceLuma, + locals->ReadBandwidthSurfaceChroma, + locals->meta_row_bw, + locals->dpte_row_bw, + + /* Output */ + &dummy_single[0], + &dummy_single[1], + &dummy_integer[0], + &locals->Z8StutterEfficiencyNotIncludingVBlankBestCase, + &locals->Z8StutterEfficiencyBestCase, + &locals->Z8NumberOfStutterBurstsPerFrameBestCase, + &locals->StutterPeriodBestCase, + &dummy_boolean[0]); + } +#else + locals->Z8StutterEfficiencyNotIncludingVBlankBestCase = locals->Z8StutterEfficiencyNotIncludingVBlank; + locals->Z8StutterEfficiencyBestCase = locals->Z8StutterEfficiency; + locals->Z8NumberOfStutterBurstsPerFrameBestCase = locals->Z8NumberOfStutterBurstsPerFrame; + locals->StutterPeriodBestCase = locals->StutterPeriod; +#endif + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- END --- \n", __func__); +#endif +} // dml_core_mode_programming + +/// Function: dml_core_get_row_heights +/// @brief Get row height for DPTE and META with minimal input. 
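+/// Derives the byte-per-pixel and 256B request block sizes for the given format and tiling, then runs +/// CalculateVMAndRowBytes with placeholder viewport inputs, keeping only the two row heights.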
+void dml_core_get_row_heights( + dml_uint_t *dpte_row_height, + dml_uint_t *meta_row_height, + const struct display_mode_lib_st *mode_lib, + dml_bool_t is_plane1, + enum dml_source_format_class SourcePixelFormat, + enum dml_swizzle_mode SurfaceTiling, + enum dml_rotation_angle ScanDirection, + dml_uint_t pitch, + dml_uint_t GPUVMMinPageSizeKBytes) +{ + dml_uint_t BytePerPixelY; + dml_uint_t BytePerPixelC; + dml_float_t BytePerPixelInDETY; + dml_float_t BytePerPixelInDETC; + dml_uint_t BlockHeight256BytesY; + dml_uint_t BlockHeight256BytesC; + dml_uint_t BlockWidth256BytesY; + dml_uint_t BlockWidth256BytesC; + dml_uint_t MacroTileWidthY; + dml_uint_t MacroTileWidthC; + dml_uint_t MacroTileHeightY; + dml_uint_t MacroTileHeightC; + + dml_uint_t BytePerPixel; + dml_uint_t BlockHeight256Bytes; + dml_uint_t BlockWidth256Bytes; + dml_uint_t MacroTileWidth; + dml_uint_t MacroTileHeight; + dml_uint_t PTEBufferSizeInRequests; + + dml_uint_t dummy_integer[16]; + + CalculateBytePerPixelAndBlockSizes( + SourcePixelFormat, + SurfaceTiling, + + /* Output */ + &BytePerPixelY, + &BytePerPixelC, + &BytePerPixelInDETY, + &BytePerPixelInDETC, + &BlockHeight256BytesY, + &BlockHeight256BytesC, + &BlockWidth256BytesY, + &BlockWidth256BytesC, + &MacroTileHeightY, + &MacroTileHeightC, + &MacroTileWidthY, + &MacroTileWidthC); + + BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY; + BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY; + BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY; + MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY; + MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; + PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: is_plane1 = %u\n", __func__, is_plane1); + dml_print("DML_DLG: %s: BytePerPixel = %u\n", __func__, BytePerPixel); + dml_print("DML_DLG: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); + dml_print("DML_DLG: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); + dml_print("DML_DLG: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); + dml_print("DML_DLG: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); + dml_print("DML_DLG: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); + dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); + dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); + dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); +#endif + + // just supply with enough parameters to calculate meta and dte + CalculateVMAndRowBytes( + 0, // dml_bool_t ViewportStationary, + 1, // dml_bool_t DCCEnable, + 1, // dml_uint_t NumberOfDPPs, + BlockHeight256Bytes, + BlockWidth256Bytes, + SourcePixelFormat, + SurfaceTiling, + BytePerPixel, + ScanDirection, + 0, // dml_uint_t SwathWidth, + 0, // dml_uint_t ViewportHeight, (Note: DML calculates one_row_for_frame height regardless, would need test input if that height is useful) + 0, // dml_uint_t ViewportXStart, + 0, // dml_uint_t ViewportYStart, + 1, // dml_bool_t GPUVMEnable, + 4, // dml_uint_t GPUVMMaxPageTableLevels, + GPUVMMinPageSizeKBytes, + PTEBufferSizeInRequests, + pitch, + 0, // dml_uint_t DCCMetaPitch, + MacroTileWidth, + MacroTileHeight, + + // /* 
Output */ + &dummy_integer[0], // dml_uint_t *MetaRowByte, + &dummy_integer[1], // dml_uint_t *PixelPTEBytesPerRow, + &dummy_integer[2], // dml_uint_t *PixelPTEBytesPerRowStorage, + &dummy_integer[3], // dml_uint_t *dpte_row_width_ub, + dpte_row_height, + &dummy_integer[4], // dml_uint_t *dpte_row_height_linear + &dummy_integer[5], // dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame, + &dummy_integer[6], // dml_uint_t *dpte_row_width_ub_one_row_per_frame, + &dummy_integer[7], // dml_uint_t *dpte_row_height_one_row_per_frame, + &dummy_integer[8], // dml_uint_t *MetaRequestWidth, + &dummy_integer[9], // dml_uint_t *MetaRequestHeight, + &dummy_integer[10], // dml_uint_t *meta_row_width, + meta_row_height, + &dummy_integer[11], // dml_uint_t *PixelPTEReqWidth, + &dummy_integer[12], // dml_uint_t *PixelPTEReqHeight, + &dummy_integer[13], // dml_uint_t *PTERequestSize, + &dummy_integer[14], // dml_uint_t *DPDE0BytesFrame, + &dummy_integer[15]); // dml_uint_t *MetaPTEBytesFrame) + +#ifdef __DML_RQ_DLG_CALC_DEBUG__ + dml_print("DML_DLG: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); + dml_print("DML_DLG: %s: meta_row_height = %u\n", __func__, *meta_row_height); +#endif +} + +static struct soc_state_bounding_box_st dml_get_soc_state_bounding_box( + const struct soc_states_st *states, + dml_uint_t state_idx) +{ + dml_print("DML::%s: state_idx=%u (num_states=%u)\n", __func__, state_idx, states->num_states); + + if (state_idx >= (dml_uint_t)states->num_states) { + dml_print("DML::%s: ERROR: Invalid state_idx=%u! num_states=%u\n", __func__, state_idx, states->num_states); + ASSERT(0); + } + return (states->state_array[state_idx]); +} + +/// @brief Copy the parameters to a calculation struct, it actually only need when the DML needs to have +/// the intelligence to re-calculate when any of display cfg, bbox, or policy changes since last calculated. 
+///
+static void cache_ip_soc_cfg(struct display_mode_lib_st *mode_lib,
+	dml_uint_t state_idx)
+{
+	mode_lib->ms.state_idx = state_idx;
+	mode_lib->ms.max_state_idx = mode_lib->states.num_states - 1;
+	mode_lib->ms.soc = mode_lib->soc;
+	mode_lib->ms.ip = mode_lib->ip;
+	mode_lib->ms.policy = mode_lib->policy;
+	mode_lib->ms.state = dml_get_soc_state_bounding_box(&mode_lib->states, state_idx);
+	mode_lib->ms.max_state = dml_get_soc_state_bounding_box(&mode_lib->states, mode_lib->states.num_states - 1);
+}
+
+static void cache_display_cfg(struct display_mode_lib_st *mode_lib,
+	const struct dml_display_cfg_st *display_cfg)
+{
+	mode_lib->ms.cache_display_cfg = *display_cfg;
+}
+
+static void fetch_socbb_params(struct display_mode_lib_st *mode_lib)
+{
+	struct soc_state_bounding_box_st *state = &mode_lib->ms.state;
+
+	// Default values; SOCCLK, DRAMSpeed, and FabricClock will be reassigned to the same state value in the mode_check step
+	// If UseMinimumRequiredDCFCLK is used, the DCFCLK will be the min dcfclk for the mode support
+	mode_lib->ms.SOCCLK = (dml_float_t)state->socclk_mhz;
+	mode_lib->ms.DRAMSpeed = (dml_float_t)state->dram_speed_mts;
+	mode_lib->ms.FabricClock = (dml_float_t)state->fabricclk_mhz;
+	mode_lib->ms.DCFCLK = (dml_float_t)state->dcfclk_mhz;
+}
+
+/// @brief Use display_cfg directly for the mode_support calculation
+/// Calculated values and informational output are stored in the mode_lib.ms data struct
+/// The display configuration is described with the pipes struct and num_pipes
+/// This function is used when the physical resource mapping is not finalized (for example,
+/// when it is not yet known how many pipes will represent a surface)
+/// @param mode_lib Contains the bounding box and policy setting.
+/// @param state_idx Power state index
+/// @param display_cfg Display configuration.
+dml_bool_t dml_mode_support(
+	struct display_mode_lib_st *mode_lib,
+	dml_uint_t state_idx,
+	const struct dml_display_cfg_st *display_cfg)
+{
+	dml_bool_t is_mode_support;
+
+	dml_print("DML::%s: ------------- START ----------\n", __func__);
+	cache_ip_soc_cfg(mode_lib, state_idx);
+	cache_display_cfg(mode_lib, display_cfg);
+
+	fetch_socbb_params(mode_lib);
+
+	dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
+
+	is_mode_support = dml_core_mode_support(mode_lib);
+
+	dml_print("DML::%s: is_mode_support = %u\n", __func__, is_mode_support);
+	dml_print("DML::%s: ------------- DONE ----------\n", __func__);
+	return is_mode_support;
+}
+
+/// @brief A function to calculate the programming values for the DCN DCHUB (assumes the mode is supported)
+/// The output is stored in the mode_lib.mp (mode_program_st) data struct and can be accessed via the getter functions
+/// Calculated values include: watermarks, dlg, rq regs, and the different clock frequencies
+/// This function returns 1 when there is no error.
+/// Note: this function assumes that the DCFCLK and SOCCLK freqs are the state values, and mode_program will just use the DML-calculated DPPCLK and DISPCLK
+/// @param mode_lib mode_lib data struct that houses all the input/output/bbox and calculation values.
+/// @param state_idx Power state idx chosen
+/// @param display_cfg Display Configuration
+/// @param call_standalone Calling mode_programming without calling mode support.
Some of the "support" struct member will be pre-calculated before doing mode programming +/// TODO: Add clk_cfg input, could be useful for standalone mode +dml_bool_t dml_mode_programming( + struct display_mode_lib_st *mode_lib, + dml_uint_t state_idx, + const struct dml_display_cfg_st *display_cfg, + bool call_standalone) +{ + struct dml_clk_cfg_st clk_cfg; + memset(&clk_cfg, 0, sizeof(clk_cfg)); + + clk_cfg.dcfclk_option = dml_use_required_freq; + clk_cfg.dispclk_option = dml_use_required_freq; + for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k) + clk_cfg.dppclk_option[k] = dml_use_required_freq; + + dml_print("DML::%s: ------------- START ----------\n", __func__); + dml_print("DML::%s: state_idx = %u\n", __func__, state_idx); + dml_print("DML::%s: call_standalone = %u\n", __func__, call_standalone); + + cache_ip_soc_cfg(mode_lib, state_idx); + cache_display_cfg(mode_lib, display_cfg); + + fetch_socbb_params(mode_lib); + if (call_standalone) { + mode_lib->ms.support.ImmediateFlipSupport = 1; // assume mode support say immediate flip ok at max state/combine + dml_core_mode_support_partial(mode_lib); + } + + dml_core_mode_programming(mode_lib, &clk_cfg); + + dml_print("DML::%s: ------------- DONE ----------\n", __func__); + dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %0d\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); + return mode_lib->mp.PrefetchAndImmediateFlipSupported; +} + +static dml_uint_t mode_support_pwr_states( + dml_uint_t *lowest_state_idx, + struct display_mode_lib_st *mode_lib, + const struct dml_display_cfg_st *display_cfg, + dml_uint_t start_state_idx, + dml_uint_t end_state_idx) +{ + dml_uint_t state_idx = 0; + dml_bool_t mode_is_supported = 0; + *lowest_state_idx = end_state_idx; + + if (end_state_idx < start_state_idx) + ASSERT(0); + + if (end_state_idx >= mode_lib->states.num_states) // idx is 0-based + ASSERT(0); + + for (state_idx = start_state_idx; state_idx <= end_state_idx; state_idx++) { + if (dml_mode_support(mode_lib, state_idx, display_cfg)) { + dml_print("DML::%s: Mode is supported at power state_idx = %u\n", __func__, state_idx); + mode_is_supported = 1; + *lowest_state_idx = state_idx; + break; + } + } + + return mode_is_supported; +} + +dml_uint_t dml_mode_support_ex(struct dml_mode_support_ex_params_st *in_out_params) +{ + dml_uint_t result; + + result = mode_support_pwr_states(&in_out_params->out_lowest_state_idx, + in_out_params->mode_lib, + in_out_params->in_display_cfg, + in_out_params->in_start_state_idx, + in_out_params->mode_lib->states.num_states - 1); + + if (result) + *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; + + return result; +} + +dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx) +{ + dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + dml_print("DML::%s: pipe_idx=%d UseMALLForPStateChange=%0d\n", __func__, pipe_idx, mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx]); + return (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx] == dml_use_mall_pstate_change_phantom_pipe); +} + + +#define dml_get_per_surface_var_func(variable, type, interval_var) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) \ +{ \ + dml_uint_t plane_idx; \ + plane_idx = mode_lib->mp.pipe_plane[surface_idx]; \ + return (type) interval_var[plane_idx]; \ +} + +#define dml_get_var_func(var, type, internal_var) type dml_get_##var(struct display_mode_lib_st *mode_lib) \ +{ \ + return (type) 
internal_var; \ +} + +dml_get_var_func(wm_urgent, dml_float_t, mode_lib->mp.Watermark.UrgentWatermark); +dml_get_var_func(wm_stutter_exit, dml_float_t, mode_lib->mp.Watermark.StutterExitWatermark); +dml_get_var_func(wm_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark); +dml_get_var_func(wm_memory_trip, dml_float_t, mode_lib->mp.UrgentLatency); +dml_get_var_func(wm_fclk_change, dml_float_t, mode_lib->mp.Watermark.FCLKChangeWatermark); +dml_get_var_func(wm_usr_retraining, dml_float_t, mode_lib->mp.Watermark.USRRetrainingWatermark); +dml_get_var_func(wm_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.DRAMClockChangeWatermark); +dml_get_var_func(wm_z8_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark); +dml_get_var_func(wm_z8_stutter, dml_float_t, mode_lib->mp.Watermark.Z8StutterExitWatermark); +dml_get_var_func(fraction_of_urgent_bandwidth, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidth); +dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip); +dml_get_var_func(urgent_latency, dml_float_t, mode_lib->mp.UrgentLatency); +dml_get_var_func(clk_dcf_deepsleep, dml_float_t, mode_lib->mp.DCFCLKDeepSleep); +dml_get_var_func(wm_writeback_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); +dml_get_var_func(wm_writeback_urgent, dml_float_t, mode_lib->mp.Watermark.WritebackUrgentWatermark); +dml_get_var_func(stutter_efficiency, dml_float_t, mode_lib->mp.StutterEfficiency); +dml_get_var_func(stutter_efficiency_no_vblank, dml_float_t, mode_lib->mp.StutterEfficiencyNotIncludingVBlank); +dml_get_var_func(stutter_efficiency_z8, dml_float_t, mode_lib->mp.Z8StutterEfficiency); +dml_get_var_func(stutter_num_bursts_z8, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame); +dml_get_var_func(stutter_period, dml_float_t, mode_lib->mp.StutterPeriod); +dml_get_var_func(stutter_efficiency_z8_bestcase, dml_float_t, mode_lib->mp.Z8StutterEfficiencyBestCase); +dml_get_var_func(stutter_num_bursts_z8_bestcase, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase); +dml_get_var_func(stutter_period_bestcase, dml_float_t, mode_lib->mp.StutterPeriodBestCase); +dml_get_var_func(urgent_extra_latency, dml_float_t, mode_lib->mp.UrgentExtraLatency); +dml_get_var_func(fclk_change_latency, dml_float_t, mode_lib->mp.MaxActiveFCLKChangeLatencySupported); +dml_get_var_func(dispclk_calculated, dml_float_t, mode_lib->mp.Dispclk_calculated); +dml_get_var_func(total_data_read_bw, dml_float_t, mode_lib->mp.TotalDataReadBandwidth); +dml_get_var_func(return_bw, dml_float_t, mode_lib->ms.ReturnBW); +dml_get_var_func(return_dram_bw, dml_float_t, mode_lib->ms.ReturnDRAMBW); +dml_get_var_func(tcalc, dml_float_t, mode_lib->mp.TCalc); +dml_get_var_func(comp_buffer_size_kbytes, dml_uint_t, mode_lib->mp.CompressedBufferSizeInkByte); +dml_get_var_func(pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.pixel_chunk_size_kbytes); +dml_get_var_func(alpha_pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.alpha_pixel_chunk_size_kbytes); +dml_get_var_func(meta_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.meta_chunk_size_kbytes); +dml_get_var_func(min_pixel_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_pixel_chunk_size_bytes); +dml_get_var_func(min_meta_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_meta_chunk_size_bytes); +dml_get_var_func(total_immediate_flip_bytes, dml_uint_t, mode_lib->mp.TotImmediateFlipBytes); + 
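/*
 * Editorial illustration (not part of this patch): a minimal sketch of how a
 * DML2 client could drive the wrappers above - find the lowest power state
 * that supports a display configuration, run mode programming at that state,
 * then read the results back through the getters defined in this file.  It
 * assumes a display_mode_lib_st already initialized with the IP/SOC bounding
 * box and states, and a populated dml_display_cfg_st; the helper name
 * example_query_mode is hypothetical.
 */
static void example_query_mode(struct display_mode_lib_st *mode_lib,
			       const struct dml_display_cfg_st *display_cfg)
{
	struct dml_mode_support_ex_params_st params;
	struct dml_mode_support_info_st eval_info;
	dml_uint_t lowest_state_idx;

	memset(&params, 0, sizeof(params));
	memset(&eval_info, 0, sizeof(eval_info));
	params.mode_lib = mode_lib;
	params.in_display_cfg = display_cfg;
	params.in_start_state_idx = 0;
	params.out_evaluation_info = &eval_info;

	/* Walk the power states from in_start_state_idx up; 0 means no state fits. */
	if (!dml_mode_support_ex(&params))
		return;

	lowest_state_idx = params.out_lowest_state_idx;

	/* call_standalone = true pre-computes the needed "support" values first. */
	if (!dml_mode_programming(mode_lib, lowest_state_idx, display_cfg, true))
		return;

	/* Global outputs come from the dml_get_var_func() getters... */
	dml_print("DML: urgent wm = %f us, dispclk = %f MHz\n",
		dml_get_wm_urgent(mode_lib), dml_get_dispclk_calculated(mode_lib));

	/* ...and per-surface outputs from the dml_get_per_surface_var_func() ones,
	 * indexed through the pipe/plane mapping set up by mode programming.
	 */
	for (dml_uint_t k = 0; k < display_cfg->num_surfaces; ++k)
		dml_print("DML: surface %u: dppclk = %f MHz, min_ttu_vblank = %f us\n",
			k, dml_get_dppclk_calculated(mode_lib, k),
			dml_get_min_ttu_vblank_in_us(mode_lib, k));
}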
+dml_get_per_surface_var_func(dsc_delay, dml_uint_t, mode_lib->mp.DSCDelay); // this is the dsc latency +dml_get_per_surface_var_func(dppclk_calculated, dml_float_t, mode_lib->mp.Dppclk_calculated); +dml_get_per_surface_var_func(dscclk_calculated, dml_float_t, mode_lib->mp.DSCCLK_calculated); +dml_get_per_surface_var_func(min_ttu_vblank_in_us, dml_float_t, mode_lib->mp.MinTTUVBlank); +dml_get_per_surface_var_func(vratio_prefetch_l, dml_float_t, mode_lib->mp.VRatioPrefetchY); +dml_get_per_surface_var_func(vratio_prefetch_c, dml_float_t, mode_lib->mp.VRatioPrefetchC); +dml_get_per_surface_var_func(dst_x_after_scaler, dml_uint_t, mode_lib->mp.DSTXAfterScaler); +dml_get_per_surface_var_func(dst_y_after_scaler, dml_uint_t, mode_lib->mp.DSTYAfterScaler); +dml_get_per_surface_var_func(dst_y_per_vm_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInVBlank); +dml_get_per_surface_var_func(dst_y_per_row_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInVBlank); +dml_get_per_surface_var_func(dst_y_prefetch, dml_float_t, mode_lib->mp.DestinationLinesForPrefetch); +dml_get_per_surface_var_func(dst_y_per_vm_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInImmediateFlip); +dml_get_per_surface_var_func(dst_y_per_row_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInImmediateFlip); +dml_get_per_surface_var_func(dst_y_per_pte_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L); +dml_get_per_surface_var_func(dst_y_per_pte_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C); +dml_get_per_surface_var_func(dst_y_per_meta_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_L); +dml_get_per_surface_var_func(dst_y_per_meta_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_C); +dml_get_per_surface_var_func(refcyc_per_vm_group_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupVBlank); +dml_get_per_surface_var_func(refcyc_per_vm_group_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupFlip); +dml_get_per_surface_var_func(refcyc_per_vm_req_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestVBlank); +dml_get_per_surface_var_func(refcyc_per_vm_req_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestFlip); +dml_get_per_surface_var_func(refcyc_per_vm_dmdata_in_us, dml_float_t, mode_lib->mp.Tdmdl_vm); +dml_get_per_surface_var_func(dmdata_dl_delta_in_us, dml_float_t, mode_lib->mp.Tdmdl); +dml_get_per_surface_var_func(refcyc_per_line_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLuma); +dml_get_per_surface_var_func(refcyc_per_line_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChroma); +dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch); +dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch); +dml_get_per_surface_var_func(refcyc_per_req_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma); +dml_get_per_surface_var_func(refcyc_per_req_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma); +dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch); +dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch); +dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_in_us, dml_float_t, 
mode_lib->mp.CursorRequestDeliveryTime); +dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTimePrefetch); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkNominal); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkNominal); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkVBlank); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkVBlank); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkFlip); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkFlip); +dml_get_per_surface_var_func(refcyc_per_pte_group_nom_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_luma); +dml_get_per_surface_var_func(refcyc_per_pte_group_nom_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_chroma); +dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_luma); +dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_chroma); +dml_get_per_surface_var_func(refcyc_per_pte_group_flip_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_luma); +dml_get_per_surface_var_func(refcyc_per_pte_group_flip_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_chroma); +dml_get_per_surface_var_func(dpte_group_size_in_bytes, dml_uint_t, mode_lib->mp.dpte_group_bytes); +dml_get_per_surface_var_func(vm_group_size_in_bytes, dml_uint_t, mode_lib->mp.vm_group_bytes); +dml_get_per_surface_var_func(swath_height_l, dml_uint_t, mode_lib->ms.SwathHeightY); +dml_get_per_surface_var_func(swath_height_c, dml_uint_t, mode_lib->ms.SwathHeightC); +dml_get_per_surface_var_func(dpte_row_height_l, dml_uint_t, mode_lib->mp.dpte_row_height); +dml_get_per_surface_var_func(dpte_row_height_c, dml_uint_t, mode_lib->mp.dpte_row_height_chroma); +dml_get_per_surface_var_func(dpte_row_height_linear_l, dml_uint_t, mode_lib->mp.dpte_row_height_linear); +dml_get_per_surface_var_func(dpte_row_height_linear_c, dml_uint_t, mode_lib->mp.dpte_row_height_linear_chroma); +dml_get_per_surface_var_func(meta_row_height_l, dml_uint_t, mode_lib->mp.meta_row_height); +dml_get_per_surface_var_func(meta_row_height_c, dml_uint_t, mode_lib->mp.meta_row_height_chroma); + +dml_get_per_surface_var_func(vstartup_calculated, dml_uint_t, mode_lib->mp.VStartup); +dml_get_per_surface_var_func(vupdate_offset, dml_uint_t, mode_lib->mp.VUpdateOffsetPix); +dml_get_per_surface_var_func(vupdate_width, dml_uint_t, mode_lib->mp.VUpdateWidthPix); +dml_get_per_surface_var_func(vready_offset, dml_uint_t, mode_lib->mp.VReadyOffsetPix); +dml_get_per_surface_var_func(vready_at_or_after_vsync, dml_uint_t, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC); +dml_get_per_surface_var_func(min_dst_y_next_start, dml_uint_t, mode_lib->mp.MIN_DST_Y_NEXT_START); +dml_get_per_surface_var_func(det_stored_buffer_size_l_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeY); +dml_get_per_surface_var_func(det_stored_buffer_size_c_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeC); +dml_get_per_surface_var_func(use_mall_for_static_screen, dml_uint_t, mode_lib->mp.UsesMALLForStaticScreen); +dml_get_per_surface_var_func(surface_size_for_mall, dml_uint_t, 
mode_lib->mp.SurfaceSizeInTheMALL); +dml_get_per_surface_var_func(dcc_max_uncompressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxUncompressedBlock); +dml_get_per_surface_var_func(dcc_max_compressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxCompressedBlock); +dml_get_per_surface_var_func(dcc_independent_block_l, dml_uint_t, mode_lib->mp.DCCYIndependentBlock); +dml_get_per_surface_var_func(dcc_max_uncompressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxUncompressedBlock); +dml_get_per_surface_var_func(dcc_max_compressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxCompressedBlock); +dml_get_per_surface_var_func(dcc_independent_block_c, dml_uint_t, mode_lib->mp.DCCCIndependentBlock); +dml_get_per_surface_var_func(max_active_dram_clock_change_latency_supported, dml_uint_t, mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported); +dml_get_per_surface_var_func(pte_buffer_mode, dml_uint_t, mode_lib->mp.PTE_BUFFER_MODE); +dml_get_per_surface_var_func(bigk_fragment_size, dml_uint_t, mode_lib->mp.BIGK_FRAGMENT_SIZE); +dml_get_per_surface_var_func(dpte_bytes_per_row, dml_uint_t, mode_lib->mp.PixelPTEBytesPerRow); +dml_get_per_surface_var_func(meta_bytes_per_row, dml_uint_t, mode_lib->mp.MetaRowByte); +dml_get_per_surface_var_func(det_buffer_size_kbytes, dml_uint_t, mode_lib->ms.DETBufferSizeInKByte); + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h new file mode 100644 index 000000000000..a38ed89c47a9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h @@ -0,0 +1,204 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DISPLAY_MODE_CORE_H__ +#define __DISPLAY_MODE_CORE_H__ + +#include "display_mode_core_structs.h" + +struct display_mode_lib_st; + +dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib); +void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib); +void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg); + +void dml_core_get_row_heights( + dml_uint_t *dpte_row_height, + dml_uint_t *meta_row_height, + const struct display_mode_lib_st *mode_lib, + dml_bool_t is_plane1, + enum dml_source_format_class SourcePixelFormat, + enum dml_swizzle_mode SurfaceTiling, + enum dml_rotation_angle ScanDirection, + dml_uint_t pitch, + dml_uint_t GPUVMMinPageSizeKBytes); + +dml_float_t dml_get_return_bw_mbps_vm_only( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DCFCLK, + dml_float_t FabricClock, + dml_float_t DRAMSpeed); + +dml_float_t dml_get_return_bw_mbps( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DCFCLK, + dml_float_t FabricClock, + dml_float_t DRAMSpeed); + +dml_bool_t dml_mode_support( + struct display_mode_lib_st *mode_lib, + dml_uint_t state_idx, + const struct dml_display_cfg_st *display_cfg); + +dml_bool_t dml_mode_programming( + struct display_mode_lib_st *mode_lib, + dml_uint_t state_idx, + const struct dml_display_cfg_st *display_cfg, + bool call_standalone); + +dml_uint_t dml_mode_support_ex( + struct dml_mode_support_ex_params_st *in_out_params); + +dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx); + +#define dml_get_per_surface_var_decl(variable, type) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) +#define dml_get_var_decl(var, type) type dml_get_##var(struct display_mode_lib_st *mode_lib) + +dml_get_var_decl(wm_urgent, dml_float_t); +dml_get_var_decl(wm_stutter_exit, dml_float_t); +dml_get_var_decl(wm_stutter_enter_exit, dml_float_t); +dml_get_var_decl(wm_memory_trip, dml_float_t); +dml_get_var_decl(wm_dram_clock_change, dml_float_t); +dml_get_var_decl(wm_z8_stutter_enter_exit, dml_float_t); +dml_get_var_decl(wm_z8_stutter, dml_float_t); +dml_get_var_decl(urgent_latency, dml_float_t); +dml_get_var_decl(clk_dcf_deepsleep, dml_float_t); +dml_get_var_decl(wm_fclk_change, dml_float_t); +dml_get_var_decl(wm_usr_retraining, dml_float_t); +dml_get_var_decl(urgent_latency, dml_float_t); + +dml_get_var_decl(wm_writeback_dram_clock_change, dml_float_t); +dml_get_var_decl(wm_writeback_urgent, dml_float_t); +dml_get_var_decl(stutter_efficiency_no_vblank, dml_float_t); +dml_get_var_decl(stutter_efficiency, dml_float_t); +dml_get_var_decl(stutter_efficiency_z8, dml_float_t); +dml_get_var_decl(stutter_num_bursts_z8, dml_float_t); +dml_get_var_decl(stutter_period, dml_float_t); +dml_get_var_decl(stutter_efficiency_z8_bestcase, dml_float_t); +dml_get_var_decl(stutter_num_bursts_z8_bestcase, dml_float_t); +dml_get_var_decl(stutter_period_bestcase, dml_float_t); +dml_get_var_decl(urgent_latency, dml_float_t); +dml_get_var_decl(urgent_extra_latency, dml_float_t); +dml_get_var_decl(fclk_change_latency, dml_float_t); +dml_get_var_decl(nonurgent_latency, dml_float_t); +dml_get_var_decl(dispclk_calculated, dml_float_t); +dml_get_var_decl(total_data_read_bw, dml_float_t); +dml_get_var_decl(return_bw, dml_float_t); +dml_get_var_decl(return_dram_bw, 
dml_float_t); +dml_get_var_decl(tcalc, dml_float_t); +dml_get_var_decl(fraction_of_urgent_bandwidth, dml_float_t); +dml_get_var_decl(fraction_of_urgent_bandwidth_imm_flip, dml_float_t); +dml_get_var_decl(comp_buffer_size_kbytes, dml_uint_t); +dml_get_var_decl(pixel_chunk_size_in_kbyte, dml_uint_t); +dml_get_var_decl(alpha_pixel_chunk_size_in_kbyte, dml_uint_t); +dml_get_var_decl(meta_chunk_size_in_kbyte, dml_uint_t); +dml_get_var_decl(min_pixel_chunk_size_in_byte, dml_uint_t); +dml_get_var_decl(min_meta_chunk_size_in_byte, dml_uint_t); +dml_get_var_decl(total_immediate_flip_bytes, dml_uint_t); + +dml_get_per_surface_var_decl(dsc_delay, dml_uint_t); +dml_get_per_surface_var_decl(dppclk_calculated, dml_float_t); +dml_get_per_surface_var_decl(dscclk_calculated, dml_float_t); +dml_get_per_surface_var_decl(min_ttu_vblank_in_us, dml_float_t); +dml_get_per_surface_var_decl(vratio_prefetch_l, dml_float_t); +dml_get_per_surface_var_decl(vratio_prefetch_c, dml_float_t); +dml_get_per_surface_var_decl(dst_x_after_scaler, dml_uint_t); +dml_get_per_surface_var_decl(dst_y_after_scaler, dml_uint_t); +dml_get_per_surface_var_decl(dst_y_per_vm_vblank, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_row_vblank, dml_float_t); +dml_get_per_surface_var_decl(dst_y_prefetch, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_vm_flip, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_row_flip, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_l, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_c, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_l, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_c, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_group_vblank_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_group_flip_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_req_vblank_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_req_flip_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_dmdata_in_us, dml_float_t); +dml_get_per_surface_var_decl(dmdata_dl_delta_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_line_delivery_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_line_delivery_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_req_delivery_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_req_delivery_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_l_in_us, dml_float_t); 
+dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_c_in_us, dml_float_t); + +dml_get_per_surface_var_decl(dpte_group_size_in_bytes, dml_uint_t); +dml_get_per_surface_var_decl(vm_group_size_in_bytes, dml_uint_t); +dml_get_per_surface_var_decl(swath_height_l, dml_uint_t); +dml_get_per_surface_var_decl(swath_height_c, dml_uint_t); +dml_get_per_surface_var_decl(dpte_row_height_l, dml_uint_t); +dml_get_per_surface_var_decl(dpte_row_height_c, dml_uint_t); +dml_get_per_surface_var_decl(dpte_row_height_linear_l, dml_uint_t); +dml_get_per_surface_var_decl(dpte_row_height_linear_c, dml_uint_t); +dml_get_per_surface_var_decl(meta_row_height_l, dml_uint_t); +dml_get_per_surface_var_decl(meta_row_height_c, dml_uint_t); +dml_get_per_surface_var_decl(vstartup_calculated, dml_uint_t); +dml_get_per_surface_var_decl(vupdate_offset, dml_uint_t); +dml_get_per_surface_var_decl(vupdate_width, dml_uint_t); +dml_get_per_surface_var_decl(vready_offset, dml_uint_t); +dml_get_per_surface_var_decl(vready_at_or_after_vsync, dml_uint_t); +dml_get_per_surface_var_decl(min_dst_y_next_start, dml_uint_t); +dml_get_per_surface_var_decl(det_stored_buffer_size_l_bytes, dml_uint_t); +dml_get_per_surface_var_decl(det_stored_buffer_size_c_bytes, dml_uint_t); +dml_get_per_surface_var_decl(use_mall_for_static_screen, dml_uint_t); +dml_get_per_surface_var_decl(surface_size_for_mall, dml_uint_t); +dml_get_per_surface_var_decl(dcc_max_uncompressed_block_l, dml_uint_t); +dml_get_per_surface_var_decl(dcc_max_uncompressed_block_c, dml_uint_t); +dml_get_per_surface_var_decl(dcc_max_compressed_block_l, dml_uint_t); +dml_get_per_surface_var_decl(dcc_max_compressed_block_c, dml_uint_t); +dml_get_per_surface_var_decl(dcc_independent_block_l, dml_uint_t); +dml_get_per_surface_var_decl(dcc_independent_block_c, dml_uint_t); +dml_get_per_surface_var_decl(max_active_dram_clock_change_latency_supported, dml_uint_t); +dml_get_per_surface_var_decl(pte_buffer_mode, dml_uint_t); +dml_get_per_surface_var_decl(bigk_fragment_size, dml_uint_t); +dml_get_per_surface_var_decl(dpte_bytes_per_row, dml_uint_t); +dml_get_per_surface_var_decl(meta_bytes_per_row, dml_uint_t); +dml_get_per_surface_var_decl(det_buffer_size_kbytes, dml_uint_t); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h new file mode 100644 index 000000000000..5b40dcdc4406 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h @@ -0,0 +1,2032 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DISPLAY_MODE_CORE_STRUCT_H__ +#define __DISPLAY_MODE_CORE_STRUCT_H__ + +#include "display_mode_lib_defines.h" +#include "dml_top_display_cfg_types.h" + +enum dml_project_id { + dml_project_invalid = 0, + dml_project_default = 1, + dml_project_dcn32 = dml_project_default, + dml_project_dcn321 = 2, + dml_project_dcn35 = 3, + dml_project_dcn351 = 4, + dml_project_dcn401 = 5, + dml_project_dcn36 = 6, +}; +enum dml_prefetch_modes { + dml_prefetch_support_uclk_fclk_and_stutter_if_possible = 0, + dml_prefetch_support_uclk_fclk_and_stutter = 1, + dml_prefetch_support_fclk_and_stutter = 2, + dml_prefetch_support_stutter = 3, + dml_prefetch_support_none = 4 +}; +enum dml_use_mall_for_pstate_change_mode { + dml_use_mall_pstate_change_disable = 0, + dml_use_mall_pstate_change_full_frame = 1, + dml_use_mall_pstate_change_sub_viewport = 2, + dml_use_mall_pstate_change_phantom_pipe = 3, + dml_use_mall_pstate_change_phantom_pipe_no_data_return = 4, + dml_use_mall_pstate_change_imall = 5 +}; +enum dml_use_mall_for_static_screen_mode { + dml_use_mall_static_screen_disable = 0, + dml_use_mall_static_screen_enable = 1, + dml_use_mall_static_screen_optimize = 2 +}; +enum dml_output_encoder_class { + dml_dp = 0, + dml_edp = 1, + dml_dp2p0 = 2, + dml_hdmi = 3, + dml_hdmifrl = 4, + dml_none = 5 +}; +enum dml_output_link_dp_rate{ + dml_dp_rate_na = 0, + dml_dp_rate_hbr = 1, + dml_dp_rate_hbr2 = 2, + dml_dp_rate_hbr3 = 3, + dml_dp_rate_uhbr10 = 4, + dml_dp_rate_uhbr13p5 = 5, + dml_dp_rate_uhbr20 = 6 +}; +enum dml_output_type_and_rate__type{ + dml_output_type_unknown = 0, + dml_output_type_dp = 1, + dml_output_type_edp = 2, + dml_output_type_dp2p0 = 3, + dml_output_type_hdmi = 4, + dml_output_type_hdmifrl = 5 +}; +enum dml_output_type_and_rate__rate { + dml_output_rate_unknown = 0, + dml_output_rate_dp_rate_hbr = 1, + dml_output_rate_dp_rate_hbr2 = 2, + dml_output_rate_dp_rate_hbr3 = 3, + dml_output_rate_dp_rate_uhbr10 = 4, + dml_output_rate_dp_rate_uhbr13p5 = 5, + dml_output_rate_dp_rate_uhbr20 = 6, + dml_output_rate_hdmi_rate_3x3 = 7, + dml_output_rate_hdmi_rate_6x3 = 8, + dml_output_rate_hdmi_rate_6x4 = 9, + dml_output_rate_hdmi_rate_8x4 = 10, + dml_output_rate_hdmi_rate_10x4 = 11, + dml_output_rate_hdmi_rate_12x4 = 12 +}; +enum dml_output_format_class { + dml_444 = 0, + dml_s422 = 1, + dml_n422 = 2, + dml_420 = 3 +}; +enum dml_source_format_class { + dml_444_8 = 0, + dml_444_16 = 1, + dml_444_32 = 2, + dml_444_64 = 3, + dml_420_8 = 4, + dml_420_10 = 5, + dml_420_12 = 6, + dml_422_8 = 7, + dml_422_10 = 8, + dml_rgbe_alpha = 9, + dml_rgbe = 10, + dml_mono_8 = 11, + dml_mono_16 = 12 +}; +enum dml_output_bpc_class { + dml_out_6 = 0, + dml_out_8 = 1, + dml_out_10 = 2, + dml_out_12 = 3, + dml_out_16 = 4 +}; +enum dml_output_standard_class { + dml_std_cvt = 0, + dml_std_cea = 1, + dml_std_cvtr2 = 2 +}; +enum dml_rotation_angle { + dml_rotation_0 = 0, + dml_rotation_90 = 1, + dml_rotation_180 = 2, + dml_rotation_270 = 3, + dml_rotation_0m = 4, + dml_rotation_90m = 5, + dml_rotation_180m = 
6, + dml_rotation_270m = 7 +}; +enum dml_swizzle_mode { + dml_sw_linear = 0, + dml_sw_256b_s = 1, + dml_sw_256b_d = 2, + dml_sw_256b_r = 3, + dml_sw_4kb_z = 4, + dml_sw_4kb_s = 5, + dml_sw_4kb_d = 6, + dml_sw_4kb_r = 7, + dml_sw_64kb_z = 8, + dml_sw_64kb_s = 9, + dml_sw_64kb_d = 10, + dml_sw_64kb_r = 11, + dml_sw_256kb_z = 12, + dml_sw_256kb_s = 13, + dml_sw_256kb_d = 14, + dml_sw_256kb_r = 15, + dml_sw_64kb_z_t = 16, + dml_sw_64kb_s_t = 17, + dml_sw_64kb_d_t = 18, + dml_sw_64kb_r_t = 19, + dml_sw_4kb_z_x = 20, + dml_sw_4kb_s_x = 21, + dml_sw_4kb_d_x = 22, + dml_sw_4kb_r_x = 23, + dml_sw_64kb_z_x = 24, + dml_sw_64kb_s_x = 25, + dml_sw_64kb_d_x = 26, + dml_sw_64kb_r_x = 27, + dml_sw_256kb_z_x = 28, + dml_sw_256kb_s_x = 29, + dml_sw_256kb_d_x = 30, + dml_sw_256kb_r_x = 31, + dml_sw_256b_2d = 32, + dml_sw_4kb_2d = 33, + dml_sw_64kb_2d = 34, + dml_sw_256kb_2d = 35 +}; +enum dml_lb_depth { + dml_lb_6 = 0, + dml_lb_8 = 1, + dml_lb_10 = 2, + dml_lb_12 = 3, + dml_lb_16 = 4 +}; +enum dml_voltage_state { + dml_vmin_lv = 0, + dml_vmin = 1, + dml_vmid = 2, + dml_vnom = 3, + dml_vmax = 4 +}; +enum dml_source_macro_tile_size { + dml_4k_tile = 0, + dml_64k_tile = 1, + dml_256k_tile = 2 +}; +enum dml_cursor_bpp { + dml_cur_2bit = 0, + dml_cur_32bit = 1, + dml_cur_64bit = 2 +}; +enum dml_dram_clock_change_support { + dml_dram_clock_change_vactive = 0, + dml_dram_clock_change_vblank = 1, + dml_dram_clock_change_vblank_drr = 2, + dml_dram_clock_change_vactive_w_mall_full_frame = 3, + dml_dram_clock_change_vactive_w_mall_sub_vp = 4, + dml_dram_clock_change_vblank_w_mall_full_frame = 5, + dml_dram_clock_change_vblank_drr_w_mall_full_frame = 6, + dml_dram_clock_change_vblank_w_mall_sub_vp = 7, + dml_dram_clock_change_vblank_drr_w_mall_sub_vp = 8, + dml_dram_clock_change_unsupported = 9 +}; +enum dml_fclock_change_support { + dml_fclock_change_vactive = 0, + dml_fclock_change_vblank = 1, + dml_fclock_change_unsupported = 2 +}; +enum dml_dsc_enable { + dml_dsc_disable = 0, + dml_dsc_enable = 1, + dml_dsc_enable_if_necessary = 2 +}; +enum dml_mpc_use_policy { + dml_mpc_disabled = 0, + dml_mpc_as_possible = 1, + dml_mpc_as_needed_for_voltage = 2, + dml_mpc_as_needed_for_pstate_and_voltage = 3, + dml_mpc_as_needed = 4, + dml_mpc_2to1 = 5 +}; +enum dml_odm_use_policy { + dml_odm_use_policy_bypass = 0, + dml_odm_use_policy_combine_as_needed = 1, + dml_odm_use_policy_combine_2to1 = 2, + dml_odm_use_policy_combine_3to1 = 3, + dml_odm_use_policy_combine_4to1 = 4, + dml_odm_use_policy_split_1to2 = 5, + dml_odm_use_policy_mso_1to2 = 6, + dml_odm_use_policy_mso_1to4 = 7 +}; +enum dml_odm_mode { + dml_odm_mode_bypass = 0, + dml_odm_mode_combine_2to1 = 1, + dml_odm_mode_combine_3to1 = 2, + dml_odm_mode_combine_4to1 = 3, + dml_odm_mode_split_1to2 = 4, + dml_odm_mode_mso_1to2 = 5, + dml_odm_mode_mso_1to4 = 6 +}; +enum dml_writeback_configuration { + dml_whole_buffer_for_single_stream_no_interleave = 0, + dml_whole_buffer_for_single_stream_interleave = 1 +}; +enum dml_immediate_flip_requirement { + dml_immediate_flip_not_required = 0, + dml_immediate_flip_required = 1, + dml_immediate_flip_if_possible = 2 +}; +enum dml_unbounded_requesting_policy { + dml_unbounded_requesting_enable = 0, + dml_unbounded_requesting_edp_only = 1, + dml_unbounded_requesting_disable = 2 +}; +enum dml_clk_cfg_policy { + dml_use_required_freq = 0, + dml_use_override_freq = 1, + dml_use_state_freq = 2 +}; + +struct soc_state_bounding_box_st { + dml_float_t socclk_mhz; + dml_float_t dscclk_mhz; + dml_float_t phyclk_mhz; + dml_float_t phyclk_d18_mhz; + 
dml_float_t phyclk_d32_mhz; + dml_float_t dtbclk_mhz; + dml_float_t fabricclk_mhz; + dml_float_t dcfclk_mhz; + dml_float_t dispclk_mhz; + dml_float_t dppclk_mhz; + dml_float_t dram_speed_mts; + dml_float_t urgent_latency_pixel_data_only_us; + dml_float_t urgent_latency_pixel_mixed_with_vm_data_us; + dml_float_t urgent_latency_vm_data_only_us; + dml_float_t writeback_latency_us; + dml_float_t urgent_latency_adjustment_fabric_clock_component_us; + dml_float_t urgent_latency_adjustment_fabric_clock_reference_mhz; + dml_float_t sr_exit_time_us; + dml_float_t sr_enter_plus_exit_time_us; + dml_float_t sr_exit_z8_time_us; + dml_float_t sr_enter_plus_exit_z8_time_us; + dml_float_t dram_clock_change_latency_us; + dml_float_t fclk_change_latency_us; + dml_float_t usr_retraining_latency_us; + dml_bool_t use_ideal_dram_bw_strobe; + dml_float_t g6_temp_read_blackout_us; + + struct { + dml_uint_t urgent_ramp_uclk_cycles; + dml_uint_t trip_to_memory_uclk_cycles; + dml_uint_t meta_trip_to_memory_uclk_cycles; + dml_uint_t maximum_latency_when_urgent_uclk_cycles; + dml_uint_t average_latency_when_urgent_uclk_cycles; + dml_uint_t maximum_latency_when_non_urgent_uclk_cycles; + dml_uint_t average_latency_when_non_urgent_uclk_cycles; + } dml_dcn401_uclk_dpm_dependent_soc_qos_params; +}; + +struct soc_bounding_box_st { + dml_float_t dprefclk_mhz; + dml_float_t xtalclk_mhz; + dml_float_t pcierefclk_mhz; + dml_float_t refclk_mhz; + dml_float_t amclk_mhz; + dml_uint_t max_outstanding_reqs; + dml_float_t pct_ideal_sdp_bw_after_urgent; + dml_float_t pct_ideal_fabric_bw_after_urgent; + dml_float_t pct_ideal_dram_bw_after_urgent_pixel_only; + dml_float_t pct_ideal_dram_bw_after_urgent_pixel_and_vm; + dml_float_t pct_ideal_dram_bw_after_urgent_vm_only; + dml_float_t pct_ideal_dram_bw_after_urgent_strobe; + dml_float_t max_avg_sdp_bw_use_normal_percent; + dml_float_t max_avg_fabric_bw_use_normal_percent; + dml_float_t max_avg_dram_bw_use_normal_percent; + dml_float_t max_avg_dram_bw_use_normal_strobe_percent; + + dml_float_t svp_prefetch_pct_ideal_sdp_bw_after_urgent; + dml_float_t svp_prefetch_pct_ideal_fabric_bw_after_urgent; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_only; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_and_vm; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_vm_only; + dml_float_t svp_prefetch_max_avg_sdp_bw_use_normal_percent; + dml_float_t svp_prefetch_max_avg_fabric_bw_use_normal_percent; + dml_float_t svp_prefetch_max_avg_dram_bw_use_normal_percent; + + dml_uint_t round_trip_ping_latency_dcfclk_cycles; + dml_uint_t urgent_out_of_order_return_per_channel_pixel_only_bytes; + dml_uint_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; + dml_uint_t urgent_out_of_order_return_per_channel_vm_only_bytes; + dml_uint_t num_chans; + dml_uint_t return_bus_width_bytes; + dml_uint_t dram_channel_width_bytes; + dml_uint_t fabric_datapath_to_dcn_data_return_bytes; + dml_uint_t hostvm_min_page_size_kbytes; + dml_uint_t gpuvm_min_page_size_kbytes; + dml_float_t phy_downspread_percent; + dml_float_t dcn_downspread_percent; + dml_float_t smn_latency_us; + dml_uint_t mall_allocated_for_dcn_mbytes; + dml_float_t dispclk_dppclk_vco_speed_mhz; + dml_bool_t do_urgent_latency_adjustment; + + dml_uint_t mem_word_bytes; + dml_uint_t num_dcc_mcaches; + dml_uint_t mcache_size_bytes; + dml_uint_t mcache_line_size_bytes; + + struct { + dml_bool_t UseNewDCN401SOCParameters; + dml_uint_t df_qos_response_time_fclk_cycles; + dml_uint_t 
max_round_trip_to_furthest_cs_fclk_cycles; + dml_uint_t mall_overhead_fclk_cycles; + dml_uint_t meta_trip_adder_fclk_cycles; + dml_uint_t average_transport_distance_fclk_cycles; + dml_float_t umc_urgent_ramp_latency_margin; + dml_float_t umc_max_latency_margin; + dml_float_t umc_average_latency_margin; + dml_float_t fabric_max_transport_latency_margin; + dml_float_t fabric_average_transport_latency_margin; + } dml_dcn401_soc_qos_params; + +}; + +struct ip_params_st { + dml_uint_t vblank_nom_default_us; + dml_uint_t rob_buffer_size_kbytes; + dml_uint_t config_return_buffer_size_in_kbytes; + dml_uint_t config_return_buffer_segment_size_in_kbytes; + dml_uint_t compressed_buffer_segment_size_in_kbytes; + dml_uint_t meta_fifo_size_in_kentries; + dml_uint_t zero_size_buffer_entries; + dml_uint_t dpte_buffer_size_in_pte_reqs_luma; + dml_uint_t dpte_buffer_size_in_pte_reqs_chroma; + dml_uint_t dcc_meta_buffer_size_bytes; + dml_bool_t gpuvm_enable; + dml_bool_t hostvm_enable; + dml_uint_t gpuvm_max_page_table_levels; + dml_uint_t hostvm_max_page_table_levels; + dml_uint_t pixel_chunk_size_kbytes; + dml_uint_t alpha_pixel_chunk_size_kbytes; + dml_uint_t min_pixel_chunk_size_bytes; + dml_uint_t meta_chunk_size_kbytes; + dml_uint_t min_meta_chunk_size_bytes; + dml_uint_t writeback_chunk_size_kbytes; + dml_uint_t line_buffer_size_bits; + dml_uint_t max_line_buffer_lines; + dml_uint_t writeback_interface_buffer_size_kbytes; + dml_uint_t max_num_dpp; + dml_uint_t max_num_otg; + dml_uint_t max_num_wb; + dml_uint_t max_dchub_pscl_bw_pix_per_clk; + dml_uint_t max_pscl_lb_bw_pix_per_clk; + dml_uint_t max_lb_vscl_bw_pix_per_clk; + dml_uint_t max_vscl_hscl_bw_pix_per_clk; + dml_float_t max_hscl_ratio; + dml_float_t max_vscl_ratio; + dml_uint_t max_hscl_taps; + dml_uint_t max_vscl_taps; + dml_uint_t num_dsc; + dml_uint_t maximum_dsc_bits_per_component; + dml_uint_t maximum_pixels_per_line_per_dsc_unit; + dml_bool_t dsc422_native_support; + dml_bool_t cursor_64bpp_support; + dml_float_t dispclk_ramp_margin_percent; + dml_uint_t dppclk_delay_subtotal; + dml_uint_t dppclk_delay_scl; + dml_uint_t dppclk_delay_scl_lb_only; + dml_uint_t dppclk_delay_cnvc_formatter; + dml_uint_t dppclk_delay_cnvc_cursor; + dml_uint_t cursor_buffer_size; + dml_uint_t cursor_chunk_size; + dml_uint_t dispclk_delay_subtotal; + dml_bool_t dynamic_metadata_vm_enabled; + dml_uint_t max_inter_dcn_tile_repeaters; + dml_uint_t max_num_hdmi_frl_outputs; + dml_uint_t max_num_dp2p0_outputs; + dml_uint_t max_num_dp2p0_streams; + dml_bool_t dcc_supported; + dml_bool_t ptoi_supported; + dml_float_t writeback_max_hscl_ratio; + dml_float_t writeback_max_vscl_ratio; + dml_float_t writeback_min_hscl_ratio; + dml_float_t writeback_min_vscl_ratio; + dml_uint_t writeback_max_hscl_taps; + dml_uint_t writeback_max_vscl_taps; + dml_uint_t writeback_line_buffer_buffer_size; +}; + +struct DmlPipe { + dml_float_t Dppclk; + dml_float_t Dispclk; + dml_float_t PixelClock; + dml_float_t DCFClkDeepSleep; + dml_uint_t DPPPerSurface; + dml_bool_t ScalerEnabled; + enum dml_rotation_angle SourceScan; + dml_uint_t ViewportHeight; + dml_uint_t ViewportHeightChroma; + dml_uint_t BlockWidth256BytesY; + dml_uint_t BlockHeight256BytesY; + dml_uint_t BlockWidth256BytesC; + dml_uint_t BlockHeight256BytesC; + dml_uint_t BlockWidthY; + dml_uint_t BlockHeightY; + dml_uint_t BlockWidthC; + dml_uint_t BlockHeightC; + dml_uint_t InterlaceEnable; + dml_uint_t NumberOfCursors; + dml_uint_t VBlank; + dml_uint_t HTotal; + dml_uint_t HActive; + dml_bool_t DCCEnable; + enum dml_odm_mode 
ODMMode; + enum dml_source_format_class SourcePixelFormat; + enum dml_swizzle_mode SurfaceTiling; + dml_uint_t BytePerPixelY; + dml_uint_t BytePerPixelC; + dml_bool_t ProgressiveToInterlaceUnitInOPP; + dml_float_t VRatio; + dml_float_t VRatioChroma; + dml_uint_t VTaps; + dml_uint_t VTapsChroma; + dml_uint_t PitchY; + dml_uint_t DCCMetaPitchY; + dml_uint_t PitchC; + dml_uint_t DCCMetaPitchC; + dml_bool_t ViewportStationary; + dml_uint_t ViewportXStart; + dml_uint_t ViewportYStart; + dml_uint_t ViewportXStartC; + dml_uint_t ViewportYStartC; + dml_bool_t FORCE_ONE_ROW_FOR_FRAME; + dml_uint_t SwathHeightY; + dml_uint_t SwathHeightC; +}; + +struct Watermarks { + dml_float_t UrgentWatermark; + dml_float_t WritebackUrgentWatermark; + dml_float_t DRAMClockChangeWatermark; + dml_float_t FCLKChangeWatermark; + dml_float_t WritebackDRAMClockChangeWatermark; + dml_float_t WritebackFCLKChangeWatermark; + dml_float_t StutterExitWatermark; + dml_float_t StutterEnterPlusExitWatermark; + dml_float_t Z8StutterExitWatermark; + dml_float_t Z8StutterEnterPlusExitWatermark; + dml_float_t USRRetrainingWatermark; +}; + +struct SOCParametersList { + dml_float_t UrgentLatency; + dml_float_t ExtraLatency; + dml_float_t WritebackLatency; + dml_float_t DRAMClockChangeLatency; + dml_float_t FCLKChangeLatency; + dml_float_t SRExitTime; + dml_float_t SREnterPlusExitTime; + dml_float_t SRExitZ8Time; + dml_float_t SREnterPlusExitZ8Time; + dml_float_t USRRetrainingLatency; + dml_float_t SMNLatency; +}; + +/// @brief Struct that represent Plane configration of a display cfg +struct dml_plane_cfg_st { + // + // Pipe/Surface Parameters + // + dml_bool_t GPUVMEnable; /// <brief Set if any pipe has GPUVM enable + dml_bool_t HostVMEnable; /// <brief Set if any pipe has HostVM enable + + dml_uint_t GPUVMMaxPageTableLevels; /// <brief GPUVM level; max of all pipes' + dml_uint_t HostVMMaxPageTableLevels; /// <brief HostVM level; max of all pipes'; that is the number of non-cache HVM level + + dml_uint_t GPUVMMinPageSizeKBytes[__DML_NUM_PLANES__]; + dml_bool_t ForceOneRowForFrame[__DML_NUM_PLANES__]; + dml_bool_t PTEBufferModeOverrideEn[__DML_NUM_PLANES__]; //< brief when override enable; the DML will only check the given pte buffer and will use the pte buffer mode as is + dml_bool_t PTEBufferMode[__DML_NUM_PLANES__]; + dml_uint_t ViewportWidth[__DML_NUM_PLANES__]; + dml_uint_t ViewportHeight[__DML_NUM_PLANES__]; + dml_uint_t ViewportWidthChroma[__DML_NUM_PLANES__]; + dml_uint_t ViewportHeightChroma[__DML_NUM_PLANES__]; + dml_uint_t ViewportXStart[__DML_NUM_PLANES__]; + dml_uint_t ViewportXStartC[__DML_NUM_PLANES__]; + dml_uint_t ViewportYStart[__DML_NUM_PLANES__]; + dml_uint_t ViewportYStartC[__DML_NUM_PLANES__]; + dml_bool_t ViewportStationary[__DML_NUM_PLANES__]; + + dml_bool_t ScalerEnabled[__DML_NUM_PLANES__]; + dml_float_t HRatio[__DML_NUM_PLANES__]; + dml_float_t VRatio[__DML_NUM_PLANES__]; + dml_float_t HRatioChroma[__DML_NUM_PLANES__]; + dml_float_t VRatioChroma[__DML_NUM_PLANES__]; + dml_uint_t HTaps[__DML_NUM_PLANES__]; + dml_uint_t VTaps[__DML_NUM_PLANES__]; + dml_uint_t HTapsChroma[__DML_NUM_PLANES__]; + dml_uint_t VTapsChroma[__DML_NUM_PLANES__]; + dml_uint_t LBBitPerPixel[__DML_NUM_PLANES__]; + + enum dml_rotation_angle SourceScan[__DML_NUM_PLANES__]; + dml_uint_t ScalerRecoutWidth[__DML_NUM_PLANES__]; + + dml_bool_t DynamicMetadataEnable[__DML_NUM_PLANES__]; + dml_uint_t DynamicMetadataLinesBeforeActiveRequired[__DML_NUM_PLANES__]; + dml_uint_t DynamicMetadataTransmittedBytes[__DML_NUM_PLANES__]; + dml_uint_t 
DETSizeOverride[__DML_NUM_PLANES__]; /// <brief user can specify the desire DET buffer usage per-plane + + dml_uint_t NumberOfCursors[__DML_NUM_PLANES__]; + dml_uint_t CursorWidth[__DML_NUM_PLANES__]; + dml_uint_t CursorBPP[__DML_NUM_PLANES__]; + + dml_bool_t setup_for_tdlut[__DML_NUM_PLANES__]; + enum dml2_tdlut_addressing_mode tdlut_addressing_mode[__DML_NUM_PLANES__]; + enum dml2_tdlut_width_mode tdlut_width_mode[__DML_NUM_PLANES__]; + + enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[__DML_NUM_PLANES__]; + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[__DML_NUM_PLANES__]; + + dml_uint_t BlendingAndTiming[__DML_NUM_PLANES__]; /// <brief From which timing group (like OTG) that this plane is getting its timing from. Mode check also need this info for example to check num OTG; encoder; dsc etc. +}; // dml_plane_cfg_st; + +/// @brief Surface Parameters +struct dml_surface_cfg_st { + enum dml_swizzle_mode SurfaceTiling[__DML_NUM_PLANES__]; + enum dml_source_format_class SourcePixelFormat[__DML_NUM_PLANES__]; + dml_uint_t PitchY[__DML_NUM_PLANES__]; + dml_uint_t SurfaceWidthY[__DML_NUM_PLANES__]; + dml_uint_t SurfaceHeightY[__DML_NUM_PLANES__]; + dml_uint_t PitchC[__DML_NUM_PLANES__]; + dml_uint_t SurfaceWidthC[__DML_NUM_PLANES__]; + dml_uint_t SurfaceHeightC[__DML_NUM_PLANES__]; + + dml_bool_t DCCEnable[__DML_NUM_PLANES__]; + dml_uint_t DCCMetaPitchY[__DML_NUM_PLANES__]; + dml_uint_t DCCMetaPitchC[__DML_NUM_PLANES__]; + + dml_float_t DCCRateLuma[__DML_NUM_PLANES__]; + dml_float_t DCCRateChroma[__DML_NUM_PLANES__]; + dml_float_t DCCFractionOfZeroSizeRequestsLuma[__DML_NUM_PLANES__]; + dml_float_t DCCFractionOfZeroSizeRequestsChroma[__DML_NUM_PLANES__]; +}; // dml_surface_cfg_st + +/// @brief structure that represents the timing configuration +struct dml_timing_cfg_st { + dml_uint_t HTotal[__DML_NUM_PLANES__]; + dml_uint_t VTotal[__DML_NUM_PLANES__]; + dml_uint_t HBlankEnd[__DML_NUM_PLANES__]; + dml_uint_t VBlankEnd[__DML_NUM_PLANES__]; + dml_uint_t RefreshRate[__DML_NUM_PLANES__]; + dml_uint_t VFrontPorch[__DML_NUM_PLANES__]; + dml_float_t PixelClock[__DML_NUM_PLANES__]; + dml_uint_t HActive[__DML_NUM_PLANES__]; + dml_uint_t VActive[__DML_NUM_PLANES__]; + dml_bool_t Interlace[__DML_NUM_PLANES__]; + dml_bool_t DRRDisplay[__DML_NUM_PLANES__]; + dml_uint_t VBlankNom[__DML_NUM_PLANES__]; +}; // dml_timing_cfg_st; + +/// @brief structure that represents the output stream +struct dml_output_cfg_st { + // Output Setting + dml_uint_t DSCInputBitPerComponent[__DML_NUM_PLANES__]; + enum dml_output_format_class OutputFormat[__DML_NUM_PLANES__]; + enum dml_output_encoder_class OutputEncoder[__DML_NUM_PLANES__]; + dml_uint_t OutputMultistreamId[__DML_NUM_PLANES__]; + dml_bool_t OutputMultistreamEn[__DML_NUM_PLANES__]; + dml_float_t OutputBpp[__DML_NUM_PLANES__]; //< brief Use by mode_programming to specify a output bpp; user can use the output from mode_support (support.OutputBpp) + dml_float_t PixelClockBackEnd[__DML_NUM_PLANES__]; + enum dml_dsc_enable DSCEnable[__DML_NUM_PLANES__]; //< brief for mode support check; use to determine if dsc is required + dml_uint_t OutputLinkDPLanes[__DML_NUM_PLANES__]; + enum dml_output_link_dp_rate OutputLinkDPRate[__DML_NUM_PLANES__]; + dml_float_t ForcedOutputLinkBPP[__DML_NUM_PLANES__]; + dml_uint_t AudioSampleRate[__DML_NUM_PLANES__]; + dml_uint_t AudioSampleLayout[__DML_NUM_PLANES__]; + dml_bool_t OutputDisabled[__DML_NUM_PLANES__]; + dml_uint_t DSCSlices[__DML_NUM_PLANES__]; +}; // dml_timing_cfg_st; + +/// @brief Writeback 
Setting
+struct dml_writeback_cfg_st {
+	enum dml_source_format_class WritebackPixelFormat[__DML_NUM_PLANES__];
+	dml_bool_t WritebackEnable[__DML_NUM_PLANES__];
+	dml_uint_t ActiveWritebacksPerSurface[__DML_NUM_PLANES__];
+	dml_uint_t WritebackDestinationWidth[__DML_NUM_PLANES__];
+	dml_uint_t WritebackDestinationHeight[__DML_NUM_PLANES__];
+	dml_uint_t WritebackSourceWidth[__DML_NUM_PLANES__];
+	dml_uint_t WritebackSourceHeight[__DML_NUM_PLANES__];
+	dml_uint_t WritebackHTaps[__DML_NUM_PLANES__];
+	dml_uint_t WritebackVTaps[__DML_NUM_PLANES__];
+	dml_float_t WritebackHRatio[__DML_NUM_PLANES__];
+	dml_float_t WritebackVRatio[__DML_NUM_PLANES__];
+}; // dml_writeback_cfg_st;
+
+/// @brief Hardware resource specific settings; mainly used by mode_programming when test/sw wants to force a specific setting
+/// that is not the same as what the mode support stage derives. When calling mode_support together with mode_programming; the hw-specific
+/// resources will be set to what the mode_support layer recommends
+struct dml_hw_resource_st {
+	enum dml_odm_mode ODMMode[__DML_NUM_PLANES__]; /// <brief ODM mode that is chosen in the mode check stage and will be used in the mode programming stage
+	dml_uint_t DPPPerSurface[__DML_NUM_PLANES__]; /// <brief How many DPPs are needed to drive the surface to output. If MPCC or ODMC is used; this could be 2 or 4.
+	dml_bool_t DSCEnabled[__DML_NUM_PLANES__]; /// <brief Indicates if DSC is enabled; used in mode_programming
+	dml_uint_t NumberOfDSCSlices[__DML_NUM_PLANES__]; /// <brief Indicates how many slices are needed to support the given mode
+	dml_float_t DLGRefClkFreqMHz; /// <brief DLG Global Reference timer
+};
+
+/// @brief To control the clk usage for mode programming
+struct dml_clk_cfg_st {
+	enum dml_clk_cfg_policy dcfclk_option; ///< brief Used for mode_program; user can select between using the min required clk freq as calculated by DML or using a test-specific freq
+	enum dml_clk_cfg_policy dispclk_option; ///< brief Used for mode_program; user can select between using the min required clk freq as calculated by DML or using a test-specific freq
+	enum dml_clk_cfg_policy dppclk_option[__DML_NUM_PLANES__];
+
+	dml_float_t dcfclk_mhz;
+	dml_float_t dispclk_mhz;
+	dml_float_t dppclk_mhz[__DML_NUM_PLANES__];
+}; // dml_clk_cfg_st
+
+/// @brief DML display configuration.
+/// Describes how to display a surface in a multi-plane setup and output it to different outputs and writebacks using the specified timing
+struct dml_display_cfg_st {
+	struct dml_surface_cfg_st surface;
+	struct dml_plane_cfg_st plane;
+	struct dml_timing_cfg_st timing;
+	struct dml_output_cfg_st output;
+	struct dml_writeback_cfg_st writeback;
+	unsigned int num_surfaces;
+	unsigned int num_timings;
+
+	struct dml_hw_resource_st hw; //< brief for mode programming
+	struct dml_clk_cfg_st clk_overrides; //< brief for mode programming clk override
+}; // dml_display_cfg_st
+
+/// @brief DML mode evaluation and programming policy
+/// These are the knobs that affect mode support and mode programming
+struct dml_mode_eval_policy_st {
+	// -------------------
+	// Policy
+	// -------------------
+	enum dml_mpc_use_policy MPCCombineUse[__DML_NUM_PLANES__]; /// <brief MPC Combine mode as selected by the user; used in the mode check stage
+	enum dml_odm_use_policy ODMUse[__DML_NUM_PLANES__]; /// <brief ODM mode as selected by the user; used in the mode check stage
+	enum dml_unbounded_requesting_policy UseUnboundedRequesting; ///< brief Unbounded request mode preference
+	enum dml_immediate_flip_requirement ImmediateFlipRequirement[__DML_NUM_PLANES__]; /// <brief Whether immediate flip is a requirement for this plane. When host vm is present; iflip is needed regardless
+	enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank[__DML_NUM_PLANES__]; /// <brief Specifies whether the DML should calculate the values needed to support the different pwr saving features (cstate; pstate; etc.) during vblank
+
+	enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal;
+	bool UseOnlyMaxPrefetchModes;
+	dml_bool_t UseMinimumRequiredDCFCLK; //<brief When set; the mode_check stage will figure out the min DCFCLK freq to support the given display configuration. User can then use the output DCFCLK for mode programming.
+	dml_bool_t DRAMClockChangeRequirementFinal;
+	dml_bool_t FCLKChangeRequirementFinal;
+	dml_bool_t USRRetrainingRequiredFinal;
+	dml_bool_t EnhancedPrefetchScheduleAccelerationFinal;
+
+	dml_bool_t NomDETInKByteOverrideEnable; //<brief Nominal DET buffer size for a pipe. If this size fits the required 2 swathes; DML will use this DET size
+	dml_uint_t NomDETInKByteOverrideValue;
+
+	dml_bool_t DCCProgrammingAssumesScanDirectionUnknownFinal;
+	dml_bool_t SynchronizeTimingsFinal;
+	dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+	dml_bool_t AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported; //<brief if set; mode support will report the mode as supported even though the DRAM clock change is not supported (assuming the soc will stay in the max power state)
+	dml_bool_t AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported; //<brief if set; mode support will report the mode as supported even though the Fabric clock change is not supported (assuming the soc will stay in the max power state)
+};
+
+/// @brief Contains important information after the mode support steps, including why a mode is not supported.
+struct dml_mode_support_info_st { + //----------------- + // Mode Support Information + //----------------- + dml_bool_t ModeIsSupported; //<brief Is the mode support any voltage and combine setting + dml_bool_t ImmediateFlipSupport; //<brief Means mode support immediate flip at the max combine setting; determine in mode support and used in mode programming + dml_uint_t MaximumMPCCombine; //<brief If using MPC combine helps the power saving support; then this will be set to 1 + dml_bool_t UnboundedRequestEnabled; + dml_uint_t CompressedBufferSizeInkByte; + + /* Mode Support Reason */ + dml_bool_t WritebackLatencySupport; + dml_bool_t ScaleRatioAndTapsSupport; + dml_bool_t SourceFormatPixelAndScanSupport; + dml_bool_t MPCCombineMethodIncompatible; + dml_bool_t P2IWith420; + dml_bool_t DSCOnlyIfNecessaryWithBPP; + dml_bool_t DSC422NativeNotSupported; + dml_bool_t LinkRateDoesNotMatchDPVersion; + dml_bool_t LinkRateForMultistreamNotIndicated; + dml_bool_t BPPForMultistreamNotIndicated; + dml_bool_t MultistreamWithHDMIOreDP; + dml_bool_t MSOOrODMSplitWithNonDPLink; + dml_bool_t NotEnoughLanesForMSO; + dml_bool_t NumberOfOTGSupport; + dml_bool_t NumberOfHDMIFRLSupport; + dml_bool_t NumberOfDP2p0Support; + dml_bool_t NonsupportedDSCInputBPC; + dml_bool_t WritebackScaleRatioAndTapsSupport; + dml_bool_t CursorSupport; + dml_bool_t PitchSupport; + dml_bool_t ViewportExceedsSurface; + dml_bool_t ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified; + dml_bool_t ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; + dml_bool_t InvalidCombinationOfMALLUseForPStateAndStaticScreen; + dml_bool_t InvalidCombinationOfMALLUseForPState; + dml_bool_t ExceededMALLSize; + dml_bool_t EnoughWritebackUnits; + + dml_bool_t ExceededMultistreamSlots; + dml_bool_t ODMCombineTwoToOneSupportCheckOK; + dml_bool_t ODMCombineFourToOneSupportCheckOK; + dml_bool_t NotEnoughDSCUnits; + dml_bool_t NotEnoughDSCSlices; + dml_bool_t PixelsPerLinePerDSCUnitSupport; + dml_bool_t DSCCLKRequiredMoreThanSupported; + dml_bool_t DTBCLKRequiredMoreThanSupported; + dml_bool_t LinkCapacitySupport; + + dml_bool_t ROBSupport[2]; + dml_bool_t PTEBufferSizeNotExceeded[2]; + dml_bool_t DCCMetaBufferSizeNotExceeded[2]; + dml_bool_t TotalVerticalActiveBandwidthSupport[2]; + enum dml_dram_clock_change_support DRAMClockChangeSupport[2]; + dml_float_t ActiveDRAMClockChangeLatencyMargin[__DML_NUM_PLANES__]; + dml_uint_t SubViewportLinesNeededInMALL[__DML_NUM_PLANES__]; + enum dml_fclock_change_support FCLKChangeSupport[2]; + dml_bool_t USRRetrainingSupport[2]; + dml_bool_t VActiveBandwithSupport[2]; + dml_bool_t PrefetchSupported[2]; + dml_bool_t DynamicMetadataSupported[2]; + dml_bool_t VRatioInPrefetchSupported[2]; + dml_bool_t DISPCLK_DPPCLK_Support[2]; + dml_bool_t TotalAvailablePipesSupport[2]; + dml_bool_t ModeSupport[2]; + dml_bool_t ViewportSizeSupport[2]; + dml_bool_t ImmediateFlipSupportedForState[2]; + + dml_bool_t NoTimeForPrefetch[2][__DML_NUM_PLANES__]; + dml_bool_t NoTimeForDynamicMetadata[2][__DML_NUM_PLANES__]; + + dml_bool_t MPCCombineEnable[__DML_NUM_PLANES__]; /// <brief Indicate if the MPC Combine enable in the given state and optimize mpc combine setting + enum dml_odm_mode ODMMode[__DML_NUM_PLANES__]; /// <brief ODM mode that is chosen in the mode check stage and will be used in mode programming stage + dml_uint_t DPPPerSurface[__DML_NUM_PLANES__]; /// <brief How many DPPs are needed drive the surface to output. If MPCC or ODMC could be 2 or 4. 
+ dml_bool_t DSCEnabled[__DML_NUM_PLANES__]; /// <brief Indicate if the DSC is actually required; used in mode_programming + dml_bool_t FECEnabled[__DML_NUM_PLANES__]; /// <brief Indicate if the FEC is actually required + dml_uint_t NumberOfDSCSlices[__DML_NUM_PLANES__]; /// <brief Indicate how many slices needed to support the given mode + + dml_float_t OutputBpp[__DML_NUM_PLANES__]; + enum dml_output_type_and_rate__type OutputType[__DML_NUM_PLANES__]; + enum dml_output_type_and_rate__rate OutputRate[__DML_NUM_PLANES__]; + + dml_float_t AlignedDCCMetaPitchY[__DML_NUM_PLANES__]; /// <brief Pitch value that is aligned to tiling setting + dml_float_t AlignedDCCMetaPitchC[__DML_NUM_PLANES__]; + dml_float_t AlignedYPitch[__DML_NUM_PLANES__]; + dml_float_t AlignedCPitch[__DML_NUM_PLANES__]; + dml_float_t MaxTotalVerticalActiveAvailableBandwidth[2]; /// <brief nominal bw available for display +}; // dml_mode_support_info_st + +/// @brief Treat this as the intermediate values and outputs of mode check function. User can query the content of the struct to know more about the result of mode evaluation. +struct mode_support_st { + struct ip_params_st ip; + struct soc_bounding_box_st soc; + struct soc_state_bounding_box_st state; //<brief Per-state bbox values; only 1 state per compute + struct dml_mode_eval_policy_st policy; + + dml_uint_t state_idx; //<brief The power state idx for the power state under this computation + dml_uint_t max_state_idx; //<brief The MAX power state idx + struct soc_state_bounding_box_st max_state; //<brief The MAX power state; some algo needs to know the max state info to determine if + struct dml_display_cfg_st cache_display_cfg; // <brief A copy of the current display cfg in consideration + + // Physical info; only using for programming + dml_uint_t num_active_planes; // <brief As determined by either e2e_pipe_param or display_cfg + + // Calculated Clocks + dml_float_t RequiredDISPCLK[2]; /// <brief Required DISPCLK; depends on pixel rate; odm mode etc. + dml_float_t RequiredDPPCLKThisState[__DML_NUM_PLANES__]; + dml_float_t DCFCLKState[2]; /// <brief recommended DCFCLK freq; calculated by DML. 
If UseMinimumRequiredDCFCLK is not set; then it will be just the state DCFCLK; else it will min DCFCLK for support + dml_float_t RequiredDISPCLKPerSurface[2][__DML_NUM_PLANES__]; + dml_float_t RequiredDPPCLKPerSurface[2][__DML_NUM_PLANES__]; + + dml_float_t FabricClock; /// <brief Basically just the clock freq at the min (or given) state + dml_float_t DRAMSpeed; /// <brief Basically just the clock freq at the min (or given) state + dml_float_t SOCCLK; /// <brief Basically just the clock freq at the min (or given) state + dml_float_t DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting + dml_float_t GlobalDPPCLK; /// <brief the Max DPPCLK freq out of all pipes + + // ---------------------------------- + // Mode Support Info and fail reason + // ---------------------------------- + struct dml_mode_support_info_st support; + + // These are calculated before the ModeSupport and ModeProgram step + // They represent the bound for the return buffer sizing + dml_uint_t MaxTotalDETInKByte; + dml_uint_t NomDETInKByte; + dml_uint_t MinCompressedBufferSizeInKByte; + + // Info obtained at the end of mode support calculations + // The reported info is at the "optimal" state and combine setting + dml_float_t ReturnBW; + dml_float_t ReturnDRAMBW; + dml_uint_t DETBufferSizeInKByte[__DML_NUM_PLANES__]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value. + dml_uint_t DETBufferSizeY[__DML_NUM_PLANES__]; + dml_uint_t DETBufferSizeC[__DML_NUM_PLANES__]; + dml_uint_t SwathHeightY[__DML_NUM_PLANES__]; + dml_uint_t SwathHeightC[__DML_NUM_PLANES__]; + + // ---------------------------------- + // Intermediates/Informational + // ---------------------------------- + dml_uint_t TotImmediateFlipBytes; + dml_bool_t DCCEnabledInAnySurface; + dml_float_t WritebackRequiredDISPCLK; + dml_float_t TimeCalc; + dml_float_t TWait; + + dml_uint_t SwathWidthYAllStates[2][__DML_NUM_PLANES__]; + dml_uint_t SwathWidthCAllStates[2][__DML_NUM_PLANES__]; + dml_uint_t SwathHeightYAllStates[2][__DML_NUM_PLANES__]; + dml_uint_t SwathHeightCAllStates[2][__DML_NUM_PLANES__]; + dml_uint_t SwathWidthYThisState[__DML_NUM_PLANES__]; + dml_uint_t SwathWidthCThisState[__DML_NUM_PLANES__]; + dml_uint_t SwathHeightYThisState[__DML_NUM_PLANES__]; + dml_uint_t SwathHeightCThisState[__DML_NUM_PLANES__]; + dml_uint_t DETBufferSizeInKByteAllStates[2][__DML_NUM_PLANES__]; + dml_uint_t DETBufferSizeYAllStates[2][__DML_NUM_PLANES__]; + dml_uint_t DETBufferSizeCAllStates[2][__DML_NUM_PLANES__]; + dml_bool_t UnboundedRequestEnabledAllStates[2]; + dml_uint_t CompressedBufferSizeInkByteAllStates[2]; + dml_bool_t UnboundedRequestEnabledThisState; + dml_uint_t CompressedBufferSizeInkByteThisState; + dml_uint_t DETBufferSizeInKByteThisState[__DML_NUM_PLANES__]; + dml_uint_t DETBufferSizeYThisState[__DML_NUM_PLANES__]; + dml_uint_t DETBufferSizeCThisState[__DML_NUM_PLANES__]; + dml_float_t VRatioPreY[2][__DML_NUM_PLANES__]; + dml_float_t VRatioPreC[2][__DML_NUM_PLANES__]; + dml_uint_t swath_width_luma_ub_all_states[2][__DML_NUM_PLANES__]; + dml_uint_t swath_width_chroma_ub_all_states[2][__DML_NUM_PLANES__]; + dml_uint_t swath_width_luma_ub_this_state[__DML_NUM_PLANES__]; + dml_uint_t swath_width_chroma_ub_this_state[__DML_NUM_PLANES__]; + dml_uint_t RequiredSlots[__DML_NUM_PLANES__]; + dml_uint_t PDEAndMetaPTEBytesPerFrame[2][__DML_NUM_PLANES__]; + dml_uint_t MetaRowBytes[2][__DML_NUM_PLANES__]; + dml_uint_t 
DPTEBytesPerRow[2][__DML_NUM_PLANES__]; + dml_uint_t PrefetchLinesY[2][__DML_NUM_PLANES__]; + dml_uint_t PrefetchLinesC[2][__DML_NUM_PLANES__]; + dml_uint_t MaxNumSwY[__DML_NUM_PLANES__]; /// <brief Max number of swath for prefetch + dml_uint_t MaxNumSwC[__DML_NUM_PLANES__]; /// <brief Max number of swath for prefetch + dml_uint_t PrefillY[__DML_NUM_PLANES__]; + dml_uint_t PrefillC[__DML_NUM_PLANES__]; + + dml_uint_t PrefetchLinesYThisState[__DML_NUM_PLANES__]; + dml_uint_t PrefetchLinesCThisState[__DML_NUM_PLANES__]; + dml_uint_t DPTEBytesPerRowThisState[__DML_NUM_PLANES__]; + dml_uint_t PDEAndMetaPTEBytesPerFrameThisState[__DML_NUM_PLANES__]; + dml_uint_t MetaRowBytesThisState[__DML_NUM_PLANES__]; + dml_bool_t use_one_row_for_frame[2][__DML_NUM_PLANES__]; + dml_bool_t use_one_row_for_frame_flip[2][__DML_NUM_PLANES__]; + dml_bool_t use_one_row_for_frame_this_state[__DML_NUM_PLANES__]; + dml_bool_t use_one_row_for_frame_flip_this_state[__DML_NUM_PLANES__]; + + dml_float_t LineTimesForPrefetch[__DML_NUM_PLANES__]; + dml_float_t LinesForMetaPTE[__DML_NUM_PLANES__]; + dml_float_t LinesForMetaAndDPTERow[__DML_NUM_PLANES__]; + dml_float_t SwathWidthYSingleDPP[__DML_NUM_PLANES__]; + dml_float_t SwathWidthCSingleDPP[__DML_NUM_PLANES__]; + dml_uint_t BytePerPixelY[__DML_NUM_PLANES__]; + dml_uint_t BytePerPixelC[__DML_NUM_PLANES__]; + dml_float_t BytePerPixelInDETY[__DML_NUM_PLANES__]; + dml_float_t BytePerPixelInDETC[__DML_NUM_PLANES__]; + + dml_uint_t Read256BlockHeightY[__DML_NUM_PLANES__]; + dml_uint_t Read256BlockWidthY[__DML_NUM_PLANES__]; + dml_uint_t Read256BlockHeightC[__DML_NUM_PLANES__]; + dml_uint_t Read256BlockWidthC[__DML_NUM_PLANES__]; + dml_uint_t MacroTileHeightY[__DML_NUM_PLANES__]; + dml_uint_t MacroTileHeightC[__DML_NUM_PLANES__]; + dml_uint_t MacroTileWidthY[__DML_NUM_PLANES__]; + dml_uint_t MacroTileWidthC[__DML_NUM_PLANES__]; + dml_float_t PSCL_FACTOR[__DML_NUM_PLANES__]; + dml_float_t PSCL_FACTOR_CHROMA[__DML_NUM_PLANES__]; + dml_float_t MaximumSwathWidthLuma[__DML_NUM_PLANES__]; + dml_float_t MaximumSwathWidthChroma[__DML_NUM_PLANES__]; + dml_float_t Tno_bw[__DML_NUM_PLANES__]; + dml_float_t DestinationLinesToRequestVMInImmediateFlip[__DML_NUM_PLANES__]; + dml_float_t DestinationLinesToRequestRowInImmediateFlip[__DML_NUM_PLANES__]; + dml_float_t WritebackDelayTime[__DML_NUM_PLANES__]; + dml_uint_t dpte_group_bytes[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_height[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_height_chroma[__DML_NUM_PLANES__]; + dml_uint_t meta_row_height[__DML_NUM_PLANES__]; + dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__]; + dml_float_t UrgLatency; + dml_float_t UrgentBurstFactorCursor[2][__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorCursorPre[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorLuma[2][__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorLumaPre[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorChroma[2][__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorChromaPre[__DML_NUM_PLANES__]; + dml_float_t MaximumSwathWidthInLineBufferLuma; + dml_float_t MaximumSwathWidthInLineBufferChroma; + dml_float_t ExtraLatency; + + // Backend + dml_bool_t RequiresDSC[__DML_NUM_PLANES__]; + dml_bool_t RequiresFEC[__DML_NUM_PLANES__]; + dml_float_t OutputBppPerState[__DML_NUM_PLANES__]; + dml_uint_t DSCDelayPerState[__DML_NUM_PLANES__]; + enum dml_output_type_and_rate__type OutputTypePerState[__DML_NUM_PLANES__]; + enum dml_output_type_and_rate__rate OutputRatePerState[__DML_NUM_PLANES__]; + + // Bandwidth Related Info + dml_float_t 
BandwidthAvailableForImmediateFlip; + dml_float_t ReadBandwidthLuma[__DML_NUM_PLANES__]; + dml_float_t ReadBandwidthChroma[__DML_NUM_PLANES__]; + dml_float_t WriteBandwidth[__DML_NUM_PLANES__]; + dml_float_t RequiredPrefetchPixelDataBWLuma[__DML_NUM_PLANES__]; + dml_float_t RequiredPrefetchPixelDataBWChroma[__DML_NUM_PLANES__]; + dml_float_t cursor_bw[__DML_NUM_PLANES__]; + dml_float_t cursor_bw_pre[__DML_NUM_PLANES__]; + dml_float_t prefetch_vmrow_bw[__DML_NUM_PLANES__]; + dml_float_t final_flip_bw[__DML_NUM_PLANES__]; + dml_float_t meta_row_bandwidth_this_state[__DML_NUM_PLANES__]; + dml_float_t dpte_row_bandwidth_this_state[__DML_NUM_PLANES__]; + dml_float_t ReturnBWPerState[2]; + dml_float_t ReturnDRAMBWPerState[2]; + dml_float_t meta_row_bandwidth[2][__DML_NUM_PLANES__]; + dml_float_t dpte_row_bandwidth[2][__DML_NUM_PLANES__]; + + // Something that should be feedback to caller + enum dml_odm_mode ODMModePerState[__DML_NUM_PLANES__]; + enum dml_odm_mode ODMModeThisState[__DML_NUM_PLANES__]; + dml_uint_t SurfaceSizeInMALL[__DML_NUM_PLANES__]; + dml_uint_t NoOfDPP[2][__DML_NUM_PLANES__]; + dml_uint_t NoOfDPPThisState[__DML_NUM_PLANES__]; + dml_bool_t MPCCombine[2][__DML_NUM_PLANES__]; + dml_bool_t MPCCombineThisState[__DML_NUM_PLANES__]; + dml_float_t ProjectedDCFCLKDeepSleep[2]; + dml_float_t MinDPPCLKUsingSingleDPP[__DML_NUM_PLANES__]; + dml_bool_t SingleDPPViewportSizeSupportPerSurface[__DML_NUM_PLANES__]; + dml_bool_t ImmediateFlipSupportedForPipe[__DML_NUM_PLANES__]; + dml_bool_t NotUrgentLatencyHiding[__DML_NUM_PLANES__]; + dml_bool_t NotUrgentLatencyHidingPre[__DML_NUM_PLANES__]; + dml_bool_t PTEBufferSizeNotExceededPerState[__DML_NUM_PLANES__]; + dml_bool_t DCCMetaBufferSizeNotExceededPerState[__DML_NUM_PLANES__]; + dml_uint_t PrefetchMode[__DML_NUM_PLANES__]; + dml_uint_t TotalNumberOfActiveDPP[2]; + dml_uint_t TotalNumberOfSingleDPPSurfaces[2]; + dml_uint_t TotalNumberOfDCCActiveDPP[2]; + + dml_uint_t SubViewportLinesNeededInMALL[__DML_NUM_PLANES__]; + +}; // mode_support_st + +/// @brief A mega structure that houses various info for model programming step. 
+struct mode_program_st { + + //------------- + // Intermediate/Informational + //------------- + dml_float_t UrgentLatency; + dml_float_t UrgentLatencyWithUSRRetraining; + dml_uint_t VInitPreFillY[__DML_NUM_PLANES__]; + dml_uint_t VInitPreFillC[__DML_NUM_PLANES__]; + dml_uint_t MaxNumSwathY[__DML_NUM_PLANES__]; + dml_uint_t MaxNumSwathC[__DML_NUM_PLANES__]; + + dml_float_t BytePerPixelDETY[__DML_NUM_PLANES__]; + dml_float_t BytePerPixelDETC[__DML_NUM_PLANES__]; + dml_uint_t BytePerPixelY[__DML_NUM_PLANES__]; + dml_uint_t BytePerPixelC[__DML_NUM_PLANES__]; + dml_uint_t SwathWidthY[__DML_NUM_PLANES__]; + dml_uint_t SwathWidthC[__DML_NUM_PLANES__]; + dml_uint_t SwathWidthSingleDPPY[__DML_NUM_PLANES__]; + dml_uint_t SwathWidthSingleDPPC[__DML_NUM_PLANES__]; + dml_float_t ReadBandwidthSurfaceLuma[__DML_NUM_PLANES__]; + dml_float_t ReadBandwidthSurfaceChroma[__DML_NUM_PLANES__]; + + dml_uint_t PixelPTEBytesPerRow[__DML_NUM_PLANES__]; + dml_uint_t PDEAndMetaPTEBytesFrame[__DML_NUM_PLANES__]; + dml_uint_t MetaRowByte[__DML_NUM_PLANES__]; + dml_uint_t PrefetchSourceLinesY[__DML_NUM_PLANES__]; + dml_float_t RequiredPrefetchPixDataBWLuma[__DML_NUM_PLANES__]; + dml_float_t RequiredPrefetchPixDataBWChroma[__DML_NUM_PLANES__]; + dml_uint_t PrefetchSourceLinesC[__DML_NUM_PLANES__]; + dml_float_t PSCL_THROUGHPUT[__DML_NUM_PLANES__]; + dml_float_t PSCL_THROUGHPUT_CHROMA[__DML_NUM_PLANES__]; + dml_uint_t DSCDelay[__DML_NUM_PLANES__]; + dml_float_t DPPCLKUsingSingleDPP[__DML_NUM_PLANES__]; + + dml_uint_t MacroTileWidthY[__DML_NUM_PLANES__]; + dml_uint_t MacroTileWidthC[__DML_NUM_PLANES__]; + dml_uint_t BlockHeight256BytesY[__DML_NUM_PLANES__]; + dml_uint_t BlockHeight256BytesC[__DML_NUM_PLANES__]; + dml_uint_t BlockWidth256BytesY[__DML_NUM_PLANES__]; + dml_uint_t BlockWidth256BytesC[__DML_NUM_PLANES__]; + + dml_uint_t BlockHeightY[__DML_NUM_PLANES__]; + dml_uint_t BlockHeightC[__DML_NUM_PLANES__]; + dml_uint_t BlockWidthY[__DML_NUM_PLANES__]; + dml_uint_t BlockWidthC[__DML_NUM_PLANES__]; + + dml_uint_t SurfaceSizeInTheMALL[__DML_NUM_PLANES__]; + dml_float_t VRatioPrefetchY[__DML_NUM_PLANES__]; + dml_float_t VRatioPrefetchC[__DML_NUM_PLANES__]; + dml_float_t Tno_bw[__DML_NUM_PLANES__]; + dml_float_t final_flip_bw[__DML_NUM_PLANES__]; + dml_float_t prefetch_vmrow_bw[__DML_NUM_PLANES__]; + dml_float_t cursor_bw[__DML_NUM_PLANES__]; + dml_float_t cursor_bw_pre[__DML_NUM_PLANES__]; + dml_float_t WritebackDelay[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_height[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_height_linear[__DML_NUM_PLANES__]; + dml_uint_t meta_req_width[__DML_NUM_PLANES__]; + dml_uint_t meta_req_height[__DML_NUM_PLANES__]; + dml_uint_t meta_row_width[__DML_NUM_PLANES__]; + dml_uint_t meta_row_height[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_width_luma_ub[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_width_chroma_ub[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_height_chroma[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_height_linear_chroma[__DML_NUM_PLANES__]; + dml_uint_t meta_req_width_chroma[__DML_NUM_PLANES__]; + dml_uint_t meta_req_height_chroma[__DML_NUM_PLANES__]; + dml_uint_t meta_row_width_chroma[__DML_NUM_PLANES__]; + dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__]; + dml_uint_t vm_group_bytes[__DML_NUM_PLANES__]; + dml_uint_t dpte_group_bytes[__DML_NUM_PLANES__]; + dml_float_t meta_row_bw[__DML_NUM_PLANES__]; + dml_float_t dpte_row_bw[__DML_NUM_PLANES__]; + dml_float_t UrgBurstFactorCursor[__DML_NUM_PLANES__]; + dml_float_t UrgBurstFactorCursorPre[__DML_NUM_PLANES__]; + dml_float_t 
UrgBurstFactorLuma[__DML_NUM_PLANES__]; + dml_float_t UrgBurstFactorLumaPre[__DML_NUM_PLANES__]; + dml_float_t UrgBurstFactorChroma[__DML_NUM_PLANES__]; + dml_float_t UrgBurstFactorChromaPre[__DML_NUM_PLANES__]; + + dml_uint_t swath_width_luma_ub[__DML_NUM_PLANES__]; + dml_uint_t swath_width_chroma_ub[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEReqWidthY[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEReqHeightY[__DML_NUM_PLANES__]; + dml_uint_t PTERequestSizeY[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEReqWidthC[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEReqHeightC[__DML_NUM_PLANES__]; + dml_uint_t PTERequestSizeC[__DML_NUM_PLANES__]; + + dml_float_t Tdmdl_vm[__DML_NUM_PLANES__]; + dml_float_t Tdmdl[__DML_NUM_PLANES__]; + dml_float_t TSetup[__DML_NUM_PLANES__]; + dml_uint_t dpde0_bytes_per_frame_ub_l[__DML_NUM_PLANES__]; + dml_uint_t meta_pte_bytes_per_frame_ub_l[__DML_NUM_PLANES__]; + dml_uint_t dpde0_bytes_per_frame_ub_c[__DML_NUM_PLANES__]; + dml_uint_t meta_pte_bytes_per_frame_ub_c[__DML_NUM_PLANES__]; + + dml_bool_t UnboundedRequestEnabled; + dml_uint_t compbuf_reserved_space_64b; + dml_uint_t compbuf_reserved_space_zs; + dml_uint_t CompressedBufferSizeInkByte; + + dml_bool_t NoUrgentLatencyHiding[__DML_NUM_PLANES__]; + dml_bool_t NoUrgentLatencyHidingPre[__DML_NUM_PLANES__]; + dml_float_t UrgentExtraLatency; + dml_bool_t PrefetchAndImmediateFlipSupported; + dml_float_t TotalDataReadBandwidth; + dml_float_t BandwidthAvailableForImmediateFlip; + dml_bool_t NotEnoughTimeForDynamicMetadata[__DML_NUM_PLANES__]; + + dml_float_t ReadBandwidthLuma[__DML_NUM_PLANES__]; + dml_float_t ReadBandwidthChroma[__DML_NUM_PLANES__]; + + dml_float_t total_dcn_read_bw_with_flip; + dml_float_t total_dcn_read_bw_with_flip_no_urgent_burst; + dml_float_t TotalDataReadBandwidthNotIncludingMALLPrefetch; + dml_float_t total_dcn_read_bw_with_flip_not_including_MALL_prefetch; + dml_float_t non_urgent_total_dcn_read_bw_with_flip; + dml_float_t non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch; + + dml_bool_t use_one_row_for_frame[__DML_NUM_PLANES__]; + dml_bool_t use_one_row_for_frame_flip[__DML_NUM_PLANES__]; + + dml_float_t TCalc; + dml_uint_t TotImmediateFlipBytes; + + // ------------------- + // Output + // ------------------- + dml_uint_t pipe_plane[__DML_NUM_PLANES__]; // <brief used mainly by dv to map the pipe inst to plane index within DML core; the plane idx of a pipe + dml_uint_t num_active_pipes; + + dml_bool_t NoTimeToPrefetch[__DML_NUM_PLANES__]; /// <brief Prefetch schedule calculation result + + // Support + dml_uint_t PrefetchMode[__DML_NUM_PLANES__]; /// <brief prefetch mode used for prefetch support check in mode programming step + dml_bool_t PrefetchModeSupported; /// <brief Is the prefetch mode (bandwidth and latency) supported + dml_bool_t ImmediateFlipSupported; + dml_bool_t ImmediateFlipSupportedForPipe[__DML_NUM_PLANES__]; + + // Clock + dml_float_t Dcfclk; + dml_float_t Dispclk; /// <brief dispclk being used in mode programming + dml_float_t Dppclk[__DML_NUM_PLANES__]; /// <brief dppclk being used in mode programming + dml_float_t WritebackDISPCLK; + dml_float_t GlobalDPPCLK; + + //@ brief These "calculated" dispclk and dppclk clocks are calculated in the mode programming step. + // Depends on the dml_clk_cfg_st option; these calculated values may not used in subsequent calculation. 
+ // Possible DV usage: Calculated values fetched by test once after mode_programming step and then possibly + // use the values as min and adjust the actual freq used for the 2nd pass + dml_float_t Dispclk_calculated; + dml_float_t Dppclk_calculated[__DML_NUM_PLANES__]; + + dml_float_t DSCCLK_calculated[__DML_NUM_PLANES__]; //< brief Required DSCCLK freq. Backend; not used in any subsequent calculations for now + dml_float_t DCFCLKDeepSleep; + + // ARB reg + dml_bool_t DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + struct Watermarks Watermark; + + // DCC compression control + dml_uint_t DCCYMaxUncompressedBlock[__DML_NUM_PLANES__]; + dml_uint_t DCCYMaxCompressedBlock[__DML_NUM_PLANES__]; + dml_uint_t DCCYIndependentBlock[__DML_NUM_PLANES__]; + dml_uint_t DCCCMaxUncompressedBlock[__DML_NUM_PLANES__]; + dml_uint_t DCCCMaxCompressedBlock[__DML_NUM_PLANES__]; + dml_uint_t DCCCIndependentBlock[__DML_NUM_PLANES__]; + + // Stutter Efficiency + dml_float_t StutterEfficiency; + dml_float_t StutterEfficiencyNotIncludingVBlank; + dml_uint_t NumberOfStutterBurstsPerFrame; + dml_float_t Z8StutterEfficiency; + dml_uint_t Z8NumberOfStutterBurstsPerFrame; + dml_float_t Z8StutterEfficiencyNotIncludingVBlank; + dml_float_t StutterPeriod; + dml_float_t Z8StutterEfficiencyBestCase; + dml_uint_t Z8NumberOfStutterBurstsPerFrameBestCase; + dml_float_t Z8StutterEfficiencyNotIncludingVBlankBestCase; + dml_float_t StutterPeriodBestCase; + + // DLG TTU reg + dml_float_t MIN_DST_Y_NEXT_START[__DML_NUM_PLANES__]; + dml_bool_t VREADY_AT_OR_AFTER_VSYNC[__DML_NUM_PLANES__]; + dml_uint_t DSTYAfterScaler[__DML_NUM_PLANES__]; + dml_uint_t DSTXAfterScaler[__DML_NUM_PLANES__]; + dml_float_t DestinationLinesForPrefetch[__DML_NUM_PLANES__]; + dml_float_t DestinationLinesToRequestVMInVBlank[__DML_NUM_PLANES__]; + dml_float_t DestinationLinesToRequestRowInVBlank[__DML_NUM_PLANES__]; + dml_float_t DestinationLinesToRequestVMInImmediateFlip[__DML_NUM_PLANES__]; + dml_float_t DestinationLinesToRequestRowInImmediateFlip[__DML_NUM_PLANES__]; + dml_float_t MinTTUVBlank[__DML_NUM_PLANES__]; + dml_float_t DisplayPipeLineDeliveryTimeLuma[__DML_NUM_PLANES__]; + dml_float_t DisplayPipeLineDeliveryTimeChroma[__DML_NUM_PLANES__]; + dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[__DML_NUM_PLANES__]; + dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[__DML_NUM_PLANES__]; + dml_float_t DisplayPipeRequestDeliveryTimeLuma[__DML_NUM_PLANES__]; + dml_float_t DisplayPipeRequestDeliveryTimeChroma[__DML_NUM_PLANES__]; + dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[__DML_NUM_PLANES__]; + dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[__DML_NUM_PLANES__]; + dml_float_t CursorRequestDeliveryTime[__DML_NUM_PLANES__]; + dml_float_t CursorRequestDeliveryTimePrefetch[__DML_NUM_PLANES__]; + + dml_float_t DST_Y_PER_PTE_ROW_NOM_L[__DML_NUM_PLANES__]; + dml_float_t DST_Y_PER_PTE_ROW_NOM_C[__DML_NUM_PLANES__]; + dml_float_t DST_Y_PER_META_ROW_NOM_L[__DML_NUM_PLANES__]; + dml_float_t DST_Y_PER_META_ROW_NOM_C[__DML_NUM_PLANES__]; + dml_float_t TimePerMetaChunkNominal[__DML_NUM_PLANES__]; + dml_float_t TimePerChromaMetaChunkNominal[__DML_NUM_PLANES__]; + dml_float_t TimePerMetaChunkVBlank[__DML_NUM_PLANES__]; + dml_float_t TimePerChromaMetaChunkVBlank[__DML_NUM_PLANES__]; + dml_float_t TimePerMetaChunkFlip[__DML_NUM_PLANES__]; + dml_float_t TimePerChromaMetaChunkFlip[__DML_NUM_PLANES__]; + dml_float_t time_per_pte_group_nom_luma[__DML_NUM_PLANES__]; + dml_float_t time_per_pte_group_nom_chroma[__DML_NUM_PLANES__]; + dml_float_t 
time_per_pte_group_vblank_luma[__DML_NUM_PLANES__]; + dml_float_t time_per_pte_group_vblank_chroma[__DML_NUM_PLANES__]; + dml_float_t time_per_pte_group_flip_luma[__DML_NUM_PLANES__]; + dml_float_t time_per_pte_group_flip_chroma[__DML_NUM_PLANES__]; + dml_float_t TimePerVMGroupVBlank[__DML_NUM_PLANES__]; + dml_float_t TimePerVMGroupFlip[__DML_NUM_PLANES__]; + dml_float_t TimePerVMRequestVBlank[__DML_NUM_PLANES__]; + dml_float_t TimePerVMRequestFlip[__DML_NUM_PLANES__]; + + dml_float_t FractionOfUrgentBandwidth; + dml_float_t FractionOfUrgentBandwidthImmediateFlip; + + // RQ registers + dml_bool_t PTE_BUFFER_MODE[__DML_NUM_PLANES__]; + dml_uint_t BIGK_FRAGMENT_SIZE[__DML_NUM_PLANES__]; + + dml_uint_t SubViewportLinesNeededInMALL[__DML_NUM_PLANES__]; + dml_bool_t UsesMALLForStaticScreen[__DML_NUM_PLANES__]; + + // OTG + dml_uint_t VStartupMin[__DML_NUM_PLANES__]; /// <brief Minimum vstartup to meet the prefetch schedule (i.e. the prefetch solution can be found at this vstartup time); not the actual global sync vstartup pos. + dml_uint_t VStartup[__DML_NUM_PLANES__]; /// <brief The vstartup value for OTG programming (will set to max vstartup; but now bounded by min(vblank_nom. actual vblank)) + dml_uint_t VUpdateOffsetPix[__DML_NUM_PLANES__]; + dml_uint_t VUpdateWidthPix[__DML_NUM_PLANES__]; + dml_uint_t VReadyOffsetPix[__DML_NUM_PLANES__]; + + // Latency and Support + dml_float_t MaxActiveFCLKChangeLatencySupported; + dml_bool_t USRRetrainingSupport; + enum dml_fclock_change_support FCLKChangeSupport; + enum dml_dram_clock_change_support DRAMClockChangeSupport; + dml_float_t MaxActiveDRAMClockChangeLatencySupported[__DML_NUM_PLANES__]; + dml_float_t WritebackAllowFCLKChangeEndPosition[__DML_NUM_PLANES__]; + dml_float_t WritebackAllowDRAMClockChangeEndPosition[__DML_NUM_PLANES__]; + + // buffer sizing + dml_uint_t DETBufferSizeInKByte[__DML_NUM_PLANES__]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value. 
+ dml_uint_t DETBufferSizeY[__DML_NUM_PLANES__]; + dml_uint_t DETBufferSizeC[__DML_NUM_PLANES__]; + dml_uint_t SwathHeightY[__DML_NUM_PLANES__]; + dml_uint_t SwathHeightC[__DML_NUM_PLANES__]; +}; // mode_program_st + +struct soc_states_st { + dml_uint_t num_states; /// <brief num of soc pwr states + struct soc_state_bounding_box_st state_array[__DML_MAX_STATE_ARRAY_SIZE__]; /// <brief fixed size array that holds states struct +}; + +struct UseMinimumDCFCLK_params_st { + enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange; + dml_bool_t *DRRDisplay; + dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal; + dml_uint_t MaxInterDCNTileRepeaters; + dml_uint_t MaxPrefetchMode; + dml_float_t DRAMClockChangeLatencyFinal; + dml_float_t FCLKChangeLatency; + dml_float_t SREnterPlusExitTime; + dml_uint_t ReturnBusWidth; + dml_uint_t RoundTripPingLatencyCycles; + dml_uint_t ReorderingBytes; + dml_uint_t PixelChunkSizeInKByte; + dml_uint_t MetaChunkSize; + dml_bool_t GPUVMEnable; + dml_uint_t GPUVMMaxPageTableLevels; + dml_bool_t HostVMEnable; + dml_uint_t NumberOfActiveSurfaces; + dml_uint_t HostVMMinPageSize; + dml_uint_t HostVMMaxNonCachedPageTableLevels; + dml_bool_t DynamicMetadataVMEnabled; + dml_bool_t ImmediateFlipRequirement; + dml_bool_t ProgressiveToInterlaceUnitInOPP; + dml_float_t MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation; + dml_float_t PercentOfIdealSDPPortBWReceivedAfterUrgLatency; + dml_uint_t *VTotal; + dml_uint_t *VActive; + dml_uint_t *DynamicMetadataTransmittedBytes; + dml_uint_t *DynamicMetadataLinesBeforeActiveRequired; + dml_bool_t *Interlace; + dml_float_t (*RequiredDPPCLKPerSurface)[__DML_NUM_PLANES__]; + dml_float_t *RequiredDISPCLK; + dml_float_t UrgLatency; + dml_uint_t (*NoOfDPP)[__DML_NUM_PLANES__]; + dml_float_t *ProjectedDCFCLKDeepSleep; + dml_uint_t (*MaximumVStartup)[__DML_NUM_PLANES__]; + dml_uint_t *TotalNumberOfActiveDPP; + dml_uint_t *TotalNumberOfDCCActiveDPP; + dml_uint_t *dpte_group_bytes; + dml_uint_t (*PrefetchLinesY)[__DML_NUM_PLANES__]; + dml_uint_t (*PrefetchLinesC)[__DML_NUM_PLANES__]; + dml_uint_t (*swath_width_luma_ub_all_states)[__DML_NUM_PLANES__]; + dml_uint_t (*swath_width_chroma_ub_all_states)[__DML_NUM_PLANES__]; + dml_uint_t *BytePerPixelY; + dml_uint_t *BytePerPixelC; + dml_uint_t *HTotal; + dml_float_t *PixelClock; + dml_uint_t (*PDEAndMetaPTEBytesPerFrame)[__DML_NUM_PLANES__]; + dml_uint_t (*DPTEBytesPerRow)[__DML_NUM_PLANES__]; + dml_uint_t (*MetaRowBytes)[__DML_NUM_PLANES__]; + dml_bool_t *DynamicMetadataEnable; + dml_float_t *ReadBandwidthLuma; + dml_float_t *ReadBandwidthChroma; + dml_float_t DCFCLKPerState; + dml_float_t *DCFCLKState; +}; + +struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st { + dml_bool_t USRRetrainingRequiredFinal; + enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange; + dml_uint_t *PrefetchMode; + dml_uint_t NumberOfActiveSurfaces; + dml_uint_t MaxLineBufferLines; + dml_uint_t LineBufferSize; + dml_uint_t WritebackInterfaceBufferSize; + dml_float_t DCFCLK; + dml_float_t ReturnBW; + dml_bool_t SynchronizeTimingsFinal; + dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal; + dml_bool_t *DRRDisplay; + dml_uint_t *dpte_group_bytes; + dml_uint_t *meta_row_height; + dml_uint_t *meta_row_height_chroma; + struct SOCParametersList mmSOCParameters; + dml_uint_t WritebackChunkSize; + dml_float_t SOCCLK; + dml_float_t DCFClkDeepSleep; + dml_uint_t *DETBufferSizeY; + dml_uint_t *DETBufferSizeC; + dml_uint_t *SwathHeightY; + dml_uint_t *SwathHeightC; + 
dml_uint_t *LBBitPerPixel; + dml_uint_t *SwathWidthY; + dml_uint_t *SwathWidthC; + dml_float_t *HRatio; + dml_float_t *HRatioChroma; + dml_uint_t *VTaps; + dml_uint_t *VTapsChroma; + dml_float_t *VRatio; + dml_float_t *VRatioChroma; + dml_uint_t *HTotal; + dml_uint_t *VTotal; + dml_uint_t *VActive; + dml_float_t *PixelClock; + dml_uint_t *BlendingAndTiming; + dml_uint_t *DPPPerSurface; + dml_float_t *BytePerPixelDETY; + dml_float_t *BytePerPixelDETC; + dml_uint_t *DSTXAfterScaler; + dml_uint_t *DSTYAfterScaler; + dml_bool_t *WritebackEnable; + enum dml_source_format_class *WritebackPixelFormat; + dml_uint_t *WritebackDestinationWidth; + dml_uint_t *WritebackDestinationHeight; + dml_uint_t *WritebackSourceHeight; + dml_bool_t UnboundedRequestEnabled; + dml_uint_t CompressedBufferSizeInkByte; + + // Output + struct Watermarks *Watermark; + enum dml_dram_clock_change_support *DRAMClockChangeSupport; + dml_float_t *MaxActiveDRAMClockChangeLatencySupported; + dml_uint_t *SubViewportLinesNeededInMALL; + enum dml_fclock_change_support *FCLKChangeSupport; + dml_float_t *MaxActiveFCLKChangeLatencySupported; + dml_bool_t *USRRetrainingSupport; + dml_float_t *ActiveDRAMClockChangeLatencyMargin; +}; + +struct CalculateVMRowAndSwath_params_st { + dml_uint_t NumberOfActiveSurfaces; + struct DmlPipe *myPipe; + dml_uint_t *SurfaceSizeInMALL; + dml_uint_t PTEBufferSizeInRequestsLuma; + dml_uint_t PTEBufferSizeInRequestsChroma; + dml_uint_t DCCMetaBufferSizeBytes; + enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen; + enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange; + dml_uint_t MALLAllocatedForDCN; + dml_uint_t *SwathWidthY; + dml_uint_t *SwathWidthC; + dml_bool_t GPUVMEnable; + dml_bool_t HostVMEnable; + dml_uint_t HostVMMaxNonCachedPageTableLevels; + dml_uint_t GPUVMMaxPageTableLevels; + dml_uint_t *GPUVMMinPageSizeKBytes; + dml_uint_t HostVMMinPageSize; + dml_bool_t *PTEBufferModeOverrideEn; + dml_bool_t *PTEBufferModeOverrideVal; + + // Output + dml_bool_t *PTEBufferSizeNotExceeded; + dml_bool_t *DCCMetaBufferSizeNotExceeded; + dml_uint_t *dpte_row_width_luma_ub; + dml_uint_t *dpte_row_width_chroma_ub; + dml_uint_t *dpte_row_height_luma; + dml_uint_t *dpte_row_height_chroma; + dml_uint_t *dpte_row_height_linear_luma; // VBA_DELTA + dml_uint_t *dpte_row_height_linear_chroma; // VBA_DELTA + dml_uint_t *meta_req_width; + dml_uint_t *meta_req_width_chroma; + dml_uint_t *meta_req_height; + dml_uint_t *meta_req_height_chroma; + dml_uint_t *meta_row_width; + dml_uint_t *meta_row_width_chroma; + dml_uint_t *meta_row_height; + dml_uint_t *meta_row_height_chroma; + dml_uint_t *vm_group_bytes; + dml_uint_t *dpte_group_bytes; + dml_uint_t *PixelPTEReqWidthY; + dml_uint_t *PixelPTEReqHeightY; + dml_uint_t *PTERequestSizeY; + dml_uint_t *PixelPTEReqWidthC; + dml_uint_t *PixelPTEReqHeightC; + dml_uint_t *PTERequestSizeC; + dml_uint_t *dpde0_bytes_per_frame_ub_l; + dml_uint_t *meta_pte_bytes_per_frame_ub_l; + dml_uint_t *dpde0_bytes_per_frame_ub_c; + dml_uint_t *meta_pte_bytes_per_frame_ub_c; + dml_uint_t *PrefetchSourceLinesY; + dml_uint_t *PrefetchSourceLinesC; + dml_uint_t *VInitPreFillY; + dml_uint_t *VInitPreFillC; + dml_uint_t *MaxNumSwathY; + dml_uint_t *MaxNumSwathC; + dml_float_t *meta_row_bw; + dml_float_t *dpte_row_bw; + dml_uint_t *PixelPTEBytesPerRow; + dml_uint_t *PDEAndMetaPTEBytesFrame; + dml_uint_t *MetaRowByte; + dml_bool_t *use_one_row_for_frame; + dml_bool_t *use_one_row_for_frame_flip; + dml_bool_t *UsesMALLForStaticScreen; + dml_bool_t *PTE_BUFFER_MODE; + 
dml_uint_t *BIGK_FRAGMENT_SIZE; +}; + +struct CalculateSwathAndDETConfiguration_params_st { + dml_uint_t *DETSizeOverride; + enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange; + dml_uint_t ConfigReturnBufferSizeInKByte; + dml_uint_t ROBBufferSizeInKByte; + dml_uint_t MaxTotalDETInKByte; + dml_uint_t MinCompressedBufferSizeInKByte; + dml_uint_t PixelChunkSizeInKByte; + dml_bool_t ForceSingleDPP; + dml_uint_t NumberOfActiveSurfaces; + dml_uint_t nomDETInKByte; + enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal; + dml_uint_t ConfigReturnBufferSegmentSizeInkByte; + dml_uint_t CompressedBufferSegmentSizeInkByteFinal; + enum dml_output_encoder_class *Output; + dml_float_t *ReadBandwidthLuma; + dml_float_t *ReadBandwidthChroma; + dml_float_t *MaximumSwathWidthLuma; + dml_float_t *MaximumSwathWidthChroma; + enum dml_rotation_angle *SourceScan; + dml_bool_t *ViewportStationary; + enum dml_source_format_class *SourcePixelFormat; + enum dml_swizzle_mode *SurfaceTiling; + dml_uint_t *ViewportWidth; + dml_uint_t *ViewportHeight; + dml_uint_t *ViewportXStart; + dml_uint_t *ViewportYStart; + dml_uint_t *ViewportXStartC; + dml_uint_t *ViewportYStartC; + dml_uint_t *SurfaceWidthY; + dml_uint_t *SurfaceWidthC; + dml_uint_t *SurfaceHeightY; + dml_uint_t *SurfaceHeightC; + dml_uint_t *Read256BytesBlockHeightY; + dml_uint_t *Read256BytesBlockHeightC; + dml_uint_t *Read256BytesBlockWidthY; + dml_uint_t *Read256BytesBlockWidthC; + enum dml_odm_mode *ODMMode; + dml_uint_t *BlendingAndTiming; + dml_uint_t *BytePerPixY; + dml_uint_t *BytePerPixC; + dml_float_t *BytePerPixDETY; + dml_float_t *BytePerPixDETC; + dml_uint_t *HActive; + dml_float_t *HRatio; + dml_float_t *HRatioChroma; + dml_uint_t *DPPPerSurface; + dml_uint_t *swath_width_luma_ub; + dml_uint_t *swath_width_chroma_ub; + dml_uint_t *SwathWidth; + dml_uint_t *SwathWidthChroma; + dml_uint_t *SwathHeightY; + dml_uint_t *SwathHeightC; + dml_uint_t *DETBufferSizeInKByte; + dml_uint_t *DETBufferSizeY; + dml_uint_t *DETBufferSizeC; + dml_bool_t *UnboundedRequestEnabled; + dml_uint_t *compbuf_reserved_space_64b; + dml_uint_t *compbuf_reserved_space_zs; + dml_uint_t *CompressedBufferSizeInkByte; + dml_bool_t *ViewportSizeSupportPerSurface; + dml_bool_t *ViewportSizeSupport; +}; + +struct CalculateStutterEfficiency_params_st { + dml_uint_t CompressedBufferSizeInkByte; + enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange; + dml_bool_t UnboundedRequestEnabled; + dml_uint_t MetaFIFOSizeInKEntries; + dml_uint_t ZeroSizeBufferEntries; + dml_uint_t PixelChunkSizeInKByte; + dml_uint_t NumberOfActiveSurfaces; + dml_uint_t ROBBufferSizeInKByte; + dml_float_t TotalDataReadBandwidth; + dml_float_t DCFCLK; + dml_float_t ReturnBW; + dml_uint_t CompbufReservedSpace64B; + dml_uint_t CompbufReservedSpaceZs; + dml_float_t SRExitTime; + dml_float_t SRExitZ8Time; + dml_bool_t SynchronizeTimingsFinal; + dml_uint_t *BlendingAndTiming; + dml_float_t StutterEnterPlusExitWatermark; + dml_float_t Z8StutterEnterPlusExitWatermark; + dml_bool_t ProgressiveToInterlaceUnitInOPP; + dml_bool_t *Interlace; + dml_float_t *MinTTUVBlank; + dml_uint_t *DPPPerSurface; + dml_uint_t *DETBufferSizeY; + dml_uint_t *BytePerPixelY; + dml_float_t *BytePerPixelDETY; + dml_uint_t *SwathWidthY; + dml_uint_t *SwathHeightY; + dml_uint_t *SwathHeightC; + dml_float_t *NetDCCRateLuma; + dml_float_t *NetDCCRateChroma; + dml_float_t *DCCFractionOfZeroSizeRequestsLuma; + dml_float_t *DCCFractionOfZeroSizeRequestsChroma; + dml_uint_t *HTotal; + dml_uint_t *VTotal; + 
dml_float_t *PixelClock; + dml_float_t *VRatio; + enum dml_rotation_angle *SourceScan; + dml_uint_t *BlockHeight256BytesY; + dml_uint_t *BlockWidth256BytesY; + dml_uint_t *BlockHeight256BytesC; + dml_uint_t *BlockWidth256BytesC; + dml_uint_t *DCCYMaxUncompressedBlock; + dml_uint_t *DCCCMaxUncompressedBlock; + dml_uint_t *VActive; + dml_bool_t *DCCEnable; + dml_bool_t *WritebackEnable; + dml_float_t *ReadBandwidthSurfaceLuma; + dml_float_t *ReadBandwidthSurfaceChroma; + dml_float_t *meta_row_bw; + dml_float_t *dpte_row_bw; + dml_float_t *StutterEfficiencyNotIncludingVBlank; + dml_float_t *StutterEfficiency; + dml_uint_t *NumberOfStutterBurstsPerFrame; + dml_float_t *Z8StutterEfficiencyNotIncludingVBlank; + dml_float_t *Z8StutterEfficiency; + dml_uint_t *Z8NumberOfStutterBurstsPerFrame; + dml_float_t *StutterPeriod; + dml_bool_t *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; +}; + +struct CalculatePrefetchSchedule_params_st { + dml_bool_t EnhancedPrefetchScheduleAccelerationFinal; + dml_float_t HostVMInefficiencyFactor; + struct DmlPipe *myPipe; + dml_uint_t DSCDelay; + dml_float_t DPPCLKDelaySubtotalPlusCNVCFormater; + dml_float_t DPPCLKDelaySCL; + dml_float_t DPPCLKDelaySCLLBOnly; + dml_float_t DPPCLKDelayCNVCCursor; + dml_float_t DISPCLKDelaySubtotal; + dml_uint_t DPP_RECOUT_WIDTH; + enum dml_output_format_class OutputFormat; + dml_uint_t MaxInterDCNTileRepeaters; + dml_uint_t VStartup; + dml_uint_t MaxVStartup; + dml_uint_t GPUVMPageTableLevels; + dml_bool_t GPUVMEnable; + dml_bool_t HostVMEnable; + dml_uint_t HostVMMaxNonCachedPageTableLevels; + dml_uint_t HostVMMinPageSize; + dml_bool_t DynamicMetadataEnable; + dml_bool_t DynamicMetadataVMEnabled; + int DynamicMetadataLinesBeforeActiveRequired; + dml_uint_t DynamicMetadataTransmittedBytes; + dml_float_t UrgentLatency; + dml_float_t UrgentExtraLatency; + dml_float_t TCalc; + dml_uint_t PDEAndMetaPTEBytesFrame; + dml_uint_t MetaRowByte; + dml_uint_t PixelPTEBytesPerRow; + dml_float_t PrefetchSourceLinesY; + dml_uint_t VInitPreFillY; + dml_uint_t MaxNumSwathY; + dml_float_t PrefetchSourceLinesC; + dml_uint_t VInitPreFillC; + dml_uint_t MaxNumSwathC; + dml_uint_t swath_width_luma_ub; + dml_uint_t swath_width_chroma_ub; + dml_uint_t SwathHeightY; + dml_uint_t SwathHeightC; + dml_float_t TWait; + dml_uint_t *DSTXAfterScaler; + dml_uint_t *DSTYAfterScaler; + dml_float_t *DestinationLinesForPrefetch; + dml_float_t *DestinationLinesToRequestVMInVBlank; + dml_float_t *DestinationLinesToRequestRowInVBlank; + dml_float_t *VRatioPrefetchY; + dml_float_t *VRatioPrefetchC; + dml_float_t *RequiredPrefetchPixDataBWLuma; + dml_float_t *RequiredPrefetchPixDataBWChroma; + dml_bool_t *NotEnoughTimeForDynamicMetadata; + dml_float_t *Tno_bw; + dml_float_t *prefetch_vmrow_bw; + dml_float_t *Tdmdl_vm; + dml_float_t *Tdmdl; + dml_float_t *TSetup; + dml_uint_t *VUpdateOffsetPix; + dml_uint_t *VUpdateWidthPix; + dml_uint_t *VReadyOffsetPix; +}; + +struct dml_core_mode_support_locals_st { + dml_bool_t dummy_boolean[2]; + dml_uint_t dummy_integer[3]; + dml_uint_t dummy_integer_array[22][__DML_NUM_PLANES__]; + enum dml_odm_mode dummy_odm_mode[__DML_NUM_PLANES__]; + dml_bool_t dummy_boolean_array[2][__DML_NUM_PLANES__]; + dml_uint_t MaxVStartupAllPlanes[2]; + dml_uint_t MaximumVStartup[2][__DML_NUM_PLANES__]; + dml_uint_t DSTYAfterScaler[__DML_NUM_PLANES__]; + dml_uint_t DSTXAfterScaler[__DML_NUM_PLANES__]; + dml_uint_t NextPrefetchMode[__DML_NUM_PLANES__]; + dml_uint_t MinPrefetchMode[__DML_NUM_PLANES__]; + dml_uint_t MaxPrefetchMode[__DML_NUM_PLANES__]; + dml_float_t 
dummy_single[3]; + dml_float_t dummy_single_array[__DML_NUM_PLANES__]; + struct Watermarks dummy_watermark; + struct SOCParametersList mSOCParameters; + struct DmlPipe myPipe; + struct DmlPipe SurfParameters[__DML_NUM_PLANES__]; + dml_uint_t TotalNumberOfActiveWriteback; + dml_uint_t MaximumSwathWidthSupportLuma; + dml_uint_t MaximumSwathWidthSupportChroma; + dml_bool_t MPCCombineMethodAsNeededForPStateChangeAndVoltage; + dml_bool_t MPCCombineMethodAsPossible; + dml_bool_t TotalAvailablePipesSupportNoDSC; + dml_uint_t NumberOfDPPNoDSC; + enum dml_odm_mode ODMModeNoDSC; + dml_float_t RequiredDISPCLKPerSurfaceNoDSC; + dml_bool_t TotalAvailablePipesSupportDSC; + dml_uint_t NumberOfDPPDSC; + enum dml_odm_mode ODMModeDSC; + dml_float_t RequiredDISPCLKPerSurfaceDSC; + dml_bool_t NoChromaOrLinear; + dml_float_t BWOfNonCombinedSurfaceOfMaximumBandwidth; + dml_uint_t NumberOfNonCombinedSurfaceOfMaximumBandwidth; + dml_uint_t TotalNumberOfActiveOTG; + dml_uint_t TotalNumberOfActiveHDMIFRL; + dml_uint_t TotalNumberOfActiveDP2p0; + dml_uint_t TotalNumberOfActiveDP2p0Outputs; + dml_uint_t TotalSlots; + dml_uint_t DSCFormatFactor; + dml_uint_t TotalDSCUnitsRequired; + dml_uint_t ReorderingBytes; + dml_bool_t ImmediateFlipRequiredFinal; + dml_bool_t FullFrameMALLPStateMethod; + dml_bool_t SubViewportMALLPStateMethod; + dml_bool_t PhantomPipeMALLPStateMethod; + dml_bool_t SubViewportMALLRefreshGreaterThan120Hz; + dml_float_t MaxTotalVActiveRDBandwidth; + dml_float_t VMDataOnlyReturnBWPerState; + dml_float_t HostVMInefficiencyFactor; + dml_uint_t NextMaxVStartup; + dml_uint_t MaxVStartup; + dml_bool_t AllPrefetchModeTested; + dml_bool_t AnyLinesForVMOrRowTooLarge; + dml_bool_t is_max_pwr_state; + dml_bool_t is_max_dram_pwr_state; + dml_bool_t dram_clock_change_support; + dml_bool_t f_clock_change_support; +}; + +struct dml_core_mode_programming_locals_st { + dml_uint_t DSCFormatFactor; + dml_uint_t dummy_integer_array[2][__DML_NUM_PLANES__]; + enum dml_output_encoder_class dummy_output_encoder_array[__DML_NUM_PLANES__]; + dml_float_t dummy_single_array[2][__DML_NUM_PLANES__]; + dml_uint_t dummy_long_array[4][__DML_NUM_PLANES__]; + dml_bool_t dummy_boolean_array[2][__DML_NUM_PLANES__]; + dml_bool_t dummy_boolean[1]; + struct DmlPipe SurfaceParameters[__DML_NUM_PLANES__]; + dml_uint_t ReorderBytes; + dml_float_t VMDataOnlyReturnBW; + dml_float_t HostVMInefficiencyFactor; + dml_uint_t TotalDCCActiveDPP; + dml_uint_t TotalActiveDPP; + dml_uint_t VStartupLines; + dml_uint_t MaxVStartupLines[__DML_NUM_PLANES__]; /// <brief more like vblank for the plane's OTG + dml_uint_t MaxVStartupAllPlanes; + dml_bool_t ImmediateFlipRequirementFinal; + int iteration; + dml_float_t MaxTotalRDBandwidth; + dml_float_t MaxTotalRDBandwidthNoUrgentBurst; + dml_bool_t DestinationLineTimesForPrefetchLessThan2; + dml_bool_t VRatioPrefetchMoreThanMax; + dml_float_t MaxTotalRDBandwidthNotIncludingMALLPrefetch; + dml_uint_t NextPrefetchMode[__DML_NUM_PLANES__]; + dml_uint_t MinPrefetchMode[__DML_NUM_PLANES__]; + dml_uint_t MaxPrefetchMode[__DML_NUM_PLANES__]; + dml_bool_t AllPrefetchModeTested; + dml_float_t dummy_unit_vector[__DML_NUM_PLANES__]; + dml_float_t NonUrgentMaxTotalRDBandwidth; + dml_float_t NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch; + dml_float_t dummy_single[2]; + struct SOCParametersList mmSOCParameters; + dml_float_t Tvstartup_margin; + dml_float_t dlg_vblank_start; + dml_float_t LSetup; + dml_float_t blank_lines_remaining; + dml_float_t old_MIN_DST_Y_NEXT_START; + dml_float_t TotalWRBandwidth; + dml_float_t 
WRBandwidth; + struct Watermarks dummy_watermark; + struct DmlPipe myPipe; +}; + +struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st { + dml_float_t ActiveDRAMClockChangeLatencyMargin[__DML_NUM_PLANES__]; + dml_float_t ActiveFCLKChangeLatencyMargin[__DML_NUM_PLANES__]; + dml_float_t USRRetrainingLatencyMargin[__DML_NUM_PLANES__]; + + dml_bool_t SynchronizedSurfaces[__DML_NUM_PLANES__][__DML_NUM_PLANES__]; + dml_float_t EffectiveLBLatencyHidingY; + dml_float_t EffectiveLBLatencyHidingC; + dml_float_t LinesInDETY[__DML_NUM_PLANES__]; + dml_float_t LinesInDETC[__DML_NUM_PLANES__]; + dml_uint_t LinesInDETYRoundedDownToSwath[__DML_NUM_PLANES__]; + dml_uint_t LinesInDETCRoundedDownToSwath[__DML_NUM_PLANES__]; + dml_float_t FullDETBufferingTimeY; + dml_float_t FullDETBufferingTimeC; + dml_float_t WritebackDRAMClockChangeLatencyMargin; + dml_float_t WritebackFCLKChangeLatencyMargin; + dml_float_t WritebackLatencyHiding; + + dml_uint_t TotalActiveWriteback; + dml_uint_t LBLatencyHidingSourceLinesY[__DML_NUM_PLANES__]; + dml_uint_t LBLatencyHidingSourceLinesC[__DML_NUM_PLANES__]; + dml_float_t TotalPixelBW; + dml_float_t EffectiveDETBufferSizeY; + dml_float_t ActiveClockChangeLatencyHidingY; + dml_float_t ActiveClockChangeLatencyHidingC; + dml_float_t ActiveClockChangeLatencyHiding; + dml_bool_t FoundCriticalSurface; + dml_uint_t LastSurfaceWithoutMargin; + dml_uint_t FCLKChangeSupportNumber; + dml_uint_t DRAMClockChangeMethod; + dml_uint_t DRAMClockChangeSupportNumber; + dml_uint_t dst_y_pstate; + dml_uint_t src_y_pstate_l; + dml_uint_t src_y_pstate_c; + dml_uint_t src_y_ahead_l; + dml_uint_t src_y_ahead_c; + dml_uint_t sub_vp_lines_l; + dml_uint_t sub_vp_lines_c; +}; + +struct CalculateVMRowAndSwath_locals_st { + dml_uint_t PTEBufferSizeInRequestsForLuma[__DML_NUM_PLANES__]; + dml_uint_t PTEBufferSizeInRequestsForChroma[__DML_NUM_PLANES__]; + dml_uint_t PDEAndMetaPTEBytesFrameY; + dml_uint_t PDEAndMetaPTEBytesFrameC; + dml_uint_t MetaRowByteY[__DML_NUM_PLANES__]; + dml_uint_t MetaRowByteC[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEBytesPerRowY[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEBytesPerRowC[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEBytesPerRowStorageY[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEBytesPerRowStorageC[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEBytesPerRowY_one_row_per_frame[__DML_NUM_PLANES__]; + dml_uint_t PixelPTEBytesPerRowC_one_row_per_frame[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_width_luma_ub_one_row_per_frame[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_height_luma_one_row_per_frame[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_width_chroma_ub_one_row_per_frame[__DML_NUM_PLANES__]; + dml_uint_t dpte_row_height_chroma_one_row_per_frame[__DML_NUM_PLANES__]; + dml_bool_t one_row_per_frame_fits_in_buffer[__DML_NUM_PLANES__]; + + dml_uint_t HostVMDynamicLevels; +}; + +struct UseMinimumDCFCLK_locals_st { + dml_uint_t dummy1; + dml_uint_t dummy2; + dml_uint_t dummy3; + dml_float_t NormalEfficiency; + dml_float_t TotalMaxPrefetchFlipDPTERowBandwidth[2]; + + dml_float_t PixelDCFCLKCyclesRequiredInPrefetch[__DML_NUM_PLANES__]; + dml_float_t PrefetchPixelLinesTime[__DML_NUM_PLANES__]; + dml_float_t DCFCLKRequiredForPeakBandwidthPerSurface[__DML_NUM_PLANES__]; + dml_float_t DynamicMetadataVMExtraLatency[__DML_NUM_PLANES__]; + dml_float_t MinimumTWait; + dml_float_t DPTEBandwidth; + dml_float_t DCFCLKRequiredForAverageBandwidth; + dml_uint_t ExtraLatencyBytes; + dml_float_t ExtraLatencyCycles; + dml_float_t DCFCLKRequiredForPeakBandwidth; + dml_uint_t 
NoOfDPPState[__DML_NUM_PLANES__]; + dml_float_t MinimumTvmPlus2Tr0; +}; + +struct CalculatePrefetchSchedule_locals_st { + dml_bool_t MyError; + dml_uint_t DPPCycles; + dml_uint_t DISPCLKCycles; + dml_float_t DSTTotalPixelsAfterScaler; + dml_float_t LineTime; + dml_float_t dst_y_prefetch_equ; + dml_float_t prefetch_bw_oto; + dml_float_t Tvm_oto; + dml_float_t Tr0_oto; + dml_float_t Tvm_oto_lines; + dml_float_t Tr0_oto_lines; + dml_float_t dst_y_prefetch_oto; + dml_float_t TimeForFetchingMetaPTE; + dml_float_t TimeForFetchingRowInVBlank; + dml_float_t LinesToRequestPrefetchPixelData; + dml_uint_t HostVMDynamicLevelsTrips; + dml_float_t trip_to_mem; + dml_float_t Tvm_trips; + dml_float_t Tr0_trips; + dml_float_t Tvm_trips_rounded; + dml_float_t Tr0_trips_rounded; + dml_float_t max_Tsw; + dml_float_t Lsw_oto; + dml_float_t Tpre_rounded; + dml_float_t prefetch_bw_equ; + dml_float_t Tvm_equ; + dml_float_t Tr0_equ; + dml_float_t Tdmbf; + dml_float_t Tdmec; + dml_float_t Tdmsks; + dml_float_t prefetch_sw_bytes; + dml_float_t prefetch_bw_pr; + dml_float_t bytes_pp; + dml_float_t dep_bytes; + dml_float_t min_Lsw_oto; + dml_float_t Tsw_est1; + dml_float_t Tsw_est3; + dml_float_t PrefetchBandwidth1; + dml_float_t PrefetchBandwidth2; + dml_float_t PrefetchBandwidth3; + dml_float_t PrefetchBandwidth4; +}; + +/// @brief To minimize stack usage; function locals are instead placed into this scratch structure which is allocated per context +struct display_mode_lib_scratch_st { + // Scratch space for function locals + struct dml_core_mode_support_locals_st dml_core_mode_support_locals; + struct dml_core_mode_programming_locals_st dml_core_mode_programming_locals; + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; + struct CalculateVMRowAndSwath_locals_st CalculateVMRowAndSwath_locals; + struct UseMinimumDCFCLK_locals_st UseMinimumDCFCLK_locals; + struct CalculatePrefetchSchedule_locals_st CalculatePrefetchSchedule_locals; + + // Scratch space for function params + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct CalculateVMRowAndSwath_params_st CalculateVMRowAndSwath_params; + struct UseMinimumDCFCLK_params_st UseMinimumDCFCLK_params; + struct CalculateSwathAndDETConfiguration_params_st CalculateSwathAndDETConfiguration_params; + struct CalculateStutterEfficiency_params_st CalculateStutterEfficiency_params; + struct CalculatePrefetchSchedule_params_st CalculatePrefetchSchedule_params; +}; + +/// @brief Represent the overall soc/ip environment. 
It contains data structure represent the soc/ip characteristic and also structures that hold calculation output +struct display_mode_lib_st { + dml_uint_t project; + + //@brief Mode evaluation and programming policy + struct dml_mode_eval_policy_st policy; + + //@brief IP/SOC characteristic + struct ip_params_st ip; + struct soc_bounding_box_st soc; + struct soc_states_st states; + + //@brief Mode Support and Mode programming struct + // Used to hold input; intermediate and output of the calculations + struct mode_support_st ms; // struct for mode support + struct mode_program_st mp; // struct for mode programming + + struct display_mode_lib_scratch_st scratch; +}; + +struct dml_mode_support_ex_params_st { + struct display_mode_lib_st *mode_lib; + const struct dml_display_cfg_st *in_display_cfg; + dml_uint_t in_start_state_idx; + dml_uint_t out_lowest_state_idx; + struct dml_mode_support_info_st *out_evaluation_info; +}; + +typedef struct _vcs_dpi_dml_display_rq_regs_st dml_display_rq_regs_st; +typedef struct _vcs_dpi_dml_display_dlg_regs_st dml_display_dlg_regs_st; +typedef struct _vcs_dpi_dml_display_ttu_regs_st dml_display_ttu_regs_st; +typedef struct _vcs_dpi_dml_display_arb_params_st dml_display_arb_params_st; +typedef struct _vcs_dpi_dml_display_plane_rq_regs_st dml_display_plane_rq_regs_st; + +struct _vcs_dpi_dml_display_dlg_regs_st { + dml_uint_t refcyc_h_blank_end; + dml_uint_t dlg_vblank_end; + dml_uint_t min_dst_y_next_start; + dml_uint_t refcyc_per_htotal; + dml_uint_t refcyc_x_after_scaler; + dml_uint_t dst_y_after_scaler; + dml_uint_t dst_y_prefetch; + dml_uint_t dst_y_per_vm_vblank; + dml_uint_t dst_y_per_row_vblank; + dml_uint_t dst_y_per_vm_flip; + dml_uint_t dst_y_per_row_flip; + dml_uint_t ref_freq_to_pix_freq; + dml_uint_t vratio_prefetch; + dml_uint_t vratio_prefetch_c; + dml_uint_t refcyc_per_pte_group_vblank_l; + dml_uint_t refcyc_per_pte_group_vblank_c; + dml_uint_t refcyc_per_meta_chunk_vblank_l; + dml_uint_t refcyc_per_meta_chunk_vblank_c; + dml_uint_t refcyc_per_pte_group_flip_l; + dml_uint_t refcyc_per_pte_group_flip_c; + dml_uint_t refcyc_per_meta_chunk_flip_l; + dml_uint_t refcyc_per_meta_chunk_flip_c; + dml_uint_t dst_y_per_pte_row_nom_l; + dml_uint_t dst_y_per_pte_row_nom_c; + dml_uint_t refcyc_per_pte_group_nom_l; + dml_uint_t refcyc_per_pte_group_nom_c; + dml_uint_t dst_y_per_meta_row_nom_l; + dml_uint_t dst_y_per_meta_row_nom_c; + dml_uint_t refcyc_per_meta_chunk_nom_l; + dml_uint_t refcyc_per_meta_chunk_nom_c; + dml_uint_t refcyc_per_line_delivery_pre_l; + dml_uint_t refcyc_per_line_delivery_pre_c; + dml_uint_t refcyc_per_line_delivery_l; + dml_uint_t refcyc_per_line_delivery_c; + dml_uint_t refcyc_per_vm_group_vblank; + dml_uint_t refcyc_per_vm_group_flip; + dml_uint_t refcyc_per_vm_req_vblank; + dml_uint_t refcyc_per_vm_req_flip; + dml_uint_t dst_y_offset_cur0; + dml_uint_t chunk_hdl_adjust_cur0; + dml_uint_t dst_y_offset_cur1; + dml_uint_t chunk_hdl_adjust_cur1; + dml_uint_t vready_after_vcount0; + dml_uint_t dst_y_delta_drq_limit; + dml_uint_t refcyc_per_vm_dmdata; + dml_uint_t dmdata_dl_delta; +}; + +struct _vcs_dpi_dml_display_ttu_regs_st { + dml_uint_t qos_level_low_wm; + dml_uint_t qos_level_high_wm; + dml_uint_t min_ttu_vblank; + dml_uint_t qos_level_flip; + dml_uint_t refcyc_per_req_delivery_l; + dml_uint_t refcyc_per_req_delivery_c; + dml_uint_t refcyc_per_req_delivery_cur0; + dml_uint_t refcyc_per_req_delivery_cur1; + dml_uint_t refcyc_per_req_delivery_pre_l; + dml_uint_t refcyc_per_req_delivery_pre_c; + dml_uint_t 
refcyc_per_req_delivery_pre_cur0; + dml_uint_t refcyc_per_req_delivery_pre_cur1; + dml_uint_t qos_level_fixed_l; + dml_uint_t qos_level_fixed_c; + dml_uint_t qos_level_fixed_cur0; + dml_uint_t qos_level_fixed_cur1; + dml_uint_t qos_ramp_disable_l; + dml_uint_t qos_ramp_disable_c; + dml_uint_t qos_ramp_disable_cur0; + dml_uint_t qos_ramp_disable_cur1; +}; + +struct _vcs_dpi_dml_display_arb_params_st { + dml_uint_t max_req_outstanding; + dml_uint_t min_req_outstanding; + dml_uint_t sat_level_us; + dml_uint_t hvm_max_qos_commit_threshold; + dml_uint_t hvm_min_req_outstand_commit_threshold; + dml_uint_t compbuf_reserved_space_kbytes; +}; + +struct _vcs_dpi_dml_display_plane_rq_regs_st { + dml_uint_t chunk_size; + dml_uint_t min_chunk_size; + dml_uint_t meta_chunk_size; + dml_uint_t min_meta_chunk_size; + dml_uint_t dpte_group_size; + dml_uint_t mpte_group_size; + dml_uint_t swath_height; + dml_uint_t pte_row_height_linear; +}; + +struct _vcs_dpi_dml_display_rq_regs_st { + dml_display_plane_rq_regs_st rq_regs_l; + dml_display_plane_rq_regs_st rq_regs_c; + dml_uint_t drq_expansion_mode; + dml_uint_t prq_expansion_mode; + dml_uint_t mrq_expansion_mode; + dml_uint_t crq_expansion_mode; + dml_uint_t plane1_base_address; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h new file mode 100644 index 000000000000..e574c81edf5e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DISPLAY_MODE_LIB_DEFINES_H__ +#define __DISPLAY_MODE_LIB_DEFINES_H__ + +#define DCN_DML__DML_STANDALONE 1 +#define DCN_DML__DML_STANDALONE__1 1 +#define DCN_DML__PRESENT 1 +#define DCN_DML__PRESENT__1 1 +#define DCN_DML__NUM_PLANE 8 +#define DCN_DML__NUM_PLANE__8 1 +#define DCN_DML__NUM_CURSOR 1 +#define DCN_DML__NUM_CURSOR__1 1 +#define DCN_DML__NUM_PWR_STATE 30 +#define DCN_DML__NUM_PWR_STATE__30 1 +#define DCN_DML__VM_PRESENT 1 +#define DCN_DML__VM_PRESENT__1 1 +#define DCN_DML__HOST_VM_PRESENT 1 +#define DCN_DML__HOST_VM_PRESENT__1 1 +#define DCN_DML__DWB 1 + +#include "dml_depedencies.h" + +#include "dml_logging.h" +#include "dml_assert.h" + +// To enable a lot of debug msg +#define __DML_VBA_DEBUG__ +#define __DML_VBA_ENABLE_INLINE_CHECK_ 0 +#define __DML_VBA_MIN_VSTARTUP__ 9 //<brief At which vstartup the DML start to try if the mode can be supported +#define __DML_ARB_TO_RET_DELAY__ (7 + 95) //<brief Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) +#define __DML_MIN_DCFCLK_FACTOR__ 1.15 //<brief fudge factor for min dcfclk calclation +#define __DML_MAX_VRATIO_PRE__ 4.0 //<brief Prefetch schedule max vratio +#define __DML_MAX_VRATIO_PRE_OTO__ 4.0 //<brief Prefetch schedule max vratio for one to one scheduling calculation for prefetch +#define __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ 6.0 //<brief Prefetch schedule max vratio when enhance prefetch schedule acceleration is enabled and vstartup is earliest possible already +#define __DML_NUM_PLANES__ DCN_DML__NUM_PLANE +#define __DML_NUM_CURSORS__ DCN_DML__NUM_CURSOR +#define __DML_DPP_INVALID__ 0 +#define __DML_NUM_DMB__ DCN_DML__DWB +#define __DML_PIPE_NO_PLANE__ 99 + +#define __DML_MAX_STATE_ARRAY_SIZE__ DCN_DML__NUM_PWR_STATE + +// Compilation define +#define __DML_DLL_EXPORT__ + +typedef int dml_int_t; // int is 32-bit in C/C++, but Integer datatype is 16-bit in VBA. this should map to Long in VBA +typedef unsigned int dml_uint_t; +typedef double dml_float_t; + +// Note: bool is 8-bit in C/C++, but Boolean is 16-bit in VBA, use "short" in C/C++ DLL so the struct work when vba uses DLL +// Or the VBA side don't use Boolean, just use "Byte", then C side can use bool +typedef bool dml_bool_t; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c new file mode 100644 index 000000000000..89890c88fd66 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c @@ -0,0 +1,798 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "display_mode_util.h" + +static dml_float_t _log(float in) +{ + int * const exp_ptr = (int *)(&in); + int x = *exp_ptr; + const int log_2 = ((x >> 23) & 255) - 128; + + x &= ~(255 << 23); + x += 127 << 23; + *exp_ptr = x; + + in = ((-1.0f / 3) * in + 2) * in - 2.0f / 3; + + return (in + log_2); +} + +dml_bool_t dml_util_is_420(enum dml_source_format_class source_format) +{ + dml_bool_t val = false; + + switch (source_format) { + case dml_444_16: + val = 0; + break; + case dml_444_32: + val = 0; + break; + case dml_444_64: + val = 0; + break; + case dml_420_8: + val = 1; + break; + case dml_420_10: + val = 1; + break; + case dml_422_8: + val = 0; + break; + case dml_422_10: + val = 0; + break; + default: + ASSERT(0); + break; + } + return val; +} + +static inline float dcn_bw_pow(float a, float exp) +{ + float temp; + /*ASSERT(exp == (int)exp);*/ + if ((int)exp == 0) + return 1; + temp = dcn_bw_pow(a, (int)(exp / 2)); + if (((int)exp % 2) == 0) { + return temp * temp; + } else { + if ((int)exp > 0) + return a * temp * temp; + else + return (temp * temp) / a; + } +} + +static inline float dcn_bw_ceil2(const float arg, const float significance) +{ + ASSERT(significance != 0); + + return ((int)(arg / significance + 0.99999)) * significance; +} + +static inline float dcn_bw_floor2(const float arg, const float significance) +{ + ASSERT(significance != 0); + + return ((int)(arg / significance)) * significance; +} + +dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity) +{ + if (granularity == 0) + return 0; + //return (dml_float_t) (ceil(x / granularity) * granularity); + return (dml_float_t)dcn_bw_ceil2(x, granularity); +} + +dml_float_t dml_floor(dml_float_t x, dml_float_t granularity) +{ + if (granularity == 0) + return 0; + //return (dml_float_t) (floor(x / granularity) * granularity); + return (dml_float_t)dcn_bw_floor2(x, granularity); +} + +dml_float_t dml_min(dml_float_t x, dml_float_t y) +{ + if (x != x) + return y; + if (y != y) + return x; + if (x < y) + return x; + else + return y; +} + +dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z) +{ + return dml_min(dml_min(x, y), z); +} + +dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w) +{ + return dml_min(dml_min(dml_min(x, y), z), w); +} + +dml_float_t dml_max(dml_float_t x, dml_float_t y) +{ + if (x != x) + return y; + if (y != y) + return x; +if (x > y) + return x; + else + return y; +} +dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z) +{ + return dml_max(dml_max(x, y), z); +} +dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d) +{ + return dml_max(dml_max(a, b), dml_max(c, d)); +} +dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e) +{ + return dml_max(dml_max4(a, b, c, d), e); +} +dml_float_t dml_log(dml_float_t x, dml_float_t base) +{ + return (dml_float_t) (_log(x) / _log(base)); +} + +dml_float_t dml_log2(dml_float_t x) +{ + return (dml_float_t) (_log(x) / _log(2)); +} + +dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding) +{ +// if (bankers_rounding) +// return (dml_float_t) lrint(val); +// else { +// return round(val); + double round_pt = 0.5; + double ceil = 
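+ /* Round half up: compare the fractional part of val against round_pt (0.5).
+  * The bankers_rounding argument is currently ignored here; the lrint()-based
+  * path above is commented out. */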
dml_ceil(val, 1); + double floor = dml_floor(val, 1); + + if (val - floor >= round_pt) + return ceil; + else + return floor; +// } +} + +dml_float_t dml_pow(dml_float_t base, int exp) +{ + return (dml_float_t) dcn_bw_pow(base, exp); +} + +dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up) +{ + dml_uint_t remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *rq_regs) +{ + dml_print("DML: ===================================== \n"); + dml_print("DML: DISPLAY_PLANE_RQ_REGS_ST\n"); + dml_print("DML: chunk_size = 0x%x\n", rq_regs->chunk_size); + dml_print("DML: min_chunk_size = 0x%x\n", rq_regs->min_chunk_size); + dml_print("DML: meta_chunk_size = 0x%x\n", rq_regs->meta_chunk_size); + dml_print("DML: min_meta_chunk_size = 0x%x\n", rq_regs->min_meta_chunk_size); + dml_print("DML: dpte_group_size = 0x%x\n", rq_regs->dpte_group_size); + dml_print("DML: mpte_group_size = 0x%x\n", rq_regs->mpte_group_size); + dml_print("DML: swath_height = 0x%x\n", rq_regs->swath_height); + dml_print("DML: pte_row_height_linear = 0x%x\n", rq_regs->pte_row_height_linear); + dml_print("DML: ===================================== \n"); +} + +void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs) +{ + dml_print("DML: ===================================== \n"); + dml_print("DML: DISPLAY_RQ_REGS_ST\n"); + dml_print("DML: <LUMA> \n"); + dml_print_data_rq_regs_st(&rq_regs->rq_regs_l); + dml_print("DML: <CHROMA> \n"); + dml_print_data_rq_regs_st(&rq_regs->rq_regs_c); + dml_print("DML: drq_expansion_mode = 0x%x\n", rq_regs->drq_expansion_mode); + dml_print("DML: prq_expansion_mode = 0x%x\n", rq_regs->prq_expansion_mode); + dml_print("DML: mrq_expansion_mode = 0x%x\n", rq_regs->mrq_expansion_mode); + dml_print("DML: crq_expansion_mode = 0x%x\n", rq_regs->crq_expansion_mode); + dml_print("DML: plane1_base_address = 0x%x\n", rq_regs->plane1_base_address); + dml_print("DML: ===================================== \n"); +} + +void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs) +{ + dml_print("DML: ===================================== \n"); + dml_print("DML: DISPLAY_DLG_REGS_ST \n"); + dml_print("DML: refcyc_h_blank_end = 0x%x\n", dlg_regs->refcyc_h_blank_end); + dml_print("DML: dlg_vblank_end = 0x%x\n", dlg_regs->dlg_vblank_end); + dml_print("DML: min_dst_y_next_start = 0x%x\n", dlg_regs->min_dst_y_next_start); + dml_print("DML: refcyc_per_htotal = 0x%x\n", dlg_regs->refcyc_per_htotal); + dml_print("DML: refcyc_x_after_scaler = 0x%x\n", dlg_regs->refcyc_x_after_scaler); + dml_print("DML: dst_y_after_scaler = 0x%x\n", dlg_regs->dst_y_after_scaler); + dml_print("DML: dst_y_prefetch = 0x%x\n", dlg_regs->dst_y_prefetch); + dml_print("DML: dst_y_per_vm_vblank = 0x%x\n", dlg_regs->dst_y_per_vm_vblank); + dml_print("DML: dst_y_per_row_vblank = 0x%x\n", dlg_regs->dst_y_per_row_vblank); + dml_print("DML: dst_y_per_vm_flip = 0x%x\n", dlg_regs->dst_y_per_vm_flip); + dml_print("DML: dst_y_per_row_flip = 0x%x\n", dlg_regs->dst_y_per_row_flip); + dml_print("DML: ref_freq_to_pix_freq = 0x%x\n", dlg_regs->ref_freq_to_pix_freq); + dml_print("DML: vratio_prefetch = 0x%x\n", dlg_regs->vratio_prefetch); + dml_print("DML: vratio_prefetch_c = 0x%x\n", dlg_regs->vratio_prefetch_c); + dml_print("DML: refcyc_per_pte_group_vblank_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_l); 
+ dml_print("DML: refcyc_per_pte_group_vblank_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_c); + dml_print("DML: refcyc_per_meta_chunk_vblank_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_l); + dml_print("DML: refcyc_per_meta_chunk_vblank_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_c); + dml_print("DML: refcyc_per_pte_group_flip_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_l); + dml_print("DML: refcyc_per_pte_group_flip_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_c); + dml_print("DML: refcyc_per_meta_chunk_flip_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_l); + dml_print("DML: refcyc_per_meta_chunk_flip_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_c); + dml_print("DML: dst_y_per_pte_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_l); + dml_print("DML: dst_y_per_pte_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_c); + dml_print("DML: refcyc_per_pte_group_nom_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_l); + dml_print("DML: refcyc_per_pte_group_nom_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_c); + dml_print("DML: dst_y_per_meta_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_l); + dml_print("DML: dst_y_per_meta_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_c); + dml_print("DML: refcyc_per_meta_chunk_nom_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_l); + dml_print("DML: refcyc_per_meta_chunk_nom_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_c); + dml_print("DML: refcyc_per_line_delivery_pre_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_l); + dml_print("DML: refcyc_per_line_delivery_pre_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_c); + dml_print("DML: refcyc_per_line_delivery_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_l); + dml_print("DML: refcyc_per_line_delivery_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_c); + dml_print("DML: refcyc_per_vm_group_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_group_vblank); + dml_print("DML: refcyc_per_vm_group_flip = 0x%x\n", dlg_regs->refcyc_per_vm_group_flip); + dml_print("DML: refcyc_per_vm_req_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_req_vblank); + dml_print("DML: refcyc_per_vm_req_flip = 0x%x\n", dlg_regs->refcyc_per_vm_req_flip); + dml_print("DML: chunk_hdl_adjust_cur0 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur0); + dml_print("DML: dst_y_offset_cur1 = 0x%x\n", dlg_regs->dst_y_offset_cur1); + dml_print("DML: chunk_hdl_adjust_cur1 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur1); + dml_print("DML: vready_after_vcount0 = 0x%x\n", dlg_regs->vready_after_vcount0); + dml_print("DML: dst_y_delta_drq_limit = 0x%x\n", dlg_regs->dst_y_delta_drq_limit); + dml_print("DML: refcyc_per_vm_dmdata = 0x%x\n", dlg_regs->refcyc_per_vm_dmdata); + dml_print("DML: ===================================== \n"); +} + +void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs) +{ + dml_print("DML: ===================================== \n"); + dml_print("DML: DISPLAY_TTU_REGS_ST \n"); + dml_print("DML: qos_level_low_wm = 0x%x\n", ttu_regs->qos_level_low_wm); + dml_print("DML: qos_level_high_wm = 0x%x\n", ttu_regs->qos_level_high_wm); + dml_print("DML: min_ttu_vblank = 0x%x\n", ttu_regs->min_ttu_vblank); + dml_print("DML: qos_level_flip = 0x%x\n", ttu_regs->qos_level_flip); + dml_print("DML: refcyc_per_req_delivery_pre_l = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_l); + dml_print("DML: refcyc_per_req_delivery_l = 0x%x\n", ttu_regs->refcyc_per_req_delivery_l); + dml_print("DML: refcyc_per_req_delivery_pre_c = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_c); + dml_print("DML: 
refcyc_per_req_delivery_c = 0x%x\n", ttu_regs->refcyc_per_req_delivery_c); + dml_print("DML: refcyc_per_req_delivery_cur0 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_cur0); + dml_print("DML: refcyc_per_req_delivery_pre_cur0 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_cur0); + dml_print("DML: refcyc_per_req_delivery_cur1 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_cur1); + dml_print("DML: refcyc_per_req_delivery_pre_cur1 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_cur1); + dml_print("DML: qos_level_fixed_l = 0x%x\n", ttu_regs->qos_level_fixed_l); + dml_print("DML: qos_ramp_disable_l = 0x%x\n", ttu_regs->qos_ramp_disable_l); + dml_print("DML: qos_level_fixed_c = 0x%x\n", ttu_regs->qos_level_fixed_c); + dml_print("DML: qos_ramp_disable_c = 0x%x\n", ttu_regs->qos_ramp_disable_c); + dml_print("DML: qos_level_fixed_cur0 = 0x%x\n", ttu_regs->qos_level_fixed_cur0); + dml_print("DML: qos_ramp_disable_cur0 = 0x%x\n", ttu_regs->qos_ramp_disable_cur0); + dml_print("DML: qos_level_fixed_cur1 = 0x%x\n", ttu_regs->qos_level_fixed_cur1); + dml_print("DML: qos_ramp_disable_cur1 = 0x%x\n", ttu_regs->qos_ramp_disable_cur1); + dml_print("DML: ===================================== \n"); +} + +void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy) +{ + dml_print("DML: ===================================== \n"); + dml_print("DML: DML_MODE_EVAL_POLICY_ST\n"); + dml_print("DML: Policy: UseUnboundedRequesting = 0x%x\n", policy->UseUnboundedRequesting); + dml_print("DML: Policy: UseMinimumRequiredDCFCLK = 0x%x\n", policy->UseMinimumRequiredDCFCLK); + dml_print("DML: Policy: DRAMClockChangeRequirementFinal = 0x%x\n", policy->DRAMClockChangeRequirementFinal); + dml_print("DML: Policy: FCLKChangeRequirementFinal = 0x%x\n", policy->FCLKChangeRequirementFinal); + dml_print("DML: Policy: USRRetrainingRequiredFinal = 0x%x\n", policy->USRRetrainingRequiredFinal); + dml_print("DML: Policy: EnhancedPrefetchScheduleAccelerationFinal = 0x%x\n", policy->EnhancedPrefetchScheduleAccelerationFinal); + dml_print("DML: Policy: NomDETInKByteOverrideEnable = 0x%x\n", policy->NomDETInKByteOverrideEnable); + dml_print("DML: Policy: NomDETInKByteOverrideValue = 0x%x\n", policy->NomDETInKByteOverrideValue); + dml_print("DML: Policy: DCCProgrammingAssumesScanDirectionUnknownFinal = 0x%x\n", policy->DCCProgrammingAssumesScanDirectionUnknownFinal); + dml_print("DML: Policy: SynchronizeTimingsFinal = 0x%x\n", policy->SynchronizeTimingsFinal); + dml_print("DML: Policy: SynchronizeDRRDisplaysForUCLKPStateChangeFinal = 0x%x\n", policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal); + dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = 0x%x\n", policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported); + dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = 0x%x\n", policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported); + + for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) { + dml_print("DML: i=%0d, Policy: MPCCombineUse = 0x%x\n", i, policy->MPCCombineUse[i]); + dml_print("DML: i=%0d, Policy: ODMUse = 0x%x\n", i, policy->ODMUse[i]); + dml_print("DML: i=%0d, Policy: ImmediateFlipRequirement = 0x%x\n", i, policy->ImmediateFlipRequirement[i]); + dml_print("DML: i=%0d, Policy: AllowForPStateChangeOrStutterInVBlank = 0x%x\n", i, policy->AllowForPStateChangeOrStutterInVBlank[i]); + } + dml_print("DML: ===================================== \n"); +} + +void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t 
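+ /* j: voltage/power state index whose mode-support results are printed */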
j) +{ + dml_print("DML: MODE SUPPORT: ===============================================\n"); + dml_print("DML: MODE SUPPORT: Voltage State %d\n", j); + dml_print("DML: MODE SUPPORT: Mode Supported : %s\n", mode_lib->ms.support.ModeSupport[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Scale Ratio And Taps : %s\n", mode_lib->ms.support.ScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Source Format Pixel And Scan : %s\n", mode_lib->ms.support.SourceFormatPixelAndScanSupport == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Viewport Size : %s\n", mode_lib->ms.support.ViewportSizeSupport[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Link Rate Does Not Match DP Version : %s\n", mode_lib->ms.support.LinkRateDoesNotMatchDPVersion == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Link Rate For Multistream Not Indicated : %s\n", mode_lib->ms.support.LinkRateForMultistreamNotIndicated == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: BPP For Multi stream Not Indicated : %s\n", mode_lib->ms.support.BPPForMultistreamNotIndicated == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Multistream With HDMI Or eDP : %s\n", mode_lib->ms.support.MultistreamWithHDMIOreDP == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Exceeded Multistream Slots : %s\n", mode_lib->ms.support.ExceededMultistreamSlots == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: MSO Or ODM Split With Non DP Link : %s\n", mode_lib->ms.support.MSOOrODMSplitWithNonDPLink == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Not Enough Lanes For MSO : %s\n", mode_lib->ms.support.NotEnoughLanesForMSO == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: LinkCapacitySupport : %s\n", mode_lib->ms.support.LinkCapacitySupport == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: P2IWith420 : %s\n", mode_lib->ms.support.P2IWith420 == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: DSCOnlyIfNecessaryWithBPP : %s\n", mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: DSC422NativeNotSupported : %s\n", mode_lib->ms.support.DSC422NativeNotSupported == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: MPCCombineMethodIncompatible : %s\n", mode_lib->ms.support.MPCCombineMethodIncompatible == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: ODMCombineTwoToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: ODMCombineFourToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: NotEnoughDSCUnits : %s\n", mode_lib->ms.support.NotEnoughDSCUnits == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: NotEnoughDSCSlices : %s\n", mode_lib->ms.support.NotEnoughDSCSlices == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe : %s\n", mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == false ? 
"Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPStateAndStaticScreen : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: DSCCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: PixelsPerLinePerDSCUnitSupport : %s\n", mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: DTBCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPState : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPState == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified : %s\n", mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: ROB Support : %s\n", mode_lib->ms.support.ROBSupport[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: DISPCLK DPPCLK Support : %s\n", mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Total Available Pipes Support : %s\n", mode_lib->ms.support.TotalAvailablePipesSupport[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Number Of OTG Support : %s\n", mode_lib->ms.support.NumberOfOTGSupport == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Number Of DP2p0 Support : %s\n", mode_lib->ms.support.NumberOfDP2p0Support == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Writeback Latency Support : %s\n", mode_lib->ms.support.WritebackLatencySupport == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Writeback Scale Ratio And Taps Support : %s\n", mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Cursor Support : %s\n", mode_lib->ms.support.CursorSupport == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Pitch Support : %s\n", mode_lib->ms.support.PitchSupport == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Viewport Exceeds Surface : %s\n", mode_lib->ms.support.ViewportExceedsSurface == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Prefetch Supported : %s\n", mode_lib->ms.support.PrefetchSupported[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: VActive Bandwith Support : %s\n", mode_lib->ms.support.VActiveBandwithSupport[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Dynamic Metadata Supported : %s\n", mode_lib->ms.support.DynamicMetadataSupported[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Total Vertical Active Bandwidth Support : %s\n", mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: VRatio In Prefetch Supported : %s\n", mode_lib->ms.support.VRatioInPrefetchSupported[j] == true ? 
"Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: PTE Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: DCC Meta Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Non supported DSC Input BPC : %s\n", mode_lib->ms.support.NonsupportedDSCInputBPC == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Exceeded MALL Size : %s\n", mode_lib->ms.support.ExceededMALLSize == false ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: Host VM or Immediate Flip Supported : %s\n", ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !mode_lib->scratch.dml_core_mode_support_locals.ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j]) ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: dram clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.dram_clock_change_support == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: f_clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.f_clock_change_support == true ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: USR Retraining Support : %s\n", (!mode_lib->ms.policy.USRRetrainingRequiredFinal || &mode_lib->ms.support.USRRetrainingSupport[j]) ? "Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: ===============================================\n"); +} + +void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only) +{ + dml_print("DML: ===================================== \n"); + dml_print("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ModeIsSupported == 0) + dml_print("DML: support: ModeIsSupported = 0x%x\n", support->ModeIsSupported); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml_print("DML: support: ImmediateFlipSupport = 0x%x\n", support->ImmediateFlipSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml_print("DML: support: WritebackLatencySupport = 0x%x\n", support->WritebackLatencySupport); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + dml_print("DML: support: ScaleRatioAndTapsSupport = 0x%x\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + dml_print("DML: support: SourceFormatPixelAndScanSupport = 0x%x\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->MPCCombineMethodIncompatible == 1) + dml_print("DML: support: MPCCombineMethodIncompatible = 0x%x\n", support->MPCCombineMethodIncompatible); + if (!fail_only || support->P2IWith420 == 1) + dml_print("DML: support: P2IWith420 = 0x%x\n", support->P2IWith420); + if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) + dml_print("DML: support: DSCOnlyIfNecessaryWithBPP = 0x%x\n", support->DSCOnlyIfNecessaryWithBPP); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml_print("DML: support: DSC422NativeNotSupported = 0x%x\n", support->DSC422NativeNotSupported); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + dml_print("DML: support: LinkRateDoesNotMatchDPVersion = 0x%x\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + dml_print("DML: support: LinkRateForMultistreamNotIndicated = 0x%x\n", 
support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + dml_print("DML: support: BPPForMultistreamNotIndicated = 0x%x\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + dml_print("DML: support: MultistreamWithHDMIOreDP = 0x%x\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + dml_print("DML: support: MSOOrODMSplitWithNonDPLink = 0x%x\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + dml_print("DML: support: NotEnoughLanesForMSO = 0x%x\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml_print("DML: support: NumberOfOTGSupport = 0x%x\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml_print("DML: support: NumberOfDP2p0Support = 0x%x\n", support->NumberOfDP2p0Support); + if (!fail_only || support->NonsupportedDSCInputBPC == 1) + dml_print("DML: support: NonsupportedDSCInputBPC = 0x%x\n", support->NonsupportedDSCInputBPC); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml_print("DML: support: WritebackScaleRatioAndTapsSupport = 0x%x\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->CursorSupport == 0) + dml_print("DML: support: CursorSupport = 0x%x\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml_print("DML: support: PitchSupport = 0x%x\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml_print("DML: support: ViewportExceedsSurface = 0x%x\n", support->ViewportExceedsSurface); + if (!fail_only || support->ExceededMALLSize == 1) + dml_print("DML: support: ExceededMALLSize = 0x%x\n", support->ExceededMALLSize); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml_print("DML: support: EnoughWritebackUnits = 0x%x\n", support->EnoughWritebackUnits); + if (!fail_only || support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == 1) + dml_print("DML: support: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = 0x%x\n", support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml_print("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = 0x%x\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml_print("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = 0x%x\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml_print("DML: support: InvalidCombinationOfMALLUseForPState = 0x%x\n", support->InvalidCombinationOfMALLUseForPState); + + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml_print("DML: support: ExceededMultistreamSlots = 0x%x\n", support->ExceededMultistreamSlots); + if (!fail_only || support->ODMCombineTwoToOneSupportCheckOK == 0) + dml_print("DML: support: ODMCombineTwoToOneSupportCheckOK = 0x%x\n", support->ODMCombineTwoToOneSupportCheckOK); + if (!fail_only || support->ODMCombineFourToOneSupportCheckOK == 0) + dml_print("DML: support: ODMCombineFourToOneSupportCheckOK = 0x%x\n", support->ODMCombineFourToOneSupportCheckOK); + if (!fail_only || 
support->NotEnoughDSCUnits == 1) + dml_print("DML: support: NotEnoughDSCUnits = 0x%x\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + dml_print("DML: support: NotEnoughDSCSlices = 0x%x\n", support->NotEnoughDSCSlices); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml_print("DML: support: PixelsPerLinePerDSCUnitSupport = 0x%x\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + dml_print("DML: support: DSCCLKRequiredMoreThanSupported = 0x%x\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + dml_print("DML: support: DTBCLKRequiredMoreThanSupported = 0x%x\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->LinkCapacitySupport == 0) + dml_print("DML: support: LinkCapacitySupport = 0x%x\n", support->LinkCapacitySupport); + + for (dml_uint_t j = 0; j < 2; j++) { + if (!fail_only || support->DRAMClockChangeSupport[j] == dml_dram_clock_change_unsupported) + dml_print("DML: support: combine=%d, DRAMClockChangeSupport = %d\n", j, support->DRAMClockChangeSupport[j]); + if (!fail_only || support->FCLKChangeSupport[j] == dml_fclock_change_unsupported) + dml_print("DML: support: combine=%d, FCLKChangeSupport = %d\n", j, support->FCLKChangeSupport[j]); + if (!fail_only || support->ROBSupport[j] == 0) + dml_print("DML: support: combine=%d, ROBSupport = %d\n", j, support->ROBSupport[j]); + if (!fail_only || support->PTEBufferSizeNotExceeded[j] == 0) + dml_print("DML: support: combine=%d, PTEBufferSizeNotExceeded = %d\n", j, support->PTEBufferSizeNotExceeded[j]); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded[j] == 0) + dml_print("DML: support: combine=%d, DCCMetaBufferSizeNotExceeded = %d\n", j, support->DCCMetaBufferSizeNotExceeded[j]); + if (!fail_only || support->TotalVerticalActiveBandwidthSupport[j] == 0) + dml_print("DML: support: combine=%d, TotalVerticalActiveBandwidthSupport = %d\n", j, support->TotalVerticalActiveBandwidthSupport[j]); + if (!fail_only || support->USRRetrainingSupport[j] == 0) + dml_print("DML: support: combine=%d, USRRetrainingSupport = %d\n", j, support->USRRetrainingSupport[j]); + if (!fail_only || support->VActiveBandwithSupport[j] == 0) + dml_print("DML: support: combine=%d, VActiveBandwithSupport = %d\n", j, support->VActiveBandwithSupport[j]); + if (!fail_only || support->PrefetchSupported[j] == 0) + dml_print("DML: support: combine=%d, PrefetchSupported = %d\n", j, support->PrefetchSupported[j]); + if (!fail_only || support->DynamicMetadataSupported[j] == 0) + dml_print("DML: support: combine=%d, DynamicMetadataSupported = %d\n", j, support->DynamicMetadataSupported[j]); + if (!fail_only || support->VRatioInPrefetchSupported[j] == 0) + dml_print("DML: support: combine=%d, VRatioInPrefetchSupported = %d\n", j, support->VRatioInPrefetchSupported[j]); + if (!fail_only || support->DISPCLK_DPPCLK_Support[j] == 0) + dml_print("DML: support: combine=%d, DISPCLK_DPPCLK_Support = %d\n", j, support->DISPCLK_DPPCLK_Support[j]); + if (!fail_only || support->TotalAvailablePipesSupport[j] == 0) + dml_print("DML: support: combine=%d, TotalAvailablePipesSupport = %d\n", j, support->TotalAvailablePipesSupport[j]); + if (!fail_only || support->ModeSupport[j] == 0) + dml_print("DML: support: combine=%d, ModeSupport = %d\n", j, support->ModeSupport[j]); + if (!fail_only || support->ViewportSizeSupport[j] == 0) + dml_print("DML: support: combine=%d, ViewportSizeSupport = 
%d\n", j, support->ViewportSizeSupport[j]); + if (!fail_only || support->ImmediateFlipSupportedForState[j] == 0) + dml_print("DML: support: combine=%d, ImmediateFlipSupportedForState = %d\n", j, support->ImmediateFlipSupportedForState[j]); + } +} + +void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane) +{ + for (dml_uint_t i = 0; i < num_plane; i++) { + dml_print("DML: timing_cfg: plane=%d, HTotal = %d\n", i, timing->HTotal[i]); + dml_print("DML: timing_cfg: plane=%d, VTotal = %d\n", i, timing->VTotal[i]); + dml_print("DML: timing_cfg: plane=%d, HActive = %d\n", i, timing->HActive[i]); + dml_print("DML: timing_cfg: plane=%d, VActive = %d\n", i, timing->VActive[i]); + dml_print("DML: timing_cfg: plane=%d, VFrontPorch = %d\n", i, timing->VFrontPorch[i]); + dml_print("DML: timing_cfg: plane=%d, VBlankNom = %d\n", i, timing->VBlankNom[i]); + dml_print("DML: timing_cfg: plane=%d, RefreshRate = %d\n", i, timing->RefreshRate[i]); + dml_print("DML: timing_cfg: plane=%d, PixelClock = %f\n", i, timing->PixelClock[i]); + dml_print("DML: timing_cfg: plane=%d, Interlace = %d\n", i, timing->Interlace[i]); + dml_print("DML: timing_cfg: plane=%d, DRRDisplay = %d\n", i, timing->DRRDisplay[i]); + } +} + +void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane) +{ + dml_print("DML: plane_cfg: num_plane = %d\n", num_plane); + dml_print("DML: plane_cfg: GPUVMEnable = %d\n", plane->GPUVMEnable); + dml_print("DML: plane_cfg: HostVMEnable = %d\n", plane->HostVMEnable); + dml_print("DML: plane_cfg: GPUVMMaxPageTableLevels = %d\n", plane->GPUVMMaxPageTableLevels); + dml_print("DML: plane_cfg: HostVMMaxPageTableLevels = %d\n", plane->HostVMMaxPageTableLevels); + + for (dml_uint_t i = 0; i < num_plane; i++) { + dml_print("DML: plane_cfg: plane=%d, GPUVMMinPageSizeKBytes = %d\n", i, plane->GPUVMMinPageSizeKBytes[i]); + dml_print("DML: plane_cfg: plane=%d, ForceOneRowForFrame = %d\n", i, plane->ForceOneRowForFrame[i]); + dml_print("DML: plane_cfg: plane=%d, PTEBufferModeOverrideEn = %d\n", i, plane->PTEBufferModeOverrideEn[i]); + dml_print("DML: plane_cfg: plane=%d, PTEBufferMode = %d\n", i, plane->PTEBufferMode[i]); + dml_print("DML: plane_cfg: plane=%d, DETSizeOverride = %d\n", i, plane->DETSizeOverride[i]); + dml_print("DML: plane_cfg: plane=%d, UseMALLForStaticScreen = %d\n", i, plane->UseMALLForStaticScreen[i]); + dml_print("DML: plane_cfg: plane=%d, UseMALLForPStateChange = %d\n", i, plane->UseMALLForPStateChange[i]); + dml_print("DML: plane_cfg: plane=%d, BlendingAndTiming = %d\n", i, plane->BlendingAndTiming[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportWidth = %d\n", i, plane->ViewportWidth[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportHeight = %d\n", i, plane->ViewportHeight[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportWidthChroma = %d\n", i, plane->ViewportWidthChroma[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportHeightChroma = %d\n", i, plane->ViewportHeightChroma[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportXStart = %d\n", i, plane->ViewportXStart[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportXStartC = %d\n", i, plane->ViewportXStartC[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportYStart = %d\n", i, plane->ViewportYStart[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportYStartC = %d\n", i, plane->ViewportYStartC[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportStationary = %d\n", i, plane->ViewportStationary[i]); + dml_print("DML: plane_cfg: plane=%d, ScalerEnabled = %d\n", 
i, plane->ScalerEnabled[i]); + dml_print("DML: plane_cfg: plane=%d, HRatio = %3.2f\n", i, plane->HRatio[i]); + dml_print("DML: plane_cfg: plane=%d, VRatio = %3.2f\n", i, plane->VRatio[i]); + dml_print("DML: plane_cfg: plane=%d, HRatioChroma = %3.2f\n", i, plane->HRatioChroma[i]); + dml_print("DML: plane_cfg: plane=%d, VRatioChroma = %3.2f\n", i, plane->VRatioChroma[i]); + dml_print("DML: plane_cfg: plane=%d, HTaps = %d\n", i, plane->HTaps[i]); + dml_print("DML: plane_cfg: plane=%d, VTaps = %d\n", i, plane->VTaps[i]); + dml_print("DML: plane_cfg: plane=%d, HTapsChroma = %d\n", i, plane->HTapsChroma[i]); + dml_print("DML: plane_cfg: plane=%d, VTapsChroma = %d\n", i, plane->VTapsChroma[i]); + dml_print("DML: plane_cfg: plane=%d, LBBitPerPixel = %d\n", i, plane->LBBitPerPixel[i]); + dml_print("DML: plane_cfg: plane=%d, SourceScan = %d\n", i, plane->SourceScan[i]); + dml_print("DML: plane_cfg: plane=%d, ScalerRecoutWidth = %d\n", i, plane->ScalerRecoutWidth[i]); + dml_print("DML: plane_cfg: plane=%d, NumberOfCursors = %d\n", i, plane->NumberOfCursors[i]); + dml_print("DML: plane_cfg: plane=%d, CursorWidth = %d\n", i, plane->CursorWidth[i]); + dml_print("DML: plane_cfg: plane=%d, CursorBPP = %d\n", i, plane->CursorBPP[i]); + + dml_print("DML: plane_cfg: plane=%d, DynamicMetadataEnable = %d\n", i, plane->DynamicMetadataEnable[i]); + dml_print("DML: plane_cfg: plane=%d, DynamicMetadataLinesBeforeActiveRequired = %d\n", i, plane->DynamicMetadataLinesBeforeActiveRequired[i]); + dml_print("DML: plane_cfg: plane=%d, DynamicMetadataTransmittedBytes = %d\n", i, plane->DynamicMetadataTransmittedBytes[i]); + } +} + +void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane) +{ + for (dml_uint_t i = 0; i < num_plane; i++) { + dml_print("DML: surface_cfg: plane=%d, PitchY = %d\n", i, surface->PitchY[i]); + dml_print("DML: surface_cfg: plane=%d, SurfaceWidthY = %d\n", i, surface->SurfaceWidthY[i]); + dml_print("DML: surface_cfg: plane=%d, SurfaceHeightY = %d\n", i, surface->SurfaceHeightY[i]); + dml_print("DML: surface_cfg: plane=%d, PitchC = %d\n", i, surface->PitchC[i]); + dml_print("DML: surface_cfg: plane=%d, SurfaceWidthC = %d\n", i, surface->SurfaceWidthC[i]); + dml_print("DML: surface_cfg: plane=%d, SurfaceHeightC = %d\n", i, surface->SurfaceHeightC[i]); + dml_print("DML: surface_cfg: plane=%d, DCCEnable = %d\n", i, surface->DCCEnable[i]); + dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchY = %d\n", i, surface->DCCMetaPitchY[i]); + dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchC = %d\n", i, surface->DCCMetaPitchC[i]); + dml_print("DML: surface_cfg: plane=%d, DCCRateLuma = %f\n", i, surface->DCCRateLuma[i]); + dml_print("DML: surface_cfg: plane=%d, DCCRateChroma = %f\n", i, surface->DCCRateChroma[i]); + dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsLuma = %f\n", i, surface->DCCFractionOfZeroSizeRequestsLuma[i]); + dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsChroma= %f\n", i, surface->DCCFractionOfZeroSizeRequestsChroma[i]); + } +} + +void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane) +{ + for (dml_uint_t i = 0; i < num_plane; i++) { + dml_print("DML: hw_resource: plane=%d, ODMMode = %d\n", i, hw->ODMMode[i]); + dml_print("DML: hw_resource: plane=%d, DPPPerSurface = %d\n", i, hw->DPPPerSurface[i]); + dml_print("DML: hw_resource: plane=%d, DSCEnabled = %d\n", i, hw->DSCEnabled[i]); + dml_print("DML: hw_resource: plane=%d, NumberOfDSCSlices = %d\n", i, 
hw->NumberOfDSCSlices[i]); + } + dml_print("DML: hw_resource: DLGRefClkFreqMHz = %f\n", hw->DLGRefClkFreqMHz); +} + +__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state) +{ + dml_print("DML: state_bbox: socclk_mhz = %f\n", state->socclk_mhz); + dml_print("DML: state_bbox: dscclk_mhz = %f\n", state->dscclk_mhz); + dml_print("DML: state_bbox: phyclk_mhz = %f\n", state->phyclk_mhz); + dml_print("DML: state_bbox: phyclk_d18_mhz = %f\n", state->phyclk_d18_mhz); + dml_print("DML: state_bbox: phyclk_d32_mhz = %f\n", state->phyclk_d32_mhz); + dml_print("DML: state_bbox: dtbclk_mhz = %f\n", state->dtbclk_mhz); + dml_print("DML: state_bbox: dispclk_mhz = %f\n", state->dispclk_mhz); + dml_print("DML: state_bbox: dppclk_mhz = %f\n", state->dppclk_mhz); + dml_print("DML: state_bbox: fabricclk_mhz = %f\n", state->fabricclk_mhz); + dml_print("DML: state_bbox: dcfclk_mhz = %f\n", state->dcfclk_mhz); + dml_print("DML: state_bbox: dram_speed_mts = %f\n", state->dram_speed_mts); + dml_print("DML: state_bbox: urgent_latency_pixel_data_only_us = %f\n", state->urgent_latency_pixel_data_only_us); + dml_print("DML: state_bbox: urgent_latency_pixel_mixed_with_vm_data_us = %f\n", state->urgent_latency_pixel_mixed_with_vm_data_us); + dml_print("DML: state_bbox: urgent_latency_vm_data_only_us = %f\n", state->urgent_latency_vm_data_only_us); + dml_print("DML: state_bbox: writeback_latency_us = %f\n", state->writeback_latency_us); + dml_print("DML: state_bbox: urgent_latency_adjustment_fabric_clock_component_us = %f\n", state->urgent_latency_adjustment_fabric_clock_component_us); + dml_print("DML: state_bbox: urgent_latency_adjustment_fabric_clock_reference_mhz= %f\n", state->urgent_latency_adjustment_fabric_clock_reference_mhz); + dml_print("DML: state_bbox: sr_exit_time_us = %f\n", state->sr_exit_time_us); + dml_print("DML: state_bbox: sr_enter_plus_exit_time_us = %f\n", state->sr_enter_plus_exit_time_us); + dml_print("DML: state_bbox: sr_exit_z8_time_us = %f\n", state->sr_exit_z8_time_us); + dml_print("DML: state_bbox: sr_enter_plus_exit_z8_time_us = %f\n", state->sr_enter_plus_exit_z8_time_us); + dml_print("DML: state_bbox: dram_clock_change_latency_us = %f\n", state->dram_clock_change_latency_us); + dml_print("DML: state_bbox: fclk_change_latency_us = %f\n", state->fclk_change_latency_us); + dml_print("DML: state_bbox: usr_retraining_latency_us = %f\n", state->usr_retraining_latency_us); + dml_print("DML: state_bbox: use_ideal_dram_bw_strobe = %d\n", state->use_ideal_dram_bw_strobe); +} + +__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc) +{ + dml_print("DML: soc_bbox: dprefclk_mhz = %f\n", soc->dprefclk_mhz); + dml_print("DML: soc_bbox: xtalclk_mhz = %f\n", soc->xtalclk_mhz); + dml_print("DML: soc_bbox: pcierefclk_mhz = %f\n", soc->pcierefclk_mhz); + dml_print("DML: soc_bbox: refclk_mhz = %f\n", soc->refclk_mhz); + dml_print("DML: soc_bbox: amclk_mhz = %f\n", soc->amclk_mhz); + + dml_print("DML: soc_bbox: max_outstanding_reqs = %f\n", soc->max_outstanding_reqs); + dml_print("DML: soc_bbox: pct_ideal_sdp_bw_after_urgent = %f\n", soc->pct_ideal_sdp_bw_after_urgent); + dml_print("DML: soc_bbox: pct_ideal_fabric_bw_after_urgent = %f\n", soc->pct_ideal_fabric_bw_after_urgent); + dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_pixel_only); + dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_and_vm = %f\n", 
soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm); + dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_vm_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_vm_only); + dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_strobe = %f\n", soc->pct_ideal_dram_bw_after_urgent_strobe); + dml_print("DML: soc_bbox: max_avg_sdp_bw_use_normal_percent = %f\n", soc->max_avg_sdp_bw_use_normal_percent); + dml_print("DML: soc_bbox: max_avg_fabric_bw_use_normal_percent = %f\n", soc->max_avg_fabric_bw_use_normal_percent); + dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_percent = %f\n", soc->max_avg_dram_bw_use_normal_percent); + dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_strobe_percent = %f\n", soc->max_avg_dram_bw_use_normal_strobe_percent); + dml_print("DML: soc_bbox: round_trip_ping_latency_dcfclk_cycles = %d\n", soc->round_trip_ping_latency_dcfclk_cycles); + dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_only_bytes); + dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes); + dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_vm_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_vm_only_bytes); + dml_print("DML: soc_bbox: num_chans = %d\n", soc->num_chans); + dml_print("DML: soc_bbox: return_bus_width_bytes = %d\n", soc->return_bus_width_bytes); + dml_print("DML: soc_bbox: dram_channel_width_bytes = %d\n", soc->dram_channel_width_bytes); + dml_print("DML: soc_bbox: fabric_datapath_to_dcn_data_return_bytes = %d\n", soc->fabric_datapath_to_dcn_data_return_bytes); + dml_print("DML: soc_bbox: hostvm_min_page_size_kbytes = %d\n", soc->hostvm_min_page_size_kbytes); + dml_print("DML: soc_bbox: gpuvm_min_page_size_kbytes = %d\n", soc->gpuvm_min_page_size_kbytes); + dml_print("DML: soc_bbox: phy_downspread_percent = %f\n", soc->phy_downspread_percent); + dml_print("DML: soc_bbox: dcn_downspread_percent = %f\n", soc->dcn_downspread_percent); + dml_print("DML: soc_bbox: smn_latency_us = %f\n", soc->smn_latency_us); + dml_print("DML: soc_bbox: mall_allocated_for_dcn_mbytes = %d\n", soc->mall_allocated_for_dcn_mbytes); + dml_print("DML: soc_bbox: dispclk_dppclk_vco_speed_mhz = %f\n", soc->dispclk_dppclk_vco_speed_mhz); + dml_print("DML: soc_bbox: do_urgent_latency_adjustment = %d\n", soc->do_urgent_latency_adjustment); +} + +__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg) +{ + dml_print("DML: clk_cfg: 0-use_required, 1-use pipe.clks_cfg, 2-use state bbox\n"); + dml_print("DML: clk_cfg: dcfclk_option = %d\n", clk_cfg->dcfclk_option); + dml_print("DML: clk_cfg: dispclk_option = %d\n", clk_cfg->dispclk_option); + + dml_print("DML: clk_cfg: dcfclk_mhz = %f\n", clk_cfg->dcfclk_mhz); + dml_print("DML: clk_cfg: dispclk_mhz = %f\n", clk_cfg->dispclk_mhz); + + for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) { + dml_print("DML: clk_cfg: i=%d, dppclk_option = %d\n", i, clk_cfg->dppclk_option[i]); + dml_print("DML: clk_cfg: i=%d, dppclk_mhz = %f\n", i, clk_cfg->dppclk_mhz[i]); + } +} + +dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle Scan) +{ + dml_bool_t is_vert = false; + if (Scan == dml_rotation_90 || Scan == dml_rotation_90m || Scan == dml_rotation_270 || Scan == dml_rotation_270m) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} // dml_is_vertical_rotation + +dml_uint_t 
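+/* Map the dml_cursor_bpp enum to bits per pixel (2, 32 or 64); returns 0 for an unknown value. */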
dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp) +{ + switch (ebpp) { + case dml_cur_2bit: + return 2; + case dml_cur_32bit: + return 32; + case dml_cur_64bit: + return 64; + default: + return 0; + } +} + +/// @brief Determine the physical pipe to logical plane mapping using the display_cfg +dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg) +{ + dml_uint_t num_active_planes = 0; + + for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; k++) { + if (display_cfg->plane.ViewportWidth[k] > 0) + num_active_planes = num_active_planes + 1; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: num_active_planes = %d\n", __func__, num_active_planes); +#endif + return num_active_planes; +} + +/// @brief Determine the physical pipe to logical plane mapping using the display_cfg +dml_uint_t dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg) +{ + dml_uint_t num_active_pipes = 0; + + for (dml_uint_t j = 0; j < dml_get_num_active_planes(display_cfg); j++) { + num_active_pipes = num_active_pipes + display_cfg->hw.DPPPerSurface[j]; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx) +{ + dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + return plane_idx; +} + +dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t plane_idx) +{ + dml_uint_t pipe_idx = 0; + dml_bool_t pipe_found = 0; + + ASSERT(plane_idx < __DML_NUM_PLANES__); + + for (dml_uint_t i = 0; i < __DML_NUM_PLANES__; i++) { + if (plane_idx == mode_lib->mp.pipe_plane[i]) { + pipe_idx = i; + pipe_found = 1; + break; + } + } + ASSERT(pipe_found != 0); + + return pipe_idx; +} + +void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane) +{ + dml_uint_t pipe_idx = 0; + + for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k) { + pipe_plane[k] = __DML_PIPE_NO_PLANE__; + } + + for (dml_uint_t plane_idx = 0; plane_idx < __DML_NUM_PLANES__; plane_idx++) { + for (dml_uint_t i = 0; i < hw->DPPPerSurface[plane_idx]; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h new file mode 100644 index 000000000000..a82b49cf7fb0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DISPLAY_MODE_UTIL_H__ +#define __DISPLAY_MODE_UTIL_H__ + +#include "display_mode_core_structs.h" +#include "cmntypes.h" + +#include "dml_assert.h" +#include "dml_logging.h" + +__DML_DLL_EXPORT__ dml_bool_t dml_util_is_420(enum dml_source_format_class source_format); +__DML_DLL_EXPORT__ dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity); +__DML_DLL_EXPORT__ dml_float_t dml_floor(dml_float_t x, dml_float_t granularity); +__DML_DLL_EXPORT__ dml_float_t dml_min(dml_float_t x, dml_float_t y); +__DML_DLL_EXPORT__ dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z); +__DML_DLL_EXPORT__ dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w); +__DML_DLL_EXPORT__ dml_float_t dml_max(dml_float_t x, dml_float_t y); +__DML_DLL_EXPORT__ dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z); +__DML_DLL_EXPORT__ dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d); +__DML_DLL_EXPORT__ dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e); +__DML_DLL_EXPORT__ dml_float_t dml_log(dml_float_t x, dml_float_t base); +__DML_DLL_EXPORT__ dml_float_t dml_log2(dml_float_t x); +__DML_DLL_EXPORT__ dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding); +__DML_DLL_EXPORT__ dml_float_t dml_pow(dml_float_t base, int exp); +__DML_DLL_EXPORT__ dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up); +__DML_DLL_EXPORT__ dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle scan); +__DML_DLL_EXPORT__ dml_uint_t dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp); +__DML_DLL_EXPORT__ void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *data_rq_regs); +__DML_DLL_EXPORT__ void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs); +__DML_DLL_EXPORT__ void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs); +__DML_DLL_EXPORT__ void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs); +__DML_DLL_EXPORT__ void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy); +__DML_DLL_EXPORT__ void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j); +__DML_DLL_EXPORT__ void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only); +__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane); +__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane); +__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane); +__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane); +__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state); +__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc); +__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg); + +__DML_DLL_EXPORT__ dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg); +__DML_DLL_EXPORT__ dml_uint_t 
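+/* Active pipe count: the sum of hw.DPPPerSurface[] over the active planes (see display_mode_util.c). */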
dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg); +__DML_DLL_EXPORT__ dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx); +__DML_DLL_EXPORT__ dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t plane_idx); +__DML_DLL_EXPORT__ void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c new file mode 100644 index 000000000000..bf5e7f4e0416 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c @@ -0,0 +1,929 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml21_wrapper.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_internal_shared_types.h" +#include "dml2_internal_types.h" +#include "dml21_utils.h" +#include "dml21_translation_helper.h" +#include "soc_and_ip_translator.h" + +static void dml21_populate_pmo_options(struct dml2_pmo_options *pmo_options, + const struct dc *in_dc, + const struct dml2_configuration_options *config) +{ + bool disable_fams2 = !in_dc->debug.fams2_config.bits.enable; + + /* ODM options */ + pmo_options->disable_dyn_odm = !config->minimize_dispclk_using_odm; + pmo_options->disable_dyn_odm_for_multi_stream = true; + pmo_options->disable_dyn_odm_for_stream_with_svp = true; + + pmo_options->disable_vblank = ((in_dc->debug.dml21_disable_pstate_method_mask >> 1) & 1); + + /* NOTE: DRR and SubVP Require FAMS2 */ + pmo_options->disable_svp = ((in_dc->debug.dml21_disable_pstate_method_mask >> 2) & 1) || + in_dc->debug.force_disable_subvp || + disable_fams2; + pmo_options->disable_drr_clamped = ((in_dc->debug.dml21_disable_pstate_method_mask >> 3) & 1) || + disable_fams2; + pmo_options->disable_drr_var = ((in_dc->debug.dml21_disable_pstate_method_mask >> 4) & 1) || + disable_fams2; + pmo_options->disable_fams2 = disable_fams2; + + pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE || + in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY; + pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE; +} + +static enum dml2_project_id dml21_dcn_revision_to_dml2_project_id(enum dce_version dcn_version) +{ + enum dml2_project_id project_id; + switch (dcn_version) { + case DCN_VERSION_4_01: + project_id = dml2_project_dcn4x_stage2_auto_drr_svp; + break; + default: + project_id = dml2_project_invalid; + DC_ERR("unsupported dcn version for DML21!"); + break; + } + + return project_id; +} + +void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, + const struct dml2_configuration_options *config, + const struct dc *in_dc) +{ + dml_init->options.project_id = dml21_dcn_revision_to_dml2_project_id(in_dc->ctx->dce_version); + + if (config->use_native_soc_bb_construction) { + in_dc->soc_and_ip_translator->translator_funcs->get_soc_bb(&dml_init->soc_bb, in_dc, config); + in_dc->soc_and_ip_translator->translator_funcs->get_ip_caps(&dml_init->ip_caps); + } else { + dml_init->soc_bb = config->external_socbb_ip_params->soc_bb; + dml_init->ip_caps = config->external_socbb_ip_params->ip_params; + } + + dml21_populate_pmo_options(&dml_init->options.pmo_options, in_dc, config); +} + +static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream) 
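+/*
+ * Largest VTOTAL the hardware can be programmed to for this stream: the DC cap
+ * (caps.max_v_total), reduced by (v_front_porch + 1) when vtotal_limited_by_fp2
+ * is set. The caller uses it to clamp the minimum refresh rate reported to DML:
+ *   min_refresh_uhz = pix_clk_100hz * 100000000 / (h_total * max_hw_v_total)
+ */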
+{ + unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total; + + if (stream->ctx->dc->caps.vtotal_limited_by_fp2) { + max_hw_v_total -= stream->timing.v_front_porch + 1; + } + + return max_hw_v_total; +} + +static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing, + struct dc_stream_state *stream, + struct pipe_ctx *pipe_ctx, + struct dml2_context *dml_ctx) +{ + unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz; + uint32_t pix_clk_100hz; + + timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding; + timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; + timing->h_front_porch = stream->timing.h_front_porch; + timing->v_front_porch = stream->timing.v_front_porch; + timing->pixel_clock_khz = stream->timing.pix_clk_100hz / 10; + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) + timing->pixel_clock_khz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz / 10; + if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + timing->pixel_clock_khz *= 2; + timing->h_total = stream->timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding; + timing->v_total = stream->timing.v_total; + timing->h_sync_width = stream->timing.h_sync_width; + timing->interlaced = stream->timing.flags.INTERLACE; + + hblank_start = stream->timing.h_total - stream->timing.h_front_porch; + + timing->h_blank_end = hblank_start - stream->timing.h_addressable - pipe_ctx->dsc_padding_params.dsc_hactive_padding + - stream->timing.h_border_left - stream->timing.h_border_right; + + if (hblank_start < stream->timing.h_addressable) + timing->h_blank_end = 0; + + vblank_start = stream->timing.v_total - stream->timing.v_front_porch; + + timing->v_blank_end = vblank_start - stream->timing.v_addressable + - stream->timing.v_border_top - stream->timing.v_border_bottom; + + timing->drr_config.enabled = stream->ignore_msa_timing_param; + timing->drr_config.drr_active_variable = stream->vrr_active_variable; + timing->drr_config.drr_active_fixed = stream->vrr_active_fixed; + timing->drr_config.disallowed = !stream->allow_freesync; + + /* limit min refresh rate to DC cap */ + min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz; + if (stream->ctx->dc->caps.max_v_total != 0) { + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) { + pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz; + } else { + pix_clk_100hz = stream->timing.pix_clk_100hz; + } + min_hardware_refresh_in_uhz = div64_u64((pix_clk_100hz * 100000000ULL), + (timing->h_total * (long long)calc_max_hardware_v_total(stream))); + } + + timing->drr_config.min_refresh_uhz = max(stream->timing.min_refresh_in_uhz, min_hardware_refresh_in_uhz); + + if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase && + stream->ctx->dc->config.enable_fpo_flicker_detection == 1) + timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false); + else + timing->drr_config.max_instant_vtotal_delta = 0; + + if (stream->timing.flags.DSC) { + timing->dsc.enable = dml2_dsc_enable; + timing->dsc.overrides.num_slices = stream->timing.dsc_cfg.num_slices_h; + timing->dsc.dsc_compressed_bpp_x16 = stream->timing.dsc_cfg.bits_per_pixel; + } else + timing->dsc.enable = dml2_dsc_disable; + + switch (stream->timing.display_color_depth) { + case 
COLOR_DEPTH_666: + timing->bpc = 6; + break; + case COLOR_DEPTH_888: + timing->bpc = 8; + break; + case COLOR_DEPTH_101010: + timing->bpc = 10; + break; + case COLOR_DEPTH_121212: + timing->bpc = 12; + break; + case COLOR_DEPTH_141414: + timing->bpc = 14; + break; + case COLOR_DEPTH_161616: + timing->bpc = 16; + break; + case COLOR_DEPTH_999: + timing->bpc = 9; + break; + case COLOR_DEPTH_111111: + timing->bpc = 11; + break; + default: + timing->bpc = 8; + break; + } + + timing->vblank_nom = timing->v_total - timing->v_active; +} + +static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output, + struct dc_stream_state *stream, const struct pipe_ctx *pipe) +{ + output->output_dp_lane_count = 4; + + switch (stream->signal) { + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_DISPLAY_PORT: + output->output_encoder = dml2_dp; + if (check_dp2p0_output_encoder(pipe)) + output->output_encoder = dml2_dp2p0; + break; + case SIGNAL_TYPE_EDP: + output->output_encoder = dml2_edp; + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + output->output_encoder = dml2_hdmi; + break; + default: + output->output_encoder = dml2_dp; + } + + switch (stream->timing.pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + output->output_format = dml2_444; + break; + case PIXEL_ENCODING_YCBCR420: + output->output_format = dml2_420; + break; + case PIXEL_ENCODING_YCBCR422: + if (stream->timing.flags.DSC && !stream->timing.dsc_cfg.ycbcr422_simple) + output->output_format = dml2_n422; + else + output->output_format = dml2_s422; + break; + default: + output->output_format = dml2_444; + break; + } + + switch (stream->signal) { + case SIGNAL_TYPE_NONE: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_LVDS: + case SIGNAL_TYPE_RGB: + case SIGNAL_TYPE_DISPLAY_PORT: + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_EDP: + case SIGNAL_TYPE_VIRTUAL: + default: + output->output_dp_link_rate = dml2_dp_rate_na; + break; + } + + output->audio_sample_layout = stream->audio_info.modes->sample_size; + output->audio_sample_rate = stream->audio_info.modes->max_bit_rate; + output->output_disabled = true; + + //TODO : New to DML2.1. How do we populate this ? 
+ // output->validate_output +} + +static void populate_dml21_stream_overrides_from_stream_state( + struct dml2_stream_parameters *stream_desc, + struct dc_stream_state *stream, + struct dc_stream_status *stream_status) +{ + switch (stream->debug.force_odm_combine_segments) { + case 0: + stream_desc->overrides.odm_mode = dml2_odm_mode_auto; + break; + case 1: + stream_desc->overrides.odm_mode = dml2_odm_mode_bypass; + break; + case 2: + stream_desc->overrides.odm_mode = dml2_odm_mode_combine_2to1; + break; + case 3: + stream_desc->overrides.odm_mode = dml2_odm_mode_combine_3to1; + break; + case 4: + stream_desc->overrides.odm_mode = dml2_odm_mode_combine_4to1; + break; + default: + stream_desc->overrides.odm_mode = dml2_odm_mode_auto; + break; + } + if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy || + stream->debug.force_odm_combine_segments > 0) + stream_desc->overrides.disable_dynamic_odm = true; + stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || + stream->hw_cursor_req || + stream_status->mall_stream_config.cursor_size_limit_subvp; +} + +static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) +{ + enum dml2_swizzle_mode dml2_mode = dml2_sw_linear; + + switch (addr3_mode) { + case DC_ADDR3_SW_LINEAR: + dml2_mode = dml2_sw_linear; + break; + case DC_ADDR3_SW_256B_2D: + dml2_mode = dml2_sw_256b_2d; + break; + case DC_ADDR3_SW_4KB_2D: + dml2_mode = dml2_sw_4kb_2d; + break; + case DC_ADDR3_SW_64KB_2D: + dml2_mode = dml2_sw_64kb_2d; + break; + case DC_ADDR3_SW_256KB_2D: + dml2_mode = dml2_sw_256kb_2d; + break; + default: + /* invalid swizzle mode for DML2.1 */ + ASSERT(false); + dml2_mode = dml2_sw_linear; + } + + return dml2_mode; +} + +static enum dml2_swizzle_mode gfx9_to_dml2_swizzle_mode(enum swizzle_mode_values gfx9_mode) +{ + enum dml2_swizzle_mode dml2_mode = dml2_sw_64kb_2d; + + switch (gfx9_mode) { + case DC_SW_LINEAR: + dml2_mode = dml2_sw_linear; + break; + case DC_SW_256_D: + case DC_SW_256_R: + dml2_mode = dml2_sw_256b_2d; + break; + case DC_SW_4KB_D: + case DC_SW_4KB_R: + case DC_SW_4KB_R_X: + dml2_mode = dml2_sw_4kb_2d; + break; + case DC_SW_64KB_D: + case DC_SW_64KB_D_X: + case DC_SW_64KB_R: + case DC_SW_64KB_R_X: + dml2_mode = dml2_sw_64kb_2d; + break; + case DC_SW_256B_S: + case DC_SW_4KB_S: + case DC_SW_64KB_S: + case DC_SW_VAR_S: + case DC_SW_VAR_D: + case DC_SW_VAR_R: + case DC_SW_64KB_S_T: + case DC_SW_64KB_D_T: + case DC_SW_4KB_S_X: + case DC_SW_4KB_D_X: + case DC_SW_64KB_S_X: + case DC_SW_VAR_S_X: + case DC_SW_VAR_D_X: + case DC_SW_VAR_R_X: + default: + /* + * invalid swizzle mode for DML2.1. This could happen because + * DML21 is not intended to be used by N-1 in production. To + * properly filter out unsupported swizzle modes, we will need + * to fix capability reporting when DML2.1 is used for N-1 in + * dc. So DML will only receive DML21 supported swizzle modes. + * This implementation is not added and has a low value because + * the supported swizzle modes should already cover most of our + * N-1 test cases. 
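+ * All of the modes in this default branch simply fall back to dml2_sw_64kb_2d below.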
+ */ + return dml2_sw_64kb_2d; + } + + return dml2_mode; +} + +static void populate_dml21_dummy_surface_cfg(struct dml2_surface_cfg *surface, const struct dc_stream_state *stream) +{ + surface->plane0.width = stream->timing.h_addressable; + surface->plane0.height = stream->timing.v_addressable; + surface->plane1.width = stream->timing.h_addressable; + surface->plane1.height = stream->timing.v_addressable; + surface->plane0.pitch = ((surface->plane0.width + 127) / 128) * 128; + surface->plane1.pitch = 0; + surface->dcc.enable = false; + surface->dcc.informative.dcc_rate_plane0 = 1.0; + surface->dcc.informative.dcc_rate_plane1 = 1.0; + surface->dcc.informative.fraction_of_zero_size_request_plane0 = 0; + surface->dcc.informative.fraction_of_zero_size_request_plane1 = 0; + surface->tiling = dml2_sw_64kb_2d; +} + +static void populate_dml21_dummy_plane_cfg(struct dml2_plane_parameters *plane, const struct dc_stream_state *stream) +{ + unsigned int width, height; + + if (stream->timing.h_addressable > 3840) + width = 3840; + else + width = stream->timing.h_addressable; // 4K max + + if (stream->timing.v_addressable > 2160) + height = 2160; + else + height = stream->timing.v_addressable; // 4K max + + plane->cursor.cursor_bpp = 32; + + plane->cursor.cursor_width = 256; + plane->cursor.num_cursors = 1; + + plane->composition.viewport.plane0.width = width; + plane->composition.viewport.plane0.height = height; + plane->composition.viewport.plane1.width = 0; + plane->composition.viewport.plane1.height = 0; + + plane->composition.viewport.stationary = false; + plane->composition.viewport.plane0.x_start = 0; + plane->composition.viewport.plane0.y_start = 0; + plane->composition.viewport.plane1.x_start = 0; + plane->composition.viewport.plane1.y_start = 0; + + plane->composition.scaler_info.enabled = false; + plane->composition.rotation_angle = dml2_rotation_0; + plane->composition.scaler_info.plane0.h_ratio = 1.0; + plane->composition.scaler_info.plane0.v_ratio = 1.0; + plane->composition.scaler_info.plane1.h_ratio = 0; + plane->composition.scaler_info.plane1.v_ratio = 0; + plane->composition.scaler_info.plane0.h_taps = 1; + plane->composition.scaler_info.plane0.v_taps = 1; + plane->composition.scaler_info.plane1.h_taps = 0; + plane->composition.scaler_info.plane1.v_taps = 0; + plane->composition.scaler_info.rect_out_width = width; + plane->pixel_format = dml2_444_32; + + plane->dynamic_meta_data.enable = false; + plane->overrides.gpuvm_min_page_size_kbytes = 256; +} + +static void populate_dml21_surface_config_from_plane_state( + const struct dc *in_dc, + struct dml2_surface_cfg *surface, + const struct dc_plane_state *plane_state) +{ + surface->plane0.pitch = plane_state->plane_size.surface_pitch; + surface->plane1.pitch = plane_state->plane_size.chroma_pitch; + surface->plane0.height = plane_state->plane_size.surface_size.height; + surface->plane0.width = plane_state->plane_size.surface_size.width; + surface->plane1.height = plane_state->plane_size.chroma_size.height; + surface->plane1.width = plane_state->plane_size.chroma_size.width; + surface->dcc.enable = plane_state->dcc.enable; + surface->dcc.informative.dcc_rate_plane0 = 1.0; + surface->dcc.informative.dcc_rate_plane1 = 1.0; + surface->dcc.informative.fraction_of_zero_size_request_plane0 = plane_state->dcc.independent_64b_blks; + surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c; + surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch; + surface->dcc.plane1.pitch = 
plane_state->dcc.meta_pitch_c; + + // Update swizzle / array mode based on the gfx_format + switch (plane_state->tiling_info.gfxversion) { + case DcGfxVersion7: + case DcGfxVersion8: + break; + case DcGfxVersion9: + case DcGfxVersion10: + case DcGfxVersion11: + surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle); + break; + case DcGfxAddr3: + surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle); + break; + } +} + +static const struct scaler_data *get_scaler_data_for_plane( + struct dml2_context *dml_ctx, + const struct dc_plane_state *in, + const struct dc_state *context) +{ + int i; + struct pipe_ctx *temp_pipe = &dml_ctx->v21.scratch.temp_pipe; + + memset(temp_pipe, 0, sizeof(struct pipe_ctx)); + + for (i = 0; i < MAX_PIPES; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state == in && !pipe->prev_odm_pipe) { + temp_pipe->stream = pipe->stream; + temp_pipe->plane_state = pipe->plane_state; + temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; + temp_pipe->stream_res = pipe->stream_res; + temp_pipe->dsc_padding_params.dsc_hactive_padding = pipe->dsc_padding_params.dsc_hactive_padding; + temp_pipe->dsc_padding_params.dsc_htotal_padding = pipe->dsc_padding_params.dsc_htotal_padding; + temp_pipe->dsc_padding_params.dsc_pix_clk_100hz = pipe->dsc_padding_params.dsc_pix_clk_100hz; + dml_ctx->config.callbacks.build_scaling_params(temp_pipe); + break; + } + } + + ASSERT(i < MAX_PIPES); + return &temp_pipe->plane_res.scl_data; +} + +static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dml_ctx, + struct dml2_plane_parameters *plane, const struct dc_plane_state *plane_state, + const struct dc_state *context, unsigned int stream_index) +{ + const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context); + struct dc_stream_state *stream = context->streams[stream_index]; + + plane->cursor.cursor_bpp = 32; + plane->cursor.cursor_width = 256; + plane->cursor.num_cursors = 1; + + switch (plane_state->format) { + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + plane->pixel_format = dml2_420_8; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + plane->pixel_format = dml2_420_10; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + plane->pixel_format = dml2_444_64; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + plane->pixel_format = dml2_444_16; + break; + case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: + plane->pixel_format = dml2_444_8; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + plane->pixel_format = dml2_rgbe_alpha; + break; + default: + plane->pixel_format = dml2_444_32; + break; + } + + plane->composition.viewport.plane0.height = scaler_data->viewport.height; + plane->composition.viewport.plane0.width = scaler_data->viewport.width; + plane->composition.viewport.plane1.height = scaler_data->viewport_c.height; + plane->composition.viewport.plane1.width = scaler_data->viewport_c.width; + plane->composition.viewport.plane0.x_start = scaler_data->viewport.x; + plane->composition.viewport.plane0.y_start = scaler_data->viewport.y; + plane->composition.viewport.plane1.x_start = scaler_data->viewport_c.x; + 
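+ /* plane0/plane1 viewports come from the luma/chroma viewports produced by
+ * build_scaling_params() in get_scaler_data_for_plane(). The scl ratios are
+ * dc_fixpt values with 32 fractional bits, so a value equal to dc_fixpt_one.value
+ * means 1.0 and the conversions below divide by 2^32 to get a double.
+ */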
plane->composition.viewport.plane1.y_start = scaler_data->viewport_c.y; + plane->composition.viewport.stationary = false; + plane->composition.scaler_info.enabled = scaler_data->ratios.horz.value != dc_fixpt_one.value || + scaler_data->ratios.horz_c.value != dc_fixpt_one.value || + scaler_data->ratios.vert.value != dc_fixpt_one.value || + scaler_data->ratios.vert_c.value != dc_fixpt_one.value; + + if (!scaler_data->taps.h_taps) { + /* The logic above can determine that scaling should be enabled even when there are + * no taps for certain cases. Hence take corrective action and disable scaling. + */ + plane->composition.scaler_info.enabled = false; + } else if ((plane_state->ctx->dc->config.use_spl == true) && + (plane->composition.scaler_info.enabled == false)) { + /* To enable the sharpener for 1:1, the scaler must be enabled. If use_spl is set, then + * allow the case where the ratio is 1 but taps > 1 + */ + if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) || + (scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1)) + plane->composition.scaler_info.enabled = true; + } + + /* always_scale is only used for debug purposes, not in production, but has to be + * maintained for certain compliances. */ + if (plane_state->ctx->dc->debug.always_scale == true) { + plane->composition.scaler_info.enabled = true; + } + + if (plane->composition.scaler_info.enabled == false) { + plane->composition.scaler_info.plane0.h_ratio = 1.0; + plane->composition.scaler_info.plane0.v_ratio = 1.0; + plane->composition.scaler_info.plane1.h_ratio = 1.0; + plane->composition.scaler_info.plane1.v_ratio = 1.0; + } else { + plane->composition.scaler_info.plane0.h_ratio = (double)scaler_data->ratios.horz.value / (1ULL << 32); + plane->composition.scaler_info.plane0.v_ratio = (double)scaler_data->ratios.vert.value / (1ULL << 32); + plane->composition.scaler_info.plane1.h_ratio = (double)scaler_data->ratios.horz_c.value / (1ULL << 32); + plane->composition.scaler_info.plane1.v_ratio = (double)scaler_data->ratios.vert_c.value / (1ULL << 32); + } + + if (!scaler_data->taps.h_taps) { + plane->composition.scaler_info.plane0.h_taps = 1; + plane->composition.scaler_info.plane1.h_taps = 1; + } else { + plane->composition.scaler_info.plane0.h_taps = scaler_data->taps.h_taps; + plane->composition.scaler_info.plane1.h_taps = scaler_data->taps.h_taps_c; + } + if (!scaler_data->taps.v_taps) { + plane->composition.scaler_info.plane0.v_taps = 1; + plane->composition.scaler_info.plane1.v_taps = 1; + } else { + plane->composition.scaler_info.plane0.v_taps = scaler_data->taps.v_taps; + plane->composition.scaler_info.plane1.v_taps = scaler_data->taps.v_taps_c; + } + + plane->composition.viewport.stationary = false; + + if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { + plane->tdlut.setup_for_tdlut = true; + + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) { + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: + plane->tdlut.tdlut_addressing_mode = dml2_tdlut_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR: + plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear; + break; + } + + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) { + case DC_CM2_GPU_MEM_SIZE_171717: + plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + default: + //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined + break; + }
+ } + plane->tdlut.setup_for_tdlut |= dml_ctx->config.force_tdlut_enable; + + plane->dynamic_meta_data.enable = false; + plane->dynamic_meta_data.lines_before_active_required = 0; + plane->dynamic_meta_data.transmitted_bytes = 0; + + plane->composition.scaler_info.rect_out_width = plane_state->dst_rect.width; + plane->composition.rotation_angle = (enum dml2_rotation_angle) plane_state->rotation; + plane->stream_index = stream_index; + + plane->overrides.gpuvm_min_page_size_kbytes = 256; + + plane->immediate_flip = plane_state->flip_immediate; + + plane->composition.rect_out_height_spans_vactive = + plane_state->dst_rect.height >= stream->src.height && + stream->dst.height >= stream->timing.v_addressable; +} + +//TODO : Could be possibly moved to a common helper layer. +static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id) +{ + int i, j; + + if (!plane_id) + return false; + + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } + } + } + } + + return false; +} + +static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *dml_ctx, const struct dc_stream_state *stream) +{ + int i = 0; + int location = -1; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] && dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i] == stream->stream_id) { + location = i; + break; + } + } + + return location; +} + +unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, + const struct dc_plane_state *plane, const struct dc_state *context) +{ + unsigned int plane_id; + int i = 0; + int location = -1; + + if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) { + ASSERT(false); + return -1; + } + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] && dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i] == plane_id) { + location = i; + break; + } + } + + return location; +} + +static enum dml2_uclk_pstate_change_strategy dml21_force_pstate_method_to_uclk_state_change_strategy(enum dml2_force_pstate_methods force_pstate_method) +{ + enum dml2_uclk_pstate_change_strategy val = dml2_uclk_pstate_change_strategy_auto; + + switch (force_pstate_method) { + case dml2_force_pstate_method_vactive: + val = dml2_uclk_pstate_change_strategy_force_vactive; + break; + case dml2_force_pstate_method_vblank: + val = dml2_uclk_pstate_change_strategy_force_vblank; + break; + case dml2_force_pstate_method_drr: + val = dml2_uclk_pstate_change_strategy_force_drr; + break; + case dml2_force_pstate_method_subvp: + val = dml2_uclk_pstate_change_strategy_force_mall_svp; + break; + case dml2_force_pstate_method_auto: + default: + val = dml2_uclk_pstate_change_strategy_auto; + } + + return val; +} + +bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + int stream_index, plane_index; + int disp_cfg_stream_location, disp_cfg_plane_location; + struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config; + unsigned int plane_count = 0; + + memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, 
sizeof(struct dml2_dml_to_dc_pipe_mapping)); + + dml_dispcfg->gpuvm_enable = dml_ctx->config.gpuvm_enable; + dml_dispcfg->gpuvm_max_page_table_levels = 4; + dml_dispcfg->hostvm_enable = false; + dml_dispcfg->minimize_det_reallocation = true; + dml_dispcfg->overrides.enable_subvp_implicit_pmo = true; + + if (in_dc->debug.disable_unbounded_requesting) { + dml_dispcfg->overrides.hw.force_unbounded_requesting.enable = true; + dml_dispcfg->overrides.hw.force_unbounded_requesting.value = false; + } + + for (stream_index = 0; stream_index < context->stream_count; stream_index++) { + disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]); + + if (disp_cfg_stream_location < 0) + disp_cfg_stream_location = dml_dispcfg->num_streams++; + + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); + populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index], dml_ctx); + populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); + populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index], &context->stream_status[stream_index]); + + dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.fclk_pstate = dml2_twait_budgeting_setting_if_needed; + dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.uclk_pstate = dml2_twait_budgeting_setting_if_needed; + dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.stutter_enter_exit = dml2_twait_budgeting_setting_if_needed; + + dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_stream_location] = context->streams[stream_index]->stream_id; + dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true; + + if (context->stream_status[stream_index].plane_count == 0) { + disp_cfg_plane_location = dml_dispcfg->num_planes++; + populate_dml21_dummy_surface_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->streams[stream_index]); + populate_dml21_dummy_plane_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->streams[stream_index]); + dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; + } else { + for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { + disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context); + + if (disp_cfg_plane_location < 0) + disp_cfg_plane_location = dml_dispcfg->num_planes++; + + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); + + populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]); + populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); + 
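+ /* dml21_wrapper_get_plane_id() just below packs the DC stream index into the
+ * upper 16 bits of plane_id and the plane index into the lower 16 bits, e.g.
+ * stream 1 / plane 2 becomes 0x00010002; dml21_get_dc_plane_idx_from_plane_id()
+ * masks the lower 16 bits back out.
+ */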
dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; + + if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; + + /* apply forced pstate policy */ + if (dml_ctx->config.pmo.force_pstate_method_enable) { + dml_dispcfg->plane_descriptors[disp_cfg_plane_location].overrides.uclk_pstate_change_strategy = + dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_values[stream_index]); + } + + plane_count++; + } + } + } + + if (plane_count == 0) { + dml_dispcfg->overrides.all_streams_blanked = true; + } + + return true; +} + +void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context) +{ + /* TODO: these should be the max of active, svp prefetch and idle should be tracked separately */ + context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dispclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.uclk_khz; + context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.fclk_khz; + context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.uclk_khz; + context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.fclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.deepsleep_dcfclk_khz; + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = in_ctx->v21.mode_programming.programming->fclk_pstate_supported; + context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported; + context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0; + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz; + context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz; + context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz; + context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz; + context->bw_ctx.bw.dcn.clk.stutter_efficiency.base_efficiency = in_ctx->v21.mode_programming.programming->stutter.base_percent_efficiency; + context->bw_ctx.bw.dcn.clk.stutter_efficiency.low_power_efficiency = in_ctx->v21.mode_programming.programming->stutter.low_power_percent_efficiency; +} + +static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index) +{ + struct dml2_dchub_watermark_regs *wm_regs = NULL; + + switch (wm_index) { + case DML2_DCHUB_WATERMARK_SET_A: + wm_regs = &watermarks->dcn4x.a; + break; + case DML2_DCHUB_WATERMARK_SET_B: + wm_regs = &watermarks->dcn4x.b; + break; + case DML2_DCHUB_WATERMARK_SET_C: +
wm_regs = &watermarks->dcn4x.c; + break; + case DML2_DCHUB_WATERMARK_SET_D: + wm_regs = &watermarks->dcn4x.d; + break; + case DML2_DCHUB_WATERMARK_SET_NUM: + default: + /* invalid wm set index */ + wm_regs = NULL; + } + + return wm_regs; +} + +void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx) +{ + const struct dml2_display_cfg_programming *programming = in_ctx->v21.mode_programming.programming; + + unsigned int wm_index; + + /* copy watermark sets from DML */ + for (wm_index = 0; wm_index < programming->global_regs.num_watermark_sets; wm_index++) { + struct dml2_dchub_watermark_regs *wm_regs = wm_set_index_to_dc_wm_set(watermarks, wm_index); + + if (wm_regs) + memcpy(wm_regs, + &programming->global_regs.wm_regs[wm_index], + sizeof(struct dml2_dchub_watermark_regs)); + } +} + +void dml21_map_hw_resources(struct dml2_context *dml_ctx) +{ + unsigned int i = 0; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] = true; + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] = true; + } + +} + +void dml21_get_pipe_mcache_config( + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_plane_programming *pln_prog, + struct dml2_pipe_configuration_descriptor *mcache_pipe_config) +{ + mcache_pipe_config->plane0.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x; + mcache_pipe_config->plane0.viewport_width = pipe_ctx->plane_res.scl_data.viewport.width; + + mcache_pipe_config->plane1.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport_c.x; + mcache_pipe_config->plane1.viewport_width = pipe_ctx->plane_res.scl_data.viewport_c.width; + + mcache_pipe_config->plane1_enabled = + dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format); +} + +void dml21_set_dc_p_state_type( + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming, + bool sub_vp_enabled) +{ + switch (stream_programming->uclk_pstate_method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + pipe_ctx->p_state_type = P_STATE_V_ACTIVE; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_V_BLANK; + break; + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: + pipe_ctx->p_state_type = P_STATE_SUB_VP; + break; + case dml2_pstate_method_fw_drr: + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_FPO; + break; + default: + pipe_ctx->p_state_type = P_STATE_UNKNOWN; + break; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h new file mode 100644 index 000000000000..9880d3e0398e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
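+
+/*
+ * Rough call flow (a sketch based on the definitions in dml21_translation_helper.c,
+ * not a normative ordering): dml21_populate_dml_init_params() feeds SoC/IP parameters
+ * and PMO options into dml2_initialize_instance() at create time;
+ * dml21_map_dc_state_into_dml_display_cfg() converts a dc_state into the DML2.1
+ * display_config ahead of mode support/programming; dml21_copy_clocks_to_dc_state(),
+ * dml21_extract_watermark_sets() and dml21_map_hw_resources() then copy the resulting
+ * programming back into DC structures.
+ */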
+ + +#ifndef _DML21_TRANSLATION_HELPER_H_ +#define _DML21_TRANSLATION_HELPER_H_ + +struct dc; +struct dc_state; +struct dcn_watermarks; +union dcn_watermark_set; +struct pipe_ctx; +struct dc_plane_state; + +struct dml2_context; +struct dml2_configuration_options; +struct dml2_initialize_instance_in_out; + +void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); +bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); +void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context); +void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); +void dml21_map_hw_resources(struct dml2_context *dml_ctx); +void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); +void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled); +unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context); +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c new file mode 100644 index 000000000000..ee721606b883 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c @@ -0,0 +1,516 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_internal_shared_types.h" +#include "dml21_translation_helper.h" +#include "dml2_internal_types.h" +#include "dml21_utils.h" +#include "dml2_dc_resource_mgmt.h" + +#include "dml2_core_dcn4_calcs.h" + +int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] == stream_id) + return i; + } + + return -1; +} + +int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] == plane_id) + return i; + } + + return -1; +} + +bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id) +{ + int i, j; + + if (!plane_id) + return false; + + for (i = 0; i < state->stream_count; i++) { + for (j = 0; j < state->stream_status[i].plane_count; j++) { + if (state->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } + } + } + + return false; +} + +unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id) +{ + return 0xffff & plane_id; +} + +void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index) +{ + unsigned int i = 0; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml_ctx->v21.mode_programming.programming->plane_programming[i].plane_descriptor->stream_index == stream_index) { + 
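+ /* the first plane programming slot whose descriptor references this stream
+ * index is used; if nothing matches, *dml_pipe_idx is left untouched, so
+ * callers are expected to pre-initialize it.
+ */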
*dml_pipe_idx = i; + return; + } + } +} + +void find_pipe_regs_idx(const struct dml2_context *dml_ctx, + struct pipe_ctx *pipe, unsigned int *pipe_regs_idx) +{ + struct pipe_ctx *opp_head = dml_ctx->config.callbacks.get_opp_head(pipe); + + *pipe_regs_idx = dml_ctx->config.callbacks.get_odm_slice_index(opp_head); + + if (pipe->plane_state) + *pipe_regs_idx += dml_ctx->config.callbacks.get_mpc_slice_index(pipe); +} + +/* places pipe references into pipes arrays and returns number of pipes */ +int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, + struct dc_state *context, + struct dml2_context *dml_ctx, + struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], + struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], + int dml_plane_idx) +{ + unsigned int dml_stream_index; + unsigned int main_stream_id; + unsigned int dc_plane_index; + struct dc_stream_state *dc_main_stream; + struct dc_stream_status *dc_main_stream_status; + struct dc_plane_state *dc_main_plane; + struct dc_stream_state *dc_phantom_stream; + struct dc_stream_status *dc_phantom_stream_status; + struct dc_plane_state *dc_phantom_plane; + int num_pipes = 0; + + memset(dc_main_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__); + memset(dc_phantom_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__); + + dml_stream_index = dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_idx].plane_descriptor->stream_index; + main_stream_id = dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index]; + + dc_main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, main_stream_id); + dc_main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_main_stream); + if (!dc_main_stream_status) + return num_pipes; + + /* find main plane based on id */ + dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_idx]); + dc_main_plane = dc_main_stream_status->plane_states[dc_plane_index]; + + if (dc_main_plane) { + num_pipes = dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_main_plane, &context->res_ctx, dc_main_pipes); + } else { + /* stream was configured with dummy plane, so get pipes from opp head */ + struct pipe_ctx *otg_master_pipe = dml_ctx->config.callbacks.get_otg_master_for_stream(&context->res_ctx, dc_main_stream); + if (otg_master_pipe != NULL) + num_pipes = dml_ctx->config.callbacks.get_opp_heads_for_otg_master(otg_master_pipe, &context->res_ctx, dc_main_pipes); + } + + /* if phantom exists, find associated pipes */ + dc_phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, dc_main_stream); + if (dc_phantom_stream && num_pipes > 0) { + dc_phantom_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_phantom_stream); + + if (dc_phantom_stream_status) { + /* phantom plane will have same index as main */ + dc_phantom_plane = dc_phantom_stream_status->plane_states[dc_plane_index]; + + if (dc_phantom_plane) { + /* only care about phantom pipes if they contain the phantom plane */ + dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_phantom_plane, &context->res_ctx, dc_phantom_pipes); + } + } + } + + return num_pipes; +} + +void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming) +{ + union dml2_global_sync_programming *global_sync = 
&stream_programming->global_sync; + + if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { + /* phantom has its own global sync */ + global_sync = &stream_programming->phantom_stream.global_sync; + } + + memcpy(&pipe_ctx->global_sync, + global_sync, + sizeof(union dml2_global_sync_programming)); +} + +void dml21_populate_mall_allocation_size(struct dc_state *context, + struct dml2_context *in_ctx, + struct dml2_per_plane_programming *pln_prog, + struct pipe_ctx *dc_pipe) +{ + + /* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */ + /* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */ + if (dc_pipe->stream && dc_pipe->plane_state && + (dc_pipe->top_pipe == NULL || + dc_pipe->plane_state != dc_pipe->top_pipe->plane_state) && + dc_pipe->prev_odm_pipe == NULL) { + /* SS: all active surfaces stored in MALL */ + if (in_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, dc_pipe) != SUBVP_PHANTOM) { + dc_pipe->surface_size_in_mall_bytes = pln_prog->surface_size_mall_bytes; + context->bw_ctx.bw.dcn.mall_ss_size_bytes += dc_pipe->surface_size_in_mall_bytes; + } else { + /* SUBVP: phantom surfaces only stored in MALL */ + dc_pipe->surface_size_in_mall_bytes = pln_prog->svp_size_mall_bytes; + context->bw_ctx.bw.dcn.mall_subvp_size_bytes += dc_pipe->surface_size_in_mall_bytes; + } + } +} + +bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) +{ + /* If this assert is hit then we have a link encoder dynamic management issue */ + ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->link_res.hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal)); +} + + +static bool is_sub_vp_enabled(struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_paired_subvp_stream(context, pipe_ctx->stream) && + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { + return true; + } + } + return false; +} + + +void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, + struct dml2_per_stream_programming *stream_prog) +{ + unsigned int pipe_reg_index = 0; + + dml21_pipe_populate_global_sync(dml_ctx, context, pipe_ctx, stream_prog); + find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index); + + if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { + memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); + pipe_ctx->unbounded_req = false; + pipe_ctx->det_buffer_size_kb = 0; + } else { + memcpy(&pipe_ctx->hubp_regs, pln_prog->pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); + pipe_ctx->unbounded_req = pln_prog->pipe_regs[pipe_reg_index]->rq_regs.unbounded_request_enabled; + pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64; + } + + pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4x.dppclk_khz; + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz; + + dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); + + bool 
sub_vp_enabled = is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context); + + dml21_set_dc_p_state_type(pipe_ctx, stream_prog, sub_vp_enabled); +} + +static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx, + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *main_stream, + struct dml2_per_stream_programming *stream_programming) +{ + struct dc_stream_state *phantom_stream; + struct dml2_stream_parameters *phantom_stream_descriptor = &stream_programming->phantom_stream.descriptor; + + phantom_stream = dml_ctx->config.svp_pstate.callbacks.create_phantom_stream(dc, context, main_stream); + if (!phantom_stream) + return NULL; + + /* copy details of phantom stream from main */ + memcpy(&phantom_stream->timing, &main_stream->timing, sizeof(phantom_stream->timing)); + memcpy(&phantom_stream->src, &main_stream->src, sizeof(phantom_stream->src)); + memcpy(&phantom_stream->dst, &main_stream->dst, sizeof(phantom_stream->dst)); + + /* modify timing for phantom */ + phantom_stream->timing.v_front_porch = phantom_stream_descriptor->timing.v_front_porch; + phantom_stream->timing.v_addressable = phantom_stream_descriptor->timing.v_active; + phantom_stream->timing.v_total = phantom_stream_descriptor->timing.v_total; + phantom_stream->timing.flags.DSC = 0; // phantom always has DSC disabled + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = stream_programming->phantom_stream.descriptor.timing.v_active; + + phantom_stream->src.y = 0; + phantom_stream->src.height = (double)phantom_stream_descriptor->timing.v_active * (double)main_stream->src.height / (double)main_stream->dst.height; + + phantom_stream->use_dynamic_meta = false; + + dml_ctx->config.svp_pstate.callbacks.add_phantom_stream(dc, context, phantom_stream, main_stream); + + return phantom_stream; +} + +static struct dc_plane_state *dml21_add_phantom_plane(struct dml2_context *dml_ctx, + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *main_plane, + struct dml2_per_plane_programming *plane_programming) +{ + struct dc_plane_state *phantom_plane; + + phantom_plane = dml_ctx->config.svp_pstate.callbacks.create_phantom_plane(dc, context, main_plane); + if (!phantom_plane) + return NULL; + + phantom_plane->format = main_plane->format; + phantom_plane->rotation = main_plane->rotation; + phantom_plane->visible = main_plane->visible; + + memcpy(&phantom_plane->address, &main_plane->address, sizeof(phantom_plane->address)); + memcpy(&phantom_plane->scaling_quality, &main_plane->scaling_quality, + sizeof(phantom_plane->scaling_quality)); + memcpy(&phantom_plane->src_rect, &main_plane->src_rect, sizeof(phantom_plane->src_rect)); + memcpy(&phantom_plane->dst_rect, &main_plane->dst_rect, sizeof(phantom_plane->dst_rect)); + memcpy(&phantom_plane->clip_rect, &main_plane->clip_rect, sizeof(phantom_plane->clip_rect)); + memcpy(&phantom_plane->plane_size, &main_plane->plane_size, + sizeof(phantom_plane->plane_size)); + memcpy(&phantom_plane->tiling_info, &main_plane->tiling_info, + sizeof(phantom_plane->tiling_info)); + memcpy(&phantom_plane->dcc, &main_plane->dcc, sizeof(phantom_plane->dcc)); + + phantom_plane->format = main_plane->format; + phantom_plane->rotation = main_plane->rotation; + phantom_plane->visible = main_plane->visible; + + /* Shadow pipe has small viewport. 
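+ * The phantom plane only needs to cover the (shorter) phantom stream, so its
+ * clip rect is clamped to phantom_stream->src.height below.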
*/ + phantom_plane->clip_rect.y = 0; + phantom_plane->clip_rect.height = phantom_stream->src.height; + + dml_ctx->config.svp_pstate.callbacks.add_phantom_plane(dc, phantom_stream, phantom_plane, context); + + return phantom_plane; +} + +void dml21_handle_phantom_streams_planes(const struct dc *dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + unsigned int dml_stream_index, dml_plane_index, dc_plane_index; + struct dc_stream_state *main_stream; + struct dc_stream_status *main_stream_status; + struct dc_stream_state *phantom_stream; + struct dc_plane_state *main_plane; + bool phantoms_added = false; + + /* create phantom streams and planes and add to context */ + for (dml_stream_index = 0; dml_stream_index < dml_ctx->v21.mode_programming.programming->display_config.num_streams; dml_stream_index++) { + /* iterate through DML streams looking for phantoms */ + if (dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_index].phantom_stream.enabled) { + /* find associated dc stream */ + main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index]); + + main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, main_stream); + + if (!main_stream_status || main_stream_status->plane_count == 0) + continue; + + /* create phantom stream for subvp enabled stream */ + phantom_stream = dml21_add_phantom_stream(dml_ctx, + dc, + context, + main_stream, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_index]); + + if (!phantom_stream) + continue; + + /* iterate through DML planes associated with this stream */ + for (dml_plane_index = 0; dml_plane_index < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_plane_index++) { + if (dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_index].plane_descriptor->stream_index == dml_stream_index) { + /* find associated dc plane */ + dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_index]); + main_plane = main_stream_status->plane_states[dc_plane_index]; + + /* create phantom planes for subvp enabled plane */ + dml21_add_phantom_plane(dml_ctx, + dc, + context, + phantom_stream, + main_plane, + &dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_index]); + + phantoms_added = true; + } + } + } + } + + if (phantoms_added) + dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, dc->current_state); +} + +void dml21_build_fams2_programming(const struct dc *dc, + struct dc_state *context, + struct dml2_context *dml_ctx) +{ + int i, j, k; + unsigned int num_fams2_streams = 0; + + /* reset fams2 data */ + memset(&context->bw_ctx.bw.dcn.fams2_stream_base_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2, 0, sizeof(union dmub_fams2_stream_static_sub_state_v2) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); + + if (dml_ctx->v21.mode_programming.programming->fams2_required) { + for (i = 0; i < context->stream_count; i++) { + int dml_stream_idx; + struct dc_stream_state *phantom_stream; + struct dc_stream_status *phantom_status; + enum fams2_stream_type type = 0; + + union dmub_cmd_fams2_config 
*static_base_state = &context->bw_ctx.bw.dcn.fams2_stream_base_params[num_fams2_streams]; + union dmub_cmd_fams2_config *static_sub_state = &context->bw_ctx.bw.dcn.fams2_stream_sub_params[num_fams2_streams]; + + struct dc_stream_state *stream = context->streams[i]; + + if (context->stream_status[i].plane_count == 0 || + dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { + /* can ignore blanked or phantom streams */ + continue; + } + + dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); + if (dml_stream_idx < 0) { + ASSERT(dml_stream_idx >= 0); + continue; + } + + /* copy static state from PMO */ + memcpy(static_base_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_base_params, + sizeof(union dmub_cmd_fams2_config)); + + if (dc->debug.fams_version.major == 3) { + memcpy(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2[num_fams2_streams], + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params_v2, + sizeof(union dmub_fams2_stream_static_sub_state_v2)); + } else { + memcpy(static_sub_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params, + sizeof(union dmub_cmd_fams2_config)); + } + + switch (dc->debug.fams_version.minor) { + case 1: + default: + type = static_base_state->stream_v1.base.type; + + /* get information from context */ + static_base_state->stream_v1.base.num_planes = context->stream_status[i].plane_count; + static_base_state->stream_v1.base.otg_inst = context->stream_status[i].primary_otg_inst; + + /* populate pipe masks for planes */ + for (j = 0; j < context->stream_status[i].plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { + static_base_state->stream_v1.base.pipe_mask |= (1 << k); + static_base_state->stream_v1.base.plane_pipe_masks[j] |= (1 << k); + } + } + } + } + + + /* get per method programming */ + switch (type) { + case FAMS2_STREAM_TYPE_VBLANK: + case FAMS2_STREAM_TYPE_VACTIVE: + case FAMS2_STREAM_TYPE_DRR: + break; + case FAMS2_STREAM_TYPE_SUBVP: + phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); + if (!phantom_stream) + break; + + phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + + /* phantom status should always be present */ + ASSERT(phantom_status); + if (!phantom_status) + break; + + switch (dc->debug.fams_version.minor) { + case 1: + default: + static_sub_state->stream_v1.sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + + /* populate pipe masks for phantom planes */ + for (j = 0; j < phantom_status->plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { + switch (dc->debug.fams_version.minor) { + case 1: + default: + static_sub_state->stream_v1.sub_state.subvp.phantom_pipe_mask |= (1 << k); + static_sub_state->stream_v1.sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + } + } + } + } + } + break; + default: + ASSERT(false); + break; + } + + num_fams2_streams++; + } + } + + if 
(num_fams2_streams > 0) { + /* copy FAMS2 configuration */ + memcpy(&context->bw_ctx.bw.dcn.fams2_global_config, + &dml_ctx->v21.mode_programming.programming->fams2_global_config, + sizeof(struct dmub_cmd_fams2_global_config)); + + context->bw_ctx.bw.dcn.fams2_global_config.num_streams = num_fams2_streams; + } + + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; +} + +bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format) +{ + return source_format >= dml2_420_8 && source_format <= dml2_rgbe_alpha; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h new file mode 100644 index 000000000000..4bff52eaaef8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef _DML21_UTILS_H_ +#define _DML21_UTILS_H_ + +struct dc_state; +struct dc_plane_state; +struct pipe_ctx; + +struct dml2_context; +struct dml2_display_rq_regs; +struct dml2_display_dlg_regs; +struct dml2_display_ttu_regs; + +int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id); +int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id); +bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id); +void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming); +void dml21_populate_mall_allocation_size(struct dc_state *context, + struct dml2_context *in_ctx, + struct dml2_per_plane_programming *pln_prog, + struct pipe_ctx *dc_pipe); +bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx); +void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index); +void find_pipe_regs_idx(const struct dml2_context *dml_ctx, + struct pipe_ctx *pipe, unsigned int *pipe_regs_idx); +int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, + struct dc_state *context, + struct dml2_context *dml_ctx, + struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], + struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], + int dml_plane_idx); +void dml21_program_dc_pipe(struct dml2_context *dml_ctx, + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_plane_programming *pln_prog, + struct dml2_per_stream_programming *stream_prog); +void dml21_handle_phantom_streams_planes(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); +unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id); +void dml21_build_fams2_programming(const struct dc *dc, + struct dc_state *context, + struct dml2_context *dml_ctx); +bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format); +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c new file mode 100644 index 000000000000..798abb2b2e67 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
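+
+/*
+ * Rough usage sketch (illustrative only; the real call sites live in the dml2
+ * wrapper and the local names here are made up):
+ *
+ *	struct dml2_context *ctx = NULL;
+ *
+ *	if (dml21_create(dc, &ctx, &config)) {
+ *		// ... run DML2.1 mode support / programming through ctx ...
+ *		dml21_destroy(ctx);
+ *	}
+ *
+ * Note that dml21_destroy() only frees what dml21_allocate_memory() allocated
+ * inside the context; releasing the dml2_context itself is left to the caller.
+ */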
+ +#include "dml2_internal_types.h" +#include "dml_top.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_internal_shared_types.h" +#include "dml21_utils.h" +#include "dml21_translation_helper.h" +#include "dml2_dc_resource_mgmt.h" + +#define INVALID -1 + +static bool dml21_allocate_memory(struct dml2_context **dml_ctx) +{ + *dml_ctx = vzalloc(sizeof(struct dml2_context)); + if (!(*dml_ctx)) + return false; + + (*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance)); + if (!((*dml_ctx)->v21.dml_init.dml2_instance)) + return false; + + (*dml_ctx)->v21.mode_support.dml2_instance = (*dml_ctx)->v21.dml_init.dml2_instance; + (*dml_ctx)->v21.mode_programming.dml2_instance = (*dml_ctx)->v21.dml_init.dml2_instance; + + (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; + (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; + + (*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming)); + if (!((*dml_ctx)->v21.mode_programming.programming)) + return false; + + return true; +} + +static void dml21_populate_configuration_options(const struct dc *in_dc, + struct dml2_context *dml_ctx, + const struct dml2_configuration_options *config) +{ + dml_ctx->config = *config; + + /* UCLK P-State options */ + if (in_dc->debug.dml21_force_pstate_method) { + dml_ctx->config.pmo.force_pstate_method_enable = true; + for (int i = 0; i < MAX_PIPES; i++) + dml_ctx->config.pmo.force_pstate_method_values[i] = in_dc->debug.dml21_force_pstate_method_values[i]; + } else { + dml_ctx->config.pmo.force_pstate_method_enable = false; + } +} + +static void dml21_init(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) +{ + + dml_ctx->architecture = dml2_architecture_21; + + dml21_populate_configuration_options(in_dc, dml_ctx, config); + + DC_FP_START(); + + dml21_populate_dml_init_params(&dml_ctx->v21.dml_init, &dml_ctx->config, in_dc); + + dml2_initialize_instance(&dml_ctx->v21.dml_init); + + DC_FP_END(); +} + +bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) +{ + /* Allocate memory for initializing DML21 instance */ + if (!dml21_allocate_memory(dml_ctx)) + return false; + + dml21_init(in_dc, *dml_ctx, config); + + return true; +} + +void dml21_destroy(struct dml2_context *dml2) +{ + vfree(dml2->v21.dml_init.dml2_instance); + vfree(dml2->v21.mode_programming.programming); +} + +static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, + struct dml2_context *in_ctx, unsigned int pipe_cnt) +{ + unsigned int dml_prog_idx = 0, dc_pipe_index = 0, num_dpps_required = 0; + struct dml2_per_plane_programming *pln_prog = NULL; + struct dml2_per_stream_programming *stream_prog = NULL; + struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; + int num_pipes; + unsigned int dml_phantom_prog_idx; + + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + + /* copy global DCHUBBUB arbiter registers */ + memcpy(&context->bw_ctx.bw.dcn.arb_regs, &in_ctx->v21.mode_programming.programming->global_regs.arb_regs, sizeof(struct dml2_display_arb_regs)); + + /* legacy only */ + context->bw_ctx.bw.dcn.compbuf_size_kb = (int)in_ctx->v21.mode_programming.programming->global_regs.arb_regs.compbuf_size * 64; + + 
context->bw_ctx.bw.dcn.mall_ss_size_bytes = 0; + context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes = 0; + context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0; + + /* phantom's start after main planes */ + dml_phantom_prog_idx = in_ctx->v21.mode_programming.programming->display_config.num_planes; + + for (dml_prog_idx = 0; dml_prog_idx < DML2_MAX_PLANES; dml_prog_idx++) { + pln_prog = &in_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx]; + + if (!pln_prog->plane_descriptor) + continue; + + stream_prog = &in_ctx->v21.mode_programming.programming->stream_programming[pln_prog->plane_descriptor->stream_index]; + num_dpps_required = pln_prog->num_dpps_required; + + if (num_dpps_required == 0) { + continue; + } + num_pipes = dml21_find_dc_pipes_for_plane(dc, context, in_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx); + + if (num_pipes <= 0) + continue; + + /* program each pipe */ + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + dml21_program_dc_pipe(in_ctx, context, dc_main_pipes[dc_pipe_index], pln_prog, stream_prog); + + if (pln_prog->phantom_plane.valid && dc_phantom_pipes[dc_pipe_index]) { + dml21_program_dc_pipe(in_ctx, context, dc_phantom_pipes[dc_pipe_index], pln_prog, stream_prog); + } + } + + /* copy per plane mcache allocation */ + memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation)); + if (pln_prog->phantom_plane.valid) { + memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx], + &pln_prog->phantom_plane.mcache_allocation, + sizeof(struct dml2_mcache_surface_allocation)); + + dml_phantom_prog_idx++; + } + } + + /* assign global clocks */ + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + if (in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values > 1) { + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = + in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values] * 1000; + } else { + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[0] * 1000; + } + + if (in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values > 1) { + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = + in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values] * 1000; + } else { + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[0] * 1000; + } + + /* get global mall allocation */ + if (dc->res_pool->funcs->calculate_mall_ways_from_bytes) { + context->bw_ctx.bw.dcn.clk.num_ways = dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes); + } else { + context->bw_ctx.bw.dcn.clk.num_ways = 0; + } +} + +static void dml21_prepare_mcache_params(struct dml2_context *dml_ctx, struct dc_state *context, struct dc_mcache_params *mcache_params) +{ + int dc_plane_idx = 0; + int dml_prog_idx, stream_idx, plane_idx; + struct dml2_per_plane_programming *pln_prog = NULL; + + for (stream_idx = 0; stream_idx < context->stream_count; stream_idx++) { + for (plane_idx = 0; plane_idx < context->stream_status[stream_idx].plane_count; plane_idx++) { + dml_prog_idx = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_idx]->stream_id, 
context->stream_status[stream_idx].plane_states[plane_idx], context); + if (dml_prog_idx == INVALID) { + continue; + } + pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx]; + mcache_params[dc_plane_idx].valid = pln_prog->mcache_allocation.valid; + mcache_params[dc_plane_idx].num_mcaches_plane0 = pln_prog->mcache_allocation.num_mcaches_plane0; + mcache_params[dc_plane_idx].num_mcaches_plane1 = pln_prog->mcache_allocation.num_mcaches_plane1; + mcache_params[dc_plane_idx].requires_dedicated_mall_mcache = pln_prog->mcache_allocation.requires_dedicated_mall_mcache; + mcache_params[dc_plane_idx].last_slice_sharing.plane0_plane1 = pln_prog->mcache_allocation.last_slice_sharing.plane0_plane1; + memcpy(mcache_params[dc_plane_idx].mcache_x_offsets_plane0, + pln_prog->mcache_allocation.mcache_x_offsets_plane0, + sizeof(int) * (DML2_MAX_MCACHES + 1)); + memcpy(mcache_params[dc_plane_idx].mcache_x_offsets_plane1, + pln_prog->mcache_allocation.mcache_x_offsets_plane1, + sizeof(int) * (DML2_MAX_MCACHES + 1)); + dc_plane_idx++; + } + } +} + +static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + bool result = false; + struct dml2_build_mode_programming_in_out *mode_programming = &dml_ctx->v21.mode_programming; + struct dc_mcache_params mcache_params[MAX_PLANES] = {0}; + + memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg)); + memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); + memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params, 0, sizeof(struct dml2_core_mode_programming_in_out)); + + if (!context) + return true; + + if (context->stream_count == 0) { + dml21_build_fams2_programming(in_dc, context, dml_ctx); + return true; + } + + /* scrub phantom's from current dc_state */ + dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context); + dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context); + + /* Populate stream, plane mappings and other fields in display config. 
*/ + result = dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); + if (!result) + return false; + + DC_FP_START(); + result = dml2_build_mode_programming(mode_programming); + DC_FP_END(); + if (!result) + return false; + + /* Check and map HW resources */ + if (result && !dml_ctx->config.skip_hw_state_mapping) { + dml21_map_hw_resources(dml_ctx); + dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, in_dc->current_state); + /* if subvp phantoms are present, expand them into dc context */ + dml21_handle_phantom_streams_planes(in_dc, context, dml_ctx); + + if (in_dc->res_pool->funcs->program_mcache_pipe_config) { + //Prepare mcache params for each plane based on mcache output from DML + dml21_prepare_mcache_params(dml_ctx, context, mcache_params); + + //populate mcache regs to each pipe + dml_ctx->config.callbacks.allocate_mcache(context, mcache_params); + } + } + + /* Copy DML CLK, WM and REG outputs to bandwidth context */ + if (result && !dml_ctx->config.skip_hw_state_mapping) { + dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count); + dml21_copy_clocks_to_dc_state(dml_ctx, context); + dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx); + dml21_build_fams2_programming(in_dc, context, dml_ctx); + } + + return true; +} + +static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + bool is_supported = false; + struct dml2_initialize_instance_in_out *dml_init = &dml_ctx->v21.dml_init; + struct dml2_check_mode_supported_in_out *mode_support = &dml_ctx->v21.mode_support; + + memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg)); + memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); + memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.check_mode_supported_locals.mode_support_params, 0, sizeof(struct dml2_core_mode_support_in_out)); + + if (!context || context->stream_count == 0) + return true; + + /* Scrub phantom's from current dc_state */ + dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context); + dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context); + + mode_support->dml2_instance = dml_init->dml2_instance; + dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); + dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; + DC_FP_START(); + is_supported = dml2_check_mode_supported(mode_support); + DC_FP_END(); + if (!is_supported) + return false; + + return true; +} + +bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx, + enum dc_validate_mode validate_mode) +{ + bool out = false; + + /* Use dml21_check_mode_support for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX path */ + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) + out = dml21_check_mode_support(in_dc, context, dml_ctx); + else + out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); + + return out; +} + +void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + unsigned int dml_prog_idx, dml_phantom_prog_idx, dc_pipe_index; + int num_pipes; + struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + struct pipe_ctx 
*dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; + + struct dml2_per_plane_programming *pln_prog = NULL; + struct dml2_plane_mcache_configuration_descriptor *mcache_config = NULL; + struct prepare_mcache_programming_locals *l = &dml_ctx->v21.scratch.prepare_mcache_locals; + + if (context->stream_count == 0) { + return; + } + + memset(&l->build_mcache_programming_params, 0, sizeof(struct dml2_build_mcache_programming_in_out)); + l->build_mcache_programming_params.dml2_instance = dml_ctx->v21.dml_init.dml2_instance; + + /* phantom's start after main planes */ + dml_phantom_prog_idx = dml_ctx->v21.mode_programming.programming->display_config.num_planes; + + /* Build mcache programming parameters per plane per pipe */ + for (dml_prog_idx = 0; dml_prog_idx < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_prog_idx++) { + pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx]; + + mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_prog_idx]; + memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor)); + mcache_config->plane_descriptor = pln_prog->plane_descriptor; + mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx]; + mcache_config->num_pipes = pln_prog->num_dpps_required; + l->build_mcache_programming_params.num_configurations++; + + if (pln_prog->num_dpps_required == 0) { + continue; + } + + num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx); + if (num_pipes <= 0 || dc_main_pipes[0]->stream == NULL || + dc_main_pipes[0]->plane_state == NULL) + continue; + + /* get config for each pipe */ + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + ASSERT(dc_main_pipes[dc_pipe_index]); + dml21_get_pipe_mcache_config(context, dc_main_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]); + } + + /* get config for each phantom pipe */ + if (pln_prog->phantom_plane.valid && + dc_phantom_pipes[0] && + dc_main_pipes[0]->stream && + dc_phantom_pipes[0]->plane_state) { + mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_phantom_prog_idx]; + memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor)); + mcache_config->plane_descriptor = pln_prog->plane_descriptor; + mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx]; + mcache_config->num_pipes = pln_prog->num_dpps_required; + l->build_mcache_programming_params.num_configurations++; + + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + ASSERT(dc_phantom_pipes[dc_pipe_index]); + dml21_get_pipe_mcache_config(context, dc_phantom_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]); + } + + /* increment phantom index */ + dml_phantom_prog_idx++; + } + } + + /* Call to generate mcache programming per plane per pipe for the given display configuration */ + dml2_build_mcache_programming(&l->build_mcache_programming_params); + + /* get per plane per pipe mcache programming */ + for (dml_prog_idx = 0; dml_prog_idx < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_prog_idx++) { + pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx]; + + num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx); + if (num_pipes <= 0 || 
dc_main_pipes[0]->stream == NULL || + dc_main_pipes[0]->plane_state == NULL) + continue; + + /* get config for each pipe */ + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + ASSERT(dc_main_pipes[dc_pipe_index]); + if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index]) { + memcpy(&dc_main_pipes[dc_pipe_index]->mcache_regs, + l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index], + sizeof(struct dml2_hubp_pipe_mcache_regs)); + } + } + + /* get config for each phantom pipe */ + if (pln_prog->phantom_plane.valid && + dc_phantom_pipes[0] && + dc_main_pipes[0]->stream && + dc_phantom_pipes[0]->plane_state) { + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + ASSERT(dc_phantom_pipes[dc_pipe_index]); + if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index]) { + memcpy(&dc_phantom_pipes[dc_pipe_index]->mcache_regs, + l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index], + sizeof(struct dml2_hubp_pipe_mcache_regs)); + } + } + /* increment phantom index */ + dml_phantom_prog_idx++; + } + } +} + +void dml21_copy(struct dml2_context *dst_dml_ctx, + struct dml2_context *src_dml_ctx) +{ + /* Preserve references to internals */ + struct dml2_instance *dst_dml2_instance = dst_dml_ctx->v21.dml_init.dml2_instance; + struct dml2_display_cfg_programming *dst_dml2_programming = dst_dml_ctx->v21.mode_programming.programming; + + /* Copy context */ + memcpy(dst_dml_ctx, src_dml_ctx, sizeof(struct dml2_context)); + + /* Copy Internals */ + memcpy(dst_dml2_instance, src_dml_ctx->v21.dml_init.dml2_instance, sizeof(struct dml2_instance)); + memcpy(dst_dml2_programming, src_dml_ctx->v21.mode_programming.programming, sizeof(struct dml2_display_cfg_programming)); + + /* Restore references to internals */ + dst_dml_ctx->v21.dml_init.dml2_instance = dst_dml2_instance; + + dst_dml_ctx->v21.mode_support.dml2_instance = dst_dml2_instance; + dst_dml_ctx->v21.mode_programming.dml2_instance = dst_dml2_instance; + + dst_dml_ctx->v21.mode_support.display_config = &dst_dml_ctx->v21.display_config; + dst_dml_ctx->v21.mode_programming.display_config = dst_dml_ctx->v21.mode_support.display_config; + + dst_dml_ctx->v21.mode_programming.programming = dst_dml2_programming; + + DC_FP_START(); + + /* need to initialize copied instance for internal references to be correct */ + dml2_initialize_instance(&dst_dml_ctx->v21.dml_init); + + DC_FP_END(); +} + +bool dml21_create_copy(struct dml2_context **dst_dml_ctx, + struct dml2_context *src_dml_ctx) +{ + /* Allocate memory for initializing DML21 instance */ + if (!dml21_allocate_memory(dst_dml_ctx)) + return false; + + dml21_copy(*dst_dml_ctx, src_dml_ctx); + + return true; +} + +void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) +{ + dml21_init(in_dc, dml_ctx, config); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h new file mode 100644 index 000000000000..15f92029d2e5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ + +#ifndef _DML21_WRAPPER_H_ +#define _DML21_WRAPPER_H_ + +#include "os_types.h" +#include "dml_top_soc_parameter_types.h" +#include "dml_top_display_cfg_types.h" + +struct dc; +struct dc_state; +struct dml2_configuration_options; +struct dml2_context; +enum dc_validate_mode; + +/** + * dml21_create - Creates dml21 context. + * @in_dc: dc. + * @dml_ctx: Created dml21 context. + * @config: dml21 configuration options. + * + * Creation of DML21 is done as part of dc_state creation. + * DML21 IP, SOC and STATES are initialized at + * creation time. + * + * Return: True if dml2 is successfully created, false otherwise. + */ +bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config); +void dml21_destroy(struct dml2_context *dml2); +void dml21_copy(struct dml2_context *dst_dml_ctx, + struct dml2_context *src_dml_ctx); +bool dml21_create_copy(struct dml2_context **dst_dml_ctx, + struct dml2_context *src_dml_ctx); +void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config); + +/** + * dml21_validate - Determines if a display configuration is supported or not. + * @in_dc: dc. + * @context: dc_state to be validated. + * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX + * will not populate context.res_ctx. + * + * Based on validate_mode, this internally calls: + * + * -dml21_mode_check_and_programming - for DC_VALIDATE_MODE_AND_PROGRAMMING option + * Calculates if dc_state can be supported on the input display + * configuration. If supported, generates the necessary HW + * programming for the new dc_state. + * + * -dml21_check_mode_support - for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX option + * Calculates if dc_state can be supported for the input display + * config. + * + * Context: Two threads may not invoke this function concurrently unless they reference + * separate dc_states for validation. + * Return: True if mode is supported, false otherwise. + */ +bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx, + enum dc_validate_mode validate_mode); + +/* Prepare hubp mcache_regs for hubp mcache ID and split coordinate programming */ +void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); + +/* Structure for inputting external SOCBB and DCNIP values for tool based debugging. */ +struct socbb_ip_params_external { + struct dml2_ip_capabilities ip_params; + struct dml2_soc_bb soc_bb; +}; + +/* mcache parameters decided by dml */ +struct dc_mcache_params { + bool valid; + /* + * For iMALL, dedicated mall mcaches are required (sharing of last + * slice possible); for legacy phantom or phantom without return, + * only the mall mcaches need to be valid. + */ + bool requires_dedicated_mall_mcache; + unsigned int num_mcaches_plane0; + unsigned int num_mcaches_plane1; + /* + * Generally, plane0/1 slices must use a disjoint set of caches, + * but in some cases the final segment of the two planes can + * use the same cache. If plane0_plane1 is set, then this is + * allowed. + * + * Similarly, the caches allocated to MALL prefetcher are generally + * disjoint, but if mall_prefetch is set, then the final segment + * between the main and the mall pixel requestor can use the same + * cache. + * + * Note that both bits may be set at the same time.
+ */ + struct { + bool mall_comb_mcache_p0; + bool mall_comb_mcache_p1; + bool plane0_plane1; + } last_slice_sharing; + /* + * A plane is divided into vertical slices of mcaches, + * which wrap on the surface width. + * + * For example, if the surface width is 7680, and split into + * three slices of equal width, the boundary array would contain + * [2560, 5120, 7680] + * + * The assignments are + * 0 = [0 .. 2559] + * 1 = [2560 .. 5119] + * 2 = [5120 .. 7679] + * 0 = [7680 .. INF] + * The final element implicitly is the same as the first, and + * at first seems invalid since it is never referenced (since) + * it is outside the surface. However, its useful when shifting + * (see below). + * + * For any given valid mcache assignment, a shifted version, wrapped + * on the surface width boundary is also assumed to be valid. + * + * For example, shifting [2560, 5120, 7680] by -50 results in + * [2510, 5170, 7630]. + * + * The assignments are now: + * 0 = [0 .. 2509] + * 1 = [2510 .. 5169] + * 2 = [5170 .. 7629] + * 0 = [7630 .. INF] + */ + int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1]; + int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1]; +}; +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h new file mode 100644 index 000000000000..16a4f97bca4e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML_DML_DCN4_SOC_BB__ +#define __DML_DML_DCN4_SOC_BB__ + +#include "dml_top_soc_parameter_types.h" + +static const struct dml2_soc_qos_parameters dml_dcn4_variant_a_soc_qos_params = { + .derate_table = { + .system_active_urgent = { + .dram_derate_percent_pixel = 22, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 76, + .dcfclk_derate_percent = 100, + }, + .system_active_average = { + .dram_derate_percent_pixel = 17, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 75, + }, + .dcn_mall_prefetch_urgent = { + .dram_derate_percent_pixel = 40, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + .dcn_mall_prefetch_average = { + .dram_derate_percent_pixel = 33, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 62, + .dcfclk_derate_percent = 83, + }, + .system_idle_average = { + .dram_derate_percent_pixel = 70, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + }, + .writeback = { + .base_latency_us = 12, + .scaling_factor_us = 0, + .scaling_factor_mhz = 0, + }, + .qos_params = { + .dcn4x = { + .df_qos_response_time_fclk_cycles = 300, + .max_round_trip_to_furthest_cs_fclk_cycles = 350, + .mall_overhead_fclk_cycles = 50, + .meta_trip_adder_fclk_cycles = 36, + .average_transport_distance_fclk_cycles = 257, + .umc_urgent_ramp_latency_margin = 50, + .umc_max_latency_margin = 30, + .umc_average_latency_margin = 20, + .fabric_max_transport_latency_margin = 20, + .fabric_average_transport_latency_margin = 10, + + .per_uclk_dpm_params = { + { + .minimum_uclk_khz = 97 * 1000, + .urgent_ramp_uclk_cycles = 472, + .trip_to_memory_uclk_cycles = 827, + .meta_trip_to_memory_uclk_cycles = 827, + 
.maximum_latency_when_urgent_uclk_cycles = 72, + .average_latency_when_urgent_uclk_cycles = 61, + .maximum_latency_when_non_urgent_uclk_cycles = 827, + .average_latency_when_non_urgent_uclk_cycles = 118, + }, + }, + }, + }, + .qos_type = dml2_qos_param_type_dcn4x, +}; + +static const struct dml2_soc_bb dml2_socbb_dcn401 = { + .clk_table = { + .uclk = { + .clk_values_khz = {97000}, + .num_clk_values = 1, + }, + .fclk = { + .clk_values_khz = {300000, 2500000}, + .num_clk_values = 2, + }, + .dcfclk = { + .clk_values_khz = {200000, 1564000}, + .num_clk_values = 2, + }, + .dispclk = { + .clk_values_khz = {100000, 2000000}, + .num_clk_values = 2, + }, + .dppclk = { + .clk_values_khz = {100000, 2000000}, + .num_clk_values = 2, + }, + .dtbclk = { + .clk_values_khz = {100000, 1564000}, + .num_clk_values = 2, + }, + .phyclk = { + .clk_values_khz = {810000, 810000}, + .num_clk_values = 2, + }, + .socclk = { + .clk_values_khz = {300000, 1200000}, + .num_clk_values = 2, + }, + .dscclk = { + .clk_values_khz = {666667, 666667}, + .num_clk_values = 2, + }, + .phyclk_d18 = { + .clk_values_khz = {625000, 625000}, + .num_clk_values = 2, + }, + .phyclk_d32 = { + .clk_values_khz = {625000, 625000}, + .num_clk_values = 2, + }, + .dram_config = { + .channel_width_bytes = 2, + .channel_count = 16, + .transactions_per_clock = 16, + }, + }, + + .qos_parameters = { + .derate_table = { + .system_active_urgent = { + .dram_derate_percent_pixel = 22, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 76, + .dcfclk_derate_percent = 100, + }, + .system_active_average = { + .dram_derate_percent_pixel = 15, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 75, + }, + .dcn_mall_prefetch_urgent = { + .dram_derate_percent_pixel = 40, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + .dcn_mall_prefetch_average = { + .dram_derate_percent_pixel = 30, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 62, + .dcfclk_derate_percent = 83, + }, + .system_idle_average = { + .dram_derate_percent_pixel = 70, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + }, + .writeback = { + .base_latency_us = 0, + .scaling_factor_us = 0, + .scaling_factor_mhz = 0, + }, + .qos_params = { + .dcn4x = { + .df_qos_response_time_fclk_cycles = 300, + .max_round_trip_to_furthest_cs_fclk_cycles = 350, + .mall_overhead_fclk_cycles = 50, + .meta_trip_adder_fclk_cycles = 36, + .average_transport_distance_fclk_cycles = 260, + .umc_urgent_ramp_latency_margin = 50, + .umc_max_latency_margin = 30, + .umc_average_latency_margin = 20, + .fabric_max_transport_latency_margin = 20, + .fabric_average_transport_latency_margin = 10, + + .per_uclk_dpm_params = { + { + // State 1 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 472, + .trip_to_memory_uclk_cycles = 827, + .meta_trip_to_memory_uclk_cycles = 827, + .maximum_latency_when_urgent_uclk_cycles = 72, + .average_latency_when_urgent_uclk_cycles = 72, + .maximum_latency_when_non_urgent_uclk_cycles = 827, + .average_latency_when_non_urgent_uclk_cycles = 117, + }, + { + // State 2 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 546, + .trip_to_memory_uclk_cycles = 848, + .meta_trip_to_memory_uclk_cycles = 848, + .maximum_latency_when_urgent_uclk_cycles = 146, + 
.average_latency_when_urgent_uclk_cycles = 146, + .maximum_latency_when_non_urgent_uclk_cycles = 848, + .average_latency_when_non_urgent_uclk_cycles = 133, + }, + { + // State 3 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 564, + .trip_to_memory_uclk_cycles = 853, + .meta_trip_to_memory_uclk_cycles = 853, + .maximum_latency_when_urgent_uclk_cycles = 164, + .average_latency_when_urgent_uclk_cycles = 164, + .maximum_latency_when_non_urgent_uclk_cycles = 853, + .average_latency_when_non_urgent_uclk_cycles = 136, + }, + { + // State 4 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 613, + .trip_to_memory_uclk_cycles = 869, + .meta_trip_to_memory_uclk_cycles = 869, + .maximum_latency_when_urgent_uclk_cycles = 213, + .average_latency_when_urgent_uclk_cycles = 213, + .maximum_latency_when_non_urgent_uclk_cycles = 869, + .average_latency_when_non_urgent_uclk_cycles = 149, + }, + { + // State 5 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 632, + .trip_to_memory_uclk_cycles = 874, + .meta_trip_to_memory_uclk_cycles = 874, + .maximum_latency_when_urgent_uclk_cycles = 232, + .average_latency_when_urgent_uclk_cycles = 232, + .maximum_latency_when_non_urgent_uclk_cycles = 874, + .average_latency_when_non_urgent_uclk_cycles = 153, + }, + { + // State 6 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 665, + .trip_to_memory_uclk_cycles = 885, + .meta_trip_to_memory_uclk_cycles = 885, + .maximum_latency_when_urgent_uclk_cycles = 265, + .average_latency_when_urgent_uclk_cycles = 265, + .maximum_latency_when_non_urgent_uclk_cycles = 885, + .average_latency_when_non_urgent_uclk_cycles = 161, + }, + { + // State 7 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 689, + .trip_to_memory_uclk_cycles = 895, + .meta_trip_to_memory_uclk_cycles = 895, + .maximum_latency_when_urgent_uclk_cycles = 289, + .average_latency_when_urgent_uclk_cycles = 289, + .maximum_latency_when_non_urgent_uclk_cycles = 895, + .average_latency_when_non_urgent_uclk_cycles = 167, + }, + { + // State 8 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 716, + .trip_to_memory_uclk_cycles = 902, + .meta_trip_to_memory_uclk_cycles = 902, + .maximum_latency_when_urgent_uclk_cycles = 316, + .average_latency_when_urgent_uclk_cycles = 316, + .maximum_latency_when_non_urgent_uclk_cycles = 902, + .average_latency_when_non_urgent_uclk_cycles = 174, + }, + }, + }, + }, + .qos_type = dml2_qos_param_type_dcn4x, + }, + + .power_management_parameters = { + .dram_clk_change_blackout_us = 400, + .fclk_change_blackout_us = 0, + .g7_ppt_blackout_us = 0, + .stutter_enter_plus_exit_latency_us = 54, + .stutter_exit_latency_us = 41, + .z8_stutter_enter_plus_exit_latency_us = 0, + .z8_stutter_exit_latency_us = 0, + /* + .g6_temp_read_blackout_us = { + 23.00, + 10.00, + 10.00, + 8.00, + 8.00, + 5.00, + 5.00, + 5.00, + }, + */ + }, + + .vmin_limit = { + .dispclk_khz = 600 * 1000, + }, + + .dprefclk_mhz = 720, + .xtalclk_mhz = 100, + .pcie_refclk_mhz = 100, + .dchub_refclk_mhz = 50, + .mall_allocated_for_dcn_mbytes = 64, + .max_outstanding_reqs = 512, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .hostvm_min_page_size_kbytes = 0, + .gpuvm_min_page_size_kbytes = 256, + .phy_downspread_percent = 0.38, + .dcn_downspread_percent = 0.38, + .dispclk_dppclk_vco_speed_mhz = 4500, + .do_urgent_latency_adjustment = 0, + .mem_word_bytes = 32, + .num_dcc_mcaches = 8, + .mcache_size_bytes = 2048, + .mcache_line_size_bytes = 32, + .max_fclk_for_uclk_dpm_khz = 1250 * 1000, +}; + +static const struct 
dml2_ip_capabilities dml2_dcn401_max_ip_caps = { + .pipe_count = 4, + .otg_count = 4, + .num_dsc = 4, + .max_num_dp2p0_streams = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_dp2p0_outputs = 4, + .rob_buffer_size_kbytes = 192, + .config_return_buffer_size_in_kbytes = 1344, + .config_return_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 22, + .compressed_buffer_segment_size_in_kbytes = 64, + .cursor_buffer_size = 24, + .max_flip_time_us = 80, + .max_flip_time_lines = 32, + .hostvm_mode = 0, + .subvp_drr_scheduling_margin_us = 100, + .subvp_prefetch_end_to_mall_start_us = 15, + .subvp_fw_processing_delay = 15, + .max_vactive_det_fill_delay_us = 400, + + .fams2 = { + .max_allow_delay_us = 100 * 1000, + .scheduling_delay_us = 550, + .vertical_interrupt_ack_delay_us = 40, + .allow_programming_delay_us = 18, + .min_allow_width_us = 20, + .subvp_df_throttle_delay_us = 100, + .subvp_programming_delay_us = 200, + .subvp_prefetch_to_mall_delay_us = 18, + .drr_programming_delay_us = 35, + + .lock_timeout_us = 5000, + .recovery_timeout_us = 5000, + .flip_programming_delay_us = 300, + }, +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h new file mode 100644 index 000000000000..281d7ad230d8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_EXTERNAL_LIB_DEPS__ +#define __DML2_EXTERNAL_LIB_DEPS__ + +#include "os_types.h" + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h new file mode 100644 index 000000000000..a64ec4dcf11a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML_TOP_H__ +#define __DML_TOP_H__ + +#include "dml_top_types.h" + +/* + * Top Level Interface for DML2 + */ + +/* + * Returns the size of the DML instance for the caller to allocate + */ +unsigned int dml2_get_instance_size_bytes(void); + +/* + * Initializes the DML instance (i.e. with configuration, soc BB, IP params, etc...) + */ +bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out); + +/* + * Determines if the input mode is supported (boolean) on the SoC at all. Does not return + * information on how mode should be programmed. + */ +bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out); + +/* + * Determines the full (optimized) programming for the input mode. Returns minimum + * clocks as well as dchub register programming values for all pipes, and additional meta + * such as ODM or MPCC combine factors. + */ +bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out); + +/* + * Determines the correct per pipe mcache register programming for a valid mode. + * The mcache allocation must have been calculated (successfully) in a previous + * call to dml2_build_mode_programming. + * The actual hubp viewport dimensions must be what the actual registers will be + * programmed to (i.e. based on scaler setup).
+ */ +bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h new file mode 100644 index 000000000000..bf57df42d1d9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __dml2_TOP_DCHUB_REGISTERS_H__ +#define __dml2_TOP_DCHUB_REGISTERS_H__ + +#include "dml2_external_lib_deps.h" +// These types are uint32_t as they represent actual calculated register values for HW + +struct dml2_display_dlg_regs { + uint32_t refcyc_h_blank_end; + uint32_t dlg_vblank_end; + uint32_t min_dst_y_next_start; + uint32_t refcyc_per_htotal; + uint32_t refcyc_x_after_scaler; + uint32_t dst_y_after_scaler; + uint32_t dst_y_prefetch; + uint32_t dst_y_per_vm_vblank; + uint32_t dst_y_per_row_vblank; + uint32_t dst_y_per_vm_flip; + uint32_t dst_y_per_row_flip; + uint32_t ref_freq_to_pix_freq; + uint32_t vratio_prefetch; + uint32_t vratio_prefetch_c; + uint32_t refcyc_per_tdlut_group; + uint32_t refcyc_per_pte_group_vblank_l; + uint32_t refcyc_per_pte_group_vblank_c; + uint32_t refcyc_per_pte_group_flip_l; + uint32_t refcyc_per_pte_group_flip_c; + uint32_t dst_y_per_pte_row_nom_l; + uint32_t dst_y_per_pte_row_nom_c; + uint32_t refcyc_per_pte_group_nom_l; + uint32_t refcyc_per_pte_group_nom_c; + uint32_t refcyc_per_line_delivery_pre_l; + uint32_t refcyc_per_line_delivery_pre_c; + uint32_t refcyc_per_line_delivery_l; + uint32_t refcyc_per_line_delivery_c; + uint32_t refcyc_per_vm_group_vblank; + uint32_t refcyc_per_vm_group_flip; + uint32_t refcyc_per_vm_req_vblank; + uint32_t refcyc_per_vm_req_flip; + uint32_t dst_y_offset_cur0; + uint32_t chunk_hdl_adjust_cur0; + uint32_t vready_after_vcount0; + uint32_t dst_y_delta_drq_limit; + uint32_t refcyc_per_vm_dmdata; + uint32_t dmdata_dl_delta; + + // MRQ + uint32_t refcyc_per_meta_chunk_vblank_l; + uint32_t refcyc_per_meta_chunk_vblank_c; + uint32_t refcyc_per_meta_chunk_flip_l; + uint32_t refcyc_per_meta_chunk_flip_c; + uint32_t dst_y_per_meta_row_nom_l; + uint32_t dst_y_per_meta_row_nom_c; + uint32_t refcyc_per_meta_chunk_nom_l; + uint32_t refcyc_per_meta_chunk_nom_c; +}; + +struct dml2_display_ttu_regs { + uint32_t qos_level_low_wm; + uint32_t qos_level_high_wm; + uint32_t min_ttu_vblank; + uint32_t qos_level_flip; + uint32_t refcyc_per_req_delivery_l; + uint32_t refcyc_per_req_delivery_c; + uint32_t refcyc_per_req_delivery_cur0; + uint32_t refcyc_per_req_delivery_pre_l; + uint32_t refcyc_per_req_delivery_pre_c; + uint32_t refcyc_per_req_delivery_pre_cur0; + uint32_t qos_level_fixed_l; + uint32_t qos_level_fixed_c; + uint32_t qos_level_fixed_cur0; + uint32_t qos_ramp_disable_l; + uint32_t qos_ramp_disable_c; + uint32_t qos_ramp_disable_cur0; +}; + +struct dml2_display_arb_regs { + uint32_t max_req_outstanding; + uint32_t min_req_outstanding; + uint32_t sat_level_us; + uint32_t hvm_max_qos_commit_threshold; + uint32_t hvm_min_req_outstand_commit_threshold; + uint32_t compbuf_reserved_space_kbytes; + uint32_t compbuf_size; + uint32_t sdpif_request_rate_limit; + uint32_t allow_sdpif_rate_limit_when_cstate_req; + uint32_t dcfclk_deep_sleep_hysteresis; + uint32_t pstate_stall_threshold; +}; + +struct dml2_cursor_dlg_regs{ + uint32_t dst_x_offset; // CURSOR0_DST_X_OFFSET + uint32_t dst_y_offset; // CURSOR0_DST_Y_OFFSET + 
uint32_t chunk_hdl_adjust; // CURSOR0_CHUNK_HDL_ADJUST + + uint32_t qos_level_fixed; + uint32_t qos_ramp_disable; +}; + +struct dml2_display_plane_rq_regs { + uint32_t chunk_size; + uint32_t min_chunk_size; + uint32_t dpte_group_size; + uint32_t mpte_group_size; + uint32_t swath_height; + uint32_t pte_row_height_linear; + + // MRQ + uint32_t meta_chunk_size; + uint32_t min_meta_chunk_size; +}; + +struct dml2_display_rq_regs { + struct dml2_display_plane_rq_regs rq_regs_l; + struct dml2_display_plane_rq_regs rq_regs_c; + uint32_t drq_expansion_mode; + uint32_t prq_expansion_mode; + uint32_t crq_expansion_mode; + uint32_t plane1_base_address; + uint32_t unbounded_request_enabled; + bool pte_buffer_mode; + bool force_one_row_for_frame; + + // MRQ + uint32_t mrq_expansion_mode; +}; + +struct dml2_display_mcache_regs { + uint32_t mcache_id_first; + uint32_t mcache_id_second; + uint32_t split_location; +}; + +struct dml2_hubp_pipe_mcache_regs { + struct { + struct dml2_display_mcache_regs p0; + struct dml2_display_mcache_regs p1; + } main; + struct { + struct dml2_display_mcache_regs p0; + struct dml2_display_mcache_regs p1; + } mall; +}; + +struct dml2_dchub_per_pipe_register_set { + struct dml2_display_rq_regs rq_regs; + struct dml2_display_ttu_regs ttu_regs; + struct dml2_display_dlg_regs dlg_regs; + + uint32_t det_size; +}; + +struct dml2_dchub_watermark_regs { + /* watermarks */ + uint32_t urgent; + uint32_t sr_enter; + uint32_t sr_exit; + uint32_t sr_enter_z8; + uint32_t sr_exit_z8; + uint32_t sr_enter_low_power; + uint32_t sr_exit_low_power; + uint32_t uclk_pstate; + uint32_t fclk_pstate; + uint32_t temp_read_or_ppt; + uint32_t usr; + /* qos */ + uint32_t refcyc_per_trip_to_mem; + uint32_t refcyc_per_meta_trip_to_mem; + uint32_t frac_urg_bw_flip; + uint32_t frac_urg_bw_nom; + uint32_t frac_urg_bw_mall; +}; + +enum dml2_dchub_watermark_reg_set_index { + DML2_DCHUB_WATERMARK_SET_A = 0, + DML2_DCHUB_WATERMARK_SET_B = 1, + DML2_DCHUB_WATERMARK_SET_C = 2, + DML2_DCHUB_WATERMARK_SET_D = 3, + DML2_DCHUB_WATERMARK_SET_NUM = 4, +}; + +struct dml2_dchub_global_register_set { + struct dml2_display_arb_regs arb_regs; + struct dml2_dchub_watermark_regs wm_regs[DML2_DCHUB_WATERMARK_SET_NUM]; + unsigned int num_watermark_sets; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h new file mode 100644 index 000000000000..35aa954248cd --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h @@ -0,0 +1,526 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
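For context on how the dml2_dchub_global_register_set defined above is typically consumed: only the first num_watermark_sets entries of wm_regs[] are populated, indexed by the DML2_DCHUB_WATERMARK_SET_* values. The snippet below is an illustrative caller-side sketch (it assumes the header above is included; program_set() is a hypothetical hook, not a driver function):

/* Illustrative only: walk the populated watermark sets and hand each one,
 * together with its set index (A/B/C/D), to a programming callback. */
static void for_each_watermark_set(const struct dml2_dchub_global_register_set *regs,
				   void (*program_set)(unsigned int set_idx,
						       const struct dml2_dchub_watermark_regs *wm))
{
	unsigned int i;

	for (i = 0; i < regs->num_watermark_sets && i < DML2_DCHUB_WATERMARK_SET_NUM; i++)
		program_set(i, &regs->wm_regs[i]);
}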
+ +#ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__ +#define __DML_TOP_DISPLAY_CFG_TYPES_H__ + +#include "dml2_external_lib_deps.h" + +#define DML2_MAX_PLANES 8 +#define DML2_MAX_DCN_PIPES 8 +#define DML2_MAX_MCACHES 8 // assume plane is going to be supported by a max of 8 mcaches +#define DML2_MAX_WRITEBACK 3 + +enum dml2_swizzle_mode { + dml2_sw_linear, // SW_LINEAR accepts 256 byte aligned pitch and also 128 byte aligned pitch if DCC is not enabled + dml2_sw_256b_2d, + dml2_sw_4kb_2d, + dml2_sw_64kb_2d, + dml2_sw_256kb_2d, + + dml2_gfx11_sw_linear, + dml2_gfx11_sw_64kb_d, + dml2_gfx11_sw_64kb_d_t, + dml2_gfx11_sw_64kb_d_x, + dml2_gfx11_sw_64kb_r_x, + dml2_gfx11_sw_256kb_d_x, + dml2_gfx11_sw_256kb_r_x, + +}; + +enum dml2_source_format_class { + dml2_444_8 = 0, + dml2_444_16 = 1, + dml2_444_32 = 2, + dml2_444_64 = 3, + dml2_420_8 = 4, + dml2_420_10 = 5, + dml2_420_12 = 6, + dml2_rgbe_alpha = 9, + dml2_rgbe = 10, + dml2_mono_8 = 11, + dml2_mono_16 = 12, + dml2_422_planar_8 = 13, + dml2_422_planar_10 = 14, + dml2_422_planar_12 = 15, + dml2_422_packed_8 = 16, + dml2_422_packed_10 = 17, + dml2_422_packed_12 = 18 +}; + +enum dml2_sample_positioning { + dml2_interstitial = 0, + dml2_cosited = 1 +}; + +enum dml2_rotation_angle { + dml2_rotation_0 = 0, + dml2_rotation_90 = 1, + dml2_rotation_180 = 2, + dml2_rotation_270 = 3 +}; + +enum dml2_output_format_class { + dml2_444 = 0, + dml2_s422 = 1, + dml2_n422 = 2, + dml2_420 = 3 +}; + +enum dml2_output_encoder_class { + dml2_dp = 0, + dml2_edp = 1, + dml2_dp2p0 = 2, + dml2_hdmi = 3, + dml2_hdmifrl = 4, + dml2_none = 5 +}; + +enum dml2_output_link_dp_rate { + dml2_dp_rate_na = 0, + dml2_dp_rate_hbr = 1, + dml2_dp_rate_hbr2 = 2, + dml2_dp_rate_hbr3 = 3, + dml2_dp_rate_uhbr10 = 4, + dml2_dp_rate_uhbr13p5 = 5, + dml2_dp_rate_uhbr20 = 6 +}; + +enum dml2_pstate_type { + dml2_pstate_type_uclk = 0, + dml2_pstate_type_fclk = 1, + dml2_pstate_type_ppt = 2, + dml2_pstate_type_temp_read = 3, + dml2_pstate_type_dummy_pstate = 4, + dml2_pstate_type_count = 5 +}; + +enum dml2_uclk_pstate_change_strategy { + dml2_uclk_pstate_change_strategy_auto = 0, + dml2_uclk_pstate_change_strategy_force_vactive = 1, + dml2_uclk_pstate_change_strategy_force_vblank = 2, + dml2_uclk_pstate_change_strategy_force_drr = 3, + dml2_uclk_pstate_change_strategy_force_mall_svp = 4, + dml2_uclk_pstate_change_strategy_force_mall_full_frame = 5, +}; + +enum dml2_svp_mode_override { + dml2_svp_mode_override_auto = 0, + dml2_svp_mode_override_main_pipe = 1, + dml2_svp_mode_override_phantom_pipe = 2, //does not need to be defined explicitly, main overrides result in implicit phantom additions + dml2_svp_mode_override_phantom_pipe_no_data_return = 3, + dml2_svp_mode_override_imall = 4 +}; + +enum dml2_refresh_from_mall_mode_override { + dml2_refresh_from_mall_mode_override_auto = 0, + dml2_refresh_from_mall_mode_override_force_disable = 1, + dml2_refresh_from_mall_mode_override_force_enable = 2 +}; + +enum dml2_odm_mode { + dml2_odm_mode_auto = 0, + dml2_odm_mode_bypass, + dml2_odm_mode_combine_2to1, + dml2_odm_mode_combine_3to1, + dml2_odm_mode_combine_4to1, + dml2_odm_mode_split_1to2, + dml2_odm_mode_mso_1to2, + dml2_odm_mode_mso_1to4 +}; + +enum dml2_scaling_transform { + dml2_scaling_transform_explicit = 0, + dml2_scaling_transform_fullscreen, + dml2_scaling_transform_aspect_ratio, + dml2_scaling_transform_centered +}; + +enum dml2_dsc_enable_option { + dml2_dsc_disable = 0, + dml2_dsc_enable = 1, + dml2_dsc_enable_if_necessary = 2 +}; + +enum dml2_tdlut_addressing_mode { + dml2_tdlut_sw_linear = 0, 
+ dml2_tdlut_simple_linear = 1 +}; + +enum dml2_tdlut_width_mode { + dml2_tdlut_width_17_cube = 0, + dml2_tdlut_width_33_cube = 1 +}; + +enum dml2_twait_budgeting_setting { + dml2_twait_budgeting_setting_ignore = 0,// Ignore this budget in twait + + dml2_twait_budgeting_setting_if_needed, // Budget for it only if needed + //(i.e. UCLK/FCLK DPM cannot be supported in active) + + dml2_twait_budgeting_setting_try, // Budget for it as long as there is an SoC state that + // can support it +}; + +struct dml2_get_cursor_dlg_reg{ + unsigned int cursor_x_position; + unsigned int cursor_hotspot_x; + unsigned int cursor_primary_offset; + unsigned int cursor_secondary_offset; + bool cursor_stereo_en; + bool cursor_2x_magnify; + double hratio; + double pixel_rate_mhz; + double dlg_refclk_mhz; +}; + +/// @brief Surface Parameters +struct dml2_surface_cfg { + enum dml2_swizzle_mode tiling; + + struct { + unsigned long pitch; // In elements, two pixels per element in 422 packed format + unsigned long width; + unsigned long height; + } plane0; + + + struct { + unsigned long pitch; + unsigned long width; + unsigned long height; + } plane1; + + struct { + bool enable; + struct { + unsigned long pitch; + } plane0; + struct { + unsigned long pitch; + } plane1; + + struct { + double dcc_rate_plane0; + double dcc_rate_plane1; + double fraction_of_zero_size_request_plane0; + double fraction_of_zero_size_request_plane1; + } informative; + } dcc; +}; + + +struct dml2_composition_cfg { + enum dml2_rotation_angle rotation_angle; + bool mirrored; + enum dml2_scaling_transform scaling_transform; + bool rect_out_height_spans_vactive; + + struct { + bool stationary; + struct { + unsigned long width; + unsigned long height; + unsigned long x_start; + unsigned long y_start; + } plane0; + + struct { + unsigned long width; + unsigned long height; + unsigned long x_start; + unsigned long y_start; + } plane1; + } viewport; + + struct { + bool enabled; + bool easf_enabled; + bool isharp_enabled; + bool upsp_enabled; + enum dml2_sample_positioning upsp_sample_positioning; + unsigned int upsp_vtaps; + struct { + double h_ratio; + double v_ratio; + unsigned int h_taps; + unsigned int v_taps; + } plane0; + + struct { + double h_ratio; + double v_ratio; + unsigned int h_taps; + unsigned int v_taps; + } plane1; + + unsigned long rect_out_width; + } scaler_info; +}; + +struct dml2_timing_cfg { + unsigned long h_total; + unsigned long v_total; + unsigned long h_blank_end; + unsigned long v_blank_end; + unsigned long h_front_porch; + unsigned long v_front_porch; + unsigned long h_sync_width; + unsigned long pixel_clock_khz; + unsigned long h_active; + unsigned long v_active; + unsigned int bpc; //FIXME: review with Jun + struct { + enum dml2_dsc_enable_option enable; + unsigned int dsc_compressed_bpp_x16; + struct { + // for dv to specify num dsc slices to use + unsigned int num_slices; + } overrides; + } dsc; + bool interlaced; + struct { + /* static */ + bool enabled; + unsigned long min_refresh_uhz; + unsigned int max_instant_vtotal_delta; + /* dynamic */ + bool disallowed; + bool drr_active_variable; + bool drr_active_fixed; + } drr_config; + unsigned long vblank_nom; +}; + +struct dml2_link_output_cfg { + enum dml2_output_format_class output_format; + enum dml2_output_encoder_class output_encoder; + unsigned int output_dp_lane_count; + enum dml2_output_link_dp_rate output_dp_link_rate; + unsigned long audio_sample_rate; + unsigned long audio_sample_layout; + bool output_disabled; // The stream does not go to a backend for output 
to a physical + //connector (e.g. writeback only, phantom pipe) goes to writeback + bool validate_output; // Do not validate the link configuration for this display stream. +}; + +struct dml2_writeback_info { + enum dml2_source_format_class pixel_format; + unsigned long input_width; + unsigned long input_height; + unsigned long output_width; + unsigned long output_height; + unsigned long v_taps; + unsigned long h_taps; + unsigned long v_taps_chroma; + unsigned long h_taps_chroma; + double h_ratio; + double v_ratio; +}; + +struct dml2_writeback_cfg { + unsigned int active_writebacks_per_stream; + struct dml2_writeback_info writeback_stream[DML2_MAX_WRITEBACK]; +}; + +struct dml2_plane_parameters { + unsigned int stream_index; // Identifies which plane will be composed + + enum dml2_source_format_class pixel_format; + /* + * The surface and composition structures use + * the terms plane0 and plane1. These planes + * are expected to hold the following data based + * on the pixel format. + * + * RGB or YUV Non-Planar Types: + * dml2_444_8 + * dml2_444_16 + * dml2_444_32 + * dml2_444_64 + * dml2_rgbe + * + * plane0 = argb or rgbe + * plane1 = not used + * + * YUV Planar-Types: + * dml2_420_8 + * dml2_420_10 + * dml2_420_12 + * + * plane0 = luma + * plane1 = chroma + * + * RGB Planar Types: + * dml2_rgbe_alpha + * + * plane0 = rgbe + * plane1 = alpha + * + * Mono Non-Planar Types: + * dml2_mono_8 + * dml2_mono_16 + * + * plane0 = luma + * plane1 = not used + */ + + struct dml2_surface_cfg surface; + struct dml2_composition_cfg composition; + + struct { + bool enable; + unsigned long lines_before_active_required; + unsigned long transmitted_bytes; + } dynamic_meta_data; + + struct { + unsigned int num_cursors; + unsigned long cursor_width; + unsigned long cursor_bpp; + } cursor; + + // For TDLUT, SW would assume TDLUT is setup and enable all the time and + // budget for worst case addressing/width mode + struct { + bool setup_for_tdlut; + enum dml2_tdlut_addressing_mode tdlut_addressing_mode; + enum dml2_tdlut_width_mode tdlut_width_mode; + bool tdlut_mpc_width_flag; + } tdlut; + + bool immediate_flip; + + struct { + // Logical overrides to power management policies (usually) + enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy; + enum dml2_refresh_from_mall_mode_override refresh_from_mall; + unsigned int det_size_override_kb; + unsigned int mpcc_combine_factor; + + // reserved_vblank_time_ns is the minimum time to reserve in vblank for Twait + // The actual reserved vblank time used for the corresponding stream in mode_programming would be at least as much as this per-plane override. 
+ long reserved_vblank_time_ns; + unsigned int max_vactive_det_fill_delay_us[dml2_pstate_type_count]; // 0 = no reserved time, +ve = explicit max delay + unsigned int gpuvm_min_page_size_kbytes; + unsigned int hostvm_min_page_size_kbytes; + + enum dml2_svp_mode_override legacy_svp_config; //TODO remove in favor of svp_config + + struct { + // HW specific overrides, there's almost no reason to mess with these + // generally used for debugging or simulation + bool force_one_row_for_frame; + struct { + bool enable; + bool value; + } force_pte_buffer_mode; + double dppclk_mhz; + } hw; + } overrides; +}; + +struct dml2_stream_parameters { + struct dml2_timing_cfg timing; + struct dml2_link_output_cfg output; + struct dml2_writeback_cfg writeback; + + struct { + enum dml2_odm_mode odm_mode; + bool disable_dynamic_odm; + bool disable_subvp; + int minimum_vblank_idle_requirement_us; + + struct { + struct { + enum dml2_twait_budgeting_setting uclk_pstate; + enum dml2_twait_budgeting_setting fclk_pstate; + enum dml2_twait_budgeting_setting stutter_enter_exit; + } twait_budgeting; + } hw; + } overrides; +}; + +struct dml2_display_cfg { + bool gpuvm_enable; + bool ffbm_enable; + bool hostvm_enable; + + // Allocate DET proportionally between streams based on pixel rate + // and then allocate proportionally between planes. + bool minimize_det_reallocation; + + unsigned int gpuvm_max_page_table_levels; + unsigned int hostvm_max_non_cached_page_table_levels; + + struct dml2_plane_parameters plane_descriptors[DML2_MAX_PLANES]; + struct dml2_stream_parameters stream_descriptors[DML2_MAX_PLANES]; + + unsigned int num_planes; + unsigned int num_streams; + + struct { + struct { + // HW specific overrides, there's almost no reason to mess with these + // generally used for debugging or simulation + struct { + bool enable; + bool value; + } force_unbounded_requesting; + + struct { + bool enable; + bool value; + } force_nom_det_size_kbytes; + + bool mode_support_check_disable; + bool mcache_admissibility_check_disable; + bool surface_viewport_size_check_disable; + double dlg_ref_clk_mhz; + double dispclk_mhz; + double dcfclk_mhz; + bool optimize_tdlut_scheduling; // TBD: for DV, will set this to 1, to ensure tdlut schedule is calculated based on address/width mode + } hw; + + struct { + bool uclk_pstate_change_disable; + bool fclk_pstate_change_disable; + bool g6_temp_read_pstate_disable; + bool g7_ppt_pstate_disable; + } power_management; + + bool enhanced_prefetch_schedule_acceleration; + bool dcc_programming_assumes_scan_direction_unknown; + bool synchronize_timings; + bool synchronize_ddr_displays_for_uclk_pstate_change; + bool max_outstanding_when_urgent_expected_disable; + bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming + bool all_streams_blanked; + } overrides; +}; + +struct dml2_pipe_configuration_descriptor { + struct { + unsigned int viewport_x_start; + unsigned int viewport_width; + } plane0; + + struct { + unsigned int viewport_x_start; + unsigned int viewport_width; + } plane1; + + bool plane1_enabled; + bool imall_enabled; +}; + +struct dml2_plane_mcache_configuration_descriptor { + const struct dml2_plane_parameters *plane_descriptor; + const struct dml2_mcache_surface_allocation *mcache_allocation; + + struct dml2_pipe_configuration_descriptor pipe_configurations[DML2_MAX_DCN_PIPES]; + char num_pipes; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h 
b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h new file mode 100644 index 000000000000..8f624a912e78 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML_TOP_POLICY_TYPES_H__ +#define __DML_TOP_POLICY_TYPES_H__ + +struct dml2_policy_parameters { + unsigned long odm_combine_dispclk_threshold_khz; + unsigned int max_immediate_flip_latency; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h new file mode 100644 index 000000000000..1fbc520c2540 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__ +#define __DML_TOP_SOC_PARAMETER_TYPES_H__ + +#include "dml2_external_lib_deps.h" + +#define DML_MAX_CLK_TABLE_SIZE 20 + +struct dml2_soc_derate_values { + unsigned int dram_derate_percent_pixel; + unsigned int dram_derate_percent_vm; + unsigned int dram_derate_percent_pixel_and_vm; + + unsigned int fclk_derate_percent; + unsigned int dcfclk_derate_percent; +}; + +struct dml2_soc_derates { + struct dml2_soc_derate_values system_active_urgent; + struct dml2_soc_derate_values system_active_average; + struct dml2_soc_derate_values dcn_mall_prefetch_urgent; + struct dml2_soc_derate_values dcn_mall_prefetch_average; + struct dml2_soc_derate_values system_idle_average; +}; + +struct dml2_dcn32x_soc_qos_params { + struct { + unsigned int base_latency_us; + unsigned int base_latency_pixel_vm_us; + unsigned int base_latency_vm_us; + unsigned int scaling_factor_fclk_us; + unsigned int scaling_factor_mhz; + } urgent_latency_us; + + unsigned int loaded_round_trip_latency_fclk_cycles; + unsigned int urgent_out_of_order_return_per_channel_pixel_only_bytes; + unsigned int urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; + unsigned int urgent_out_of_order_return_per_channel_vm_only_bytes; +}; + +struct dml2_dcn4_uclk_dpm_dependent_qos_params { + unsigned long minimum_uclk_khz; + unsigned int urgent_ramp_uclk_cycles; + unsigned int trip_to_memory_uclk_cycles; + unsigned int meta_trip_to_memory_uclk_cycles; + unsigned int maximum_latency_when_urgent_uclk_cycles; + unsigned int average_latency_when_urgent_uclk_cycles; + unsigned int maximum_latency_when_non_urgent_uclk_cycles; + unsigned int average_latency_when_non_urgent_uclk_cycles; +}; + +struct dml2_dcn4x_soc_qos_params { + unsigned int df_qos_response_time_fclk_cycles; + unsigned int max_round_trip_to_furthest_cs_fclk_cycles; + unsigned int mall_overhead_fclk_cycles; + unsigned int meta_trip_adder_fclk_cycles; + unsigned int average_transport_distance_fclk_cycles; + double umc_urgent_ramp_latency_margin; + double umc_max_latency_margin; + double umc_average_latency_margin; + double fabric_max_transport_latency_margin; + double fabric_average_transport_latency_margin; + struct dml2_dcn4_uclk_dpm_dependent_qos_params per_uclk_dpm_params[DML_MAX_CLK_TABLE_SIZE]; +}; + +enum dml2_qos_param_type { + dml2_qos_param_type_dcn3, + dml2_qos_param_type_dcn4x +}; + +struct dml2_soc_qos_parameters { + struct dml2_soc_derates derate_table; + struct { + unsigned int base_latency_us; + unsigned int scaling_factor_us; + unsigned int scaling_factor_mhz; + } writeback; + + 
union { + struct dml2_dcn32x_soc_qos_params dcn32x; + struct dml2_dcn4x_soc_qos_params dcn4x; + } qos_params; + + enum dml2_qos_param_type qos_type; +}; + +struct dml2_soc_power_management_parameters { + double dram_clk_change_blackout_us; + double dram_clk_change_read_only_us; // deprecated + double dram_clk_change_write_only_us; // deprecated + double fclk_change_blackout_us; + double g7_ppt_blackout_us; + double g7_temperature_read_blackout_us; + double stutter_enter_plus_exit_latency_us; + double stutter_exit_latency_us; + double low_power_stutter_enter_plus_exit_latency_us; + double low_power_stutter_exit_latency_us; + double z8_stutter_enter_plus_exit_latency_us; + double z8_stutter_exit_latency_us; + double z8_min_idle_time; + double g6_temp_read_blackout_us[DML_MAX_CLK_TABLE_SIZE]; + double type_b_dram_clk_change_blackout_us; + double type_b_ppt_blackout_us; +}; + +struct dml2_clk_table { + unsigned long clk_values_khz[DML_MAX_CLK_TABLE_SIZE]; + unsigned char num_clk_values; +}; + +struct dml2_dram_params { + unsigned int channel_width_bytes; + unsigned int channel_count; + unsigned int transactions_per_clock; +}; + +struct dml2_soc_state_table { + struct dml2_clk_table uclk; + struct dml2_clk_table fclk; + struct dml2_clk_table dcfclk; + struct dml2_clk_table dispclk; + struct dml2_clk_table dppclk; + struct dml2_clk_table dtbclk; + struct dml2_clk_table phyclk; + struct dml2_clk_table socclk; + struct dml2_clk_table dscclk; + struct dml2_clk_table phyclk_d18; + struct dml2_clk_table phyclk_d32; + + struct dml2_dram_params dram_config; +}; + +struct dml2_soc_vmin_clock_limits { + unsigned long dispclk_khz; + unsigned long dcfclk_khz; +}; + +struct dml2_soc_bb { + struct dml2_soc_state_table clk_table; + struct dml2_soc_qos_parameters qos_parameters; + struct dml2_soc_power_management_parameters power_management_parameters; + struct dml2_soc_vmin_clock_limits vmin_limit; + + double lower_bound_bandwidth_dchub; + double fraction_of_urgent_bandwidth_nominal_target; + double fraction_of_urgent_bandwidth_flip_target; + unsigned int dprefclk_mhz; + unsigned int xtalclk_mhz; + unsigned int pcie_refclk_mhz; + unsigned int dchub_refclk_mhz; + unsigned int mall_allocated_for_dcn_mbytes; + unsigned int max_outstanding_reqs; + unsigned long fabric_datapath_to_dcn_data_return_bytes; + unsigned long return_bus_width_bytes; + unsigned long hostvm_min_page_size_kbytes; + unsigned long gpuvm_min_page_size_kbytes; + double phy_downspread_percent; + double dcn_downspread_percent; + double dispclk_dppclk_vco_speed_mhz; + bool no_dfs; + bool do_urgent_latency_adjustment; + unsigned int mem_word_bytes; + unsigned int num_dcc_mcaches; + unsigned int mcache_size_bytes; + unsigned int mcache_line_size_bytes; + unsigned long max_fclk_for_uclk_dpm_khz; +}; + +struct dml2_ip_capabilities { + unsigned int pipe_count; + unsigned int otg_count; + unsigned int TDLUT_33cube_count; + unsigned int num_dsc; + unsigned int max_num_dp2p0_streams; + unsigned int max_num_hdmi_frl_outputs; + unsigned int max_num_dp2p0_outputs; + unsigned int max_num_wb; + unsigned int rob_buffer_size_kbytes; + unsigned int config_return_buffer_size_in_kbytes; + unsigned int config_return_buffer_segment_size_in_kbytes; + unsigned int meta_fifo_size_in_kentries; + unsigned int compressed_buffer_segment_size_in_kbytes; + unsigned int cursor_buffer_size; + unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; + unsigned int hostvm_mode; + unsigned int subvp_drr_scheduling_margin_us; + unsigned int 
subvp_prefetch_end_to_mall_start_us; + unsigned int subvp_fw_processing_delay; + unsigned int max_vactive_det_fill_delay_us; + unsigned int ppt_max_allow_delay_us; + unsigned int temp_read_max_allow_delay_us; + unsigned int dummy_pstate_max_allow_delay_us; + /* FAMS2 delays */ + struct { + unsigned int max_allow_delay_us; + unsigned int scheduling_delay_us; + unsigned int vertical_interrupt_ack_delay_us; // delay to acknowledge vline int + unsigned int allow_programming_delay_us; // time requires to program allow + unsigned int min_allow_width_us; + unsigned int subvp_df_throttle_delay_us; + unsigned int subvp_programming_delay_us; + unsigned int subvp_prefetch_to_mall_delay_us; + unsigned int drr_programming_delay_us; + + unsigned int lock_timeout_us; + unsigned int recovery_timeout_us; + unsigned int flip_programming_delay_us; + } fams2; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h new file mode 100644 index 000000000000..452e4a2e72c0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h @@ -0,0 +1,744 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML_TOP_TYPES_H__ +#define __DML_TOP_TYPES_H__ + +#include "dml_top_display_cfg_types.h" +#include "dml_top_soc_parameter_types.h" +#include "dml_top_policy_types.h" +#include "dml_top_dchub_registers.h" + +#include "dmub_cmd.h" + +struct dml2_instance; + +enum dml2_project_id { + dml2_project_invalid = 0, + dml2_project_dcn4x_stage1, + dml2_project_dcn4x_stage2, + dml2_project_dcn4x_stage2_auto_drr_svp, +}; + +enum dml2_pstate_change_support { + dml2_pstate_change_vactive = 0, + dml2_pstate_change_vblank = 1, + dml2_pstate_change_vblank_and_vactive = 2, + dml2_pstate_change_drr = 3, + dml2_pstate_change_mall_svp = 4, + dml2_pstate_change_mall_full_frame = 6, + dml2_pstate_change_unsupported = 7 +}; + +enum dml2_output_type_and_rate__type { + dml2_output_type_unknown = 0, + dml2_output_type_dp = 1, + dml2_output_type_edp = 2, + dml2_output_type_dp2p0 = 3, + dml2_output_type_hdmi = 4, + dml2_output_type_hdmifrl = 5 +}; + +enum dml2_output_type_and_rate__rate { + dml2_output_rate_unknown = 0, + dml2_output_rate_dp_rate_hbr = 1, + dml2_output_rate_dp_rate_hbr2 = 2, + dml2_output_rate_dp_rate_hbr3 = 3, + dml2_output_rate_dp_rate_uhbr10 = 4, + dml2_output_rate_dp_rate_uhbr13p5 = 5, + dml2_output_rate_dp_rate_uhbr20 = 6, + dml2_output_rate_hdmi_rate_3x3 = 7, + dml2_output_rate_hdmi_rate_6x3 = 8, + dml2_output_rate_hdmi_rate_6x4 = 9, + dml2_output_rate_hdmi_rate_8x4 = 10, + dml2_output_rate_hdmi_rate_10x4 = 11, + dml2_output_rate_hdmi_rate_12x4 = 12, + dml2_output_rate_hdmi_rate_16x4 = 13, + dml2_output_rate_hdmi_rate_20x4 = 14 +}; + +struct dml2_pmo_options { + bool disable_vblank; + bool disable_svp; + bool disable_drr_var; + bool disable_drr_clamped; + bool disable_drr_var_when_var_active; + bool disable_drr_clamped_when_var_active; + bool disable_fams2; + bool disable_vactive_det_fill_bw_pad; /* dml2_project_dcn4x_stage2_auto_drr_svp and above only */ + bool disable_dyn_odm; + bool disable_dyn_odm_for_multi_stream; + bool disable_dyn_odm_for_stream_with_svp; + struct dml2_pmo_pstate_strategy *override_strategy_lists[DML2_MAX_PLANES]; + unsigned int num_override_strategies_per_list[DML2_MAX_PLANES]; +}; + +struct dml2_options { + enum dml2_project_id project_id; + struct dml2_pmo_options pmo_options; +}; + +struct dml2_initialize_instance_in_out { + 
struct dml2_instance *dml2_instance; + struct dml2_options options; + struct dml2_soc_bb soc_bb; + struct dml2_ip_capabilities ip_caps; + + struct { + void *explicit_ip_bb; + unsigned int explicit_ip_bb_size; + } overrides; +}; + +struct dml2_reset_instance_in_out { + struct dml2_instance *dml2_instance; +}; + +struct dml2_check_mode_supported_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; + const struct dml2_display_cfg *display_config; + + /* + * Outputs + */ + bool is_supported; +}; + +struct dml2_mcache_surface_allocation { + bool valid; + /* + * For iMALL, dedicated mall mcaches are required (sharing of last + * slice possible), for legacy phantom or phantom without return + * the only mall mcaches need to be valid. + */ + bool requires_dedicated_mall_mcache; + + unsigned int num_mcaches_plane0; + unsigned int num_mcaches_plane1; + /* + * A plane is divided into vertical slices of mcaches, + * which wrap on the surface width. + * + * For example, if the surface width is 7680, and split into + * three slices of equal width, the boundary array would contain + * [2560, 5120, 7680] + * + * The assignments are + * 0 = [0 .. 2559] + * 1 = [2560 .. 5119] + * 2 = [5120 .. 7679] + * 0 = [7680 .. INF] + * The final element implicitly is the same as the first, and + * at first seems invalid since it is never referenced (since) + * it is outside the surface. However, its useful when shifting + * (see below). + * + * For any given valid mcache assignment, a shifted version, wrapped + * on the surface width boundary is also assumed to be valid. + * + * For example, shifting [2560, 5120, 7680] by -50 results in + * [2510, 5170, 7630]. + * + * The assignments are now: + * 0 = [0 .. 2509] + * 1 = [2510 .. 5169] + * 2 = [5170 .. 7629] + * 0 = [7630 .. INF] + */ + int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1]; + int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1]; + + /* + * Shift grainularity is not necessarily 1 + */ + struct { + int p0; + int p1; + } shift_granularity; + + /* + * MCacheIDs have global scope in the SoC, and they are stored here. + * These IDs are generally not valid until all planes in a display + * configuration have had their mcache requirements calculated. + */ + int global_mcache_ids_plane0[DML2_MAX_MCACHES + 1]; + int global_mcache_ids_plane1[DML2_MAX_MCACHES + 1]; + int global_mcache_ids_mall_plane0[DML2_MAX_MCACHES + 1]; + int global_mcache_ids_mall_plane1[DML2_MAX_MCACHES + 1]; + + /* + * Generally, plane0/1 slices must use a disjoint set of caches + * but in some cases the final segement of the two planes can + * use the same cache. If plane0_plane1 is set, then this is + * allowed. + * + * Similarly, the caches allocated to MALL prefetcher are generally + * disjoint, but if mall_prefetch is set, then the final segment + * between the main and the mall pixel requestor can use the same + * cache. + * + * Note that both bits may be set at the same time. 
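To make the wrap-around behaviour of the offset arrays concrete, the following standalone sketch (illustrative only, not DML code) maps an x coordinate to a slice index given the shifted boundaries from the example above:

	/* Illustrative only: boundaries[] holds the exclusive upper edge of each
	 * vertical mcache slice, and the mapping wraps on the surface width as
	 * described for mcache_x_offsets_plane0/plane1. */
	static int mcache_slice_for_x(const int *boundaries, int num_slices, int x)
	{
		int i;

		for (i = 0; i < num_slices; i++) {
			if (x < boundaries[i])
				return i;
		}
		return 0;	/* beyond the last boundary: wrap back to slice 0 */
	}

	/* With [2560, 5120, 7680] shifted by -50 to [2510, 5170, 7630]:
	 *   x = 100  -> slice 0
	 *   x = 2600 -> slice 1
	 *   x = 7650 -> slice 0 (wrapped), matching "0 = [7630 .. INF]" above. */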
+ */ + struct { + bool mall_comb_mcache_p0; + bool mall_comb_mcache_p1; + bool plane0_plane1; + } last_slice_sharing; + + struct { + int meta_row_bytes_plane0; + int meta_row_bytes_plane1; + } informative; +}; + +enum dml2_pstate_method { + dml2_pstate_method_na = 0, + /* hw exclusive modes */ + dml2_pstate_method_vactive = 1, + dml2_pstate_method_vblank = 2, + dml2_pstate_method_reserved_hw = 5, + /* fw assisted exclusive modes */ + dml2_pstate_method_fw_svp = 6, + dml2_pstate_method_reserved_fw = 10, + /* fw assisted modes requiring drr modulation */ + dml2_pstate_method_fw_vactive_drr = 11, + dml2_pstate_method_fw_vblank_drr = 12, + dml2_pstate_method_fw_svp_drr = 13, + dml2_pstate_method_reserved_fw_drr_clamped = 20, + dml2_pstate_method_fw_drr = 21, + dml2_pstate_method_reserved_fw_drr_var = 22, + dml2_pstate_method_count +}; + +struct dml2_per_plane_programming { + const struct dml2_plane_parameters *plane_descriptor; + + union { + struct { + unsigned long dppclk_khz; + } dcn4x; + } min_clocks; + + struct dml2_mcache_surface_allocation mcache_allocation; + + // If a stream is using automatic or forced odm combine + // and the stream for this plane has num_odms_required > 1 + // num_dpps_required is always equal to num_odms_required for + // ALL planes of the stream + + // If a stream is using odm split, then this value is always 1 + unsigned int num_dpps_required; + + enum dml2_pstate_method uclk_pstate_support_method; + + // MALL size requirements for MALL SS and SubVP + unsigned int surface_size_mall_bytes; + unsigned int svp_size_mall_bytes; + + struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES]; + + struct { + bool valid; + struct dml2_plane_parameters descriptor; + struct dml2_mcache_surface_allocation mcache_allocation; + struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES]; + } phantom_plane; +}; + +union dml2_global_sync_programming { + struct { + unsigned int vstartup_lines; + unsigned int vupdate_offset_pixels; + unsigned int vupdate_vupdate_width_pixels; + unsigned int vready_offset_pixels; + unsigned int pstate_keepout_start_lines; + } dcn4x; +}; + +struct dml2_per_stream_programming { + const struct dml2_stream_parameters *stream_descriptor; + + union { + struct { + unsigned long dscclk_khz; + unsigned long dtbclk_khz; + unsigned long phyclk_khz; + } dcn4x; + } min_clocks; + + union dml2_global_sync_programming global_sync; + + unsigned int num_odms_required; + + enum dml2_pstate_method uclk_pstate_method; + + struct { + bool enabled; + struct dml2_stream_parameters descriptor; + union dml2_global_sync_programming global_sync; + } phantom_stream; + + union dmub_cmd_fams2_config fams2_base_params; + union { + union dmub_cmd_fams2_config fams2_sub_params; + union dmub_fams2_stream_static_sub_state_v2 fams2_sub_params_v2; + }; +}; + +//----------------- +// Mode Support Information +//----------------- + +struct dml2_mode_support_info { + bool ModeIsSupported; //<brief Is the mode support any voltage and combine setting + bool ImmediateFlipSupport; //<brief Means mode support immediate flip at the max combine setting; determine in mode support and used in mode programming + // Mode Support Reason + bool WritebackLatencySupport; + bool ScaleRatioAndTapsSupport; + bool SourceFormatPixelAndScanSupport; + bool P2IWith420; + bool DSCOnlyIfNecessaryWithBPP; + bool DSC422NativeNotSupported; + bool LinkRateDoesNotMatchDPVersion; + bool LinkRateForMultistreamNotIndicated; + bool BPPForMultistreamNotIndicated; + bool MultistreamWithHDMIOreDP; + bool 
MSOOrODMSplitWithNonDPLink; + bool NotEnoughLanesForMSO; + bool NumberOfOTGSupport; + bool NumberOfHDMIFRLSupport; + bool NumberOfDP2p0Support; + bool NumberOfTDLUT33cubeSupport; + bool WritebackScaleRatioAndTapsSupport; + bool CursorSupport; + bool PitchSupport; + bool ViewportExceedsSurface; + bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified; + bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; + bool InvalidCombinationOfMALLUseForPStateAndStaticScreen; + bool InvalidCombinationOfMALLUseForPState; + bool ExceededMALLSize; + bool EnoughWritebackUnits; + bool ExceededMultistreamSlots; + bool NotEnoughDSCUnits; + bool NotEnoughDSCSlices; + bool PixelsPerLinePerDSCUnitSupport; + bool DSCCLKRequiredMoreThanSupported; + bool DTBCLKRequiredMoreThanSupported; + bool LinkCapacitySupport; + bool ROBSupport; + bool OutstandingRequestsSupport; + bool OutstandingRequestsUrgencyAvoidance; + bool PTEBufferSizeNotExceeded; + bool DCCMetaBufferSizeNotExceeded; + bool TotalVerticalActiveBandwidthSupport; + bool VActiveBandwidthSupport; + enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + bool USRRetrainingSupport; + bool PrefetchSupported; + bool DynamicMetadataSupported; + bool VRatioInPrefetchSupported; + bool DISPCLK_DPPCLK_Support; + bool TotalAvailablePipesSupport; + bool ViewportSizeSupport; + bool ImmediateFlipSupportedForState; + double MaxTotalVerticalActiveAvailableBandwidth; + bool MPCCombineEnable[DML2_MAX_PLANES]; /// <brief Indicate if the MPC Combine enable in the given state and optimize mpc combine setting + enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; /// <brief ODM mode that is chosen in the mode check stage and will be used in mode programming stage + unsigned int DPPPerSurface[DML2_MAX_PLANES]; /// <brief How many DPPs are needed drive the surface to output. If MPCC or ODMC could be 2 or 4. 
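For orientation, a caller that gets this structure back after a failed support check could report a handful of the reason flags; a minimal, hypothetical sketch (the field names are the ones defined in this struct, the helper itself and the use of DML_LOG_VERBOSE here are assumptions):

	/* Hypothetical debug helper, not part of DML: print a few of the more
	 * self-explanatory rejection reasons. */
	static void dump_mode_support_reasons(const struct dml2_mode_support_info *info)
	{
		if (info->ModeIsSupported)
			return;

		if (!info->PrefetchSupported)
			DML_LOG_VERBOSE("DML: prefetch not supported\n");
		if (info->NotEnoughDSCUnits)
			DML_LOG_VERBOSE("DML: not enough DSC units\n");
		if (info->ExceededMALLSize)
			DML_LOG_VERBOSE("DML: exceeded MALL size\n");
	}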
+ bool DSCEnabled[DML2_MAX_PLANES]; /// <brief Indicate if the DSC is actually required; used in mode_programming + bool FECEnabled[DML2_MAX_PLANES]; /// <brief Indicate if the FEC is actually required + unsigned int NumberOfDSCSlices[DML2_MAX_PLANES]; /// <brief Indicate how many slices needed to support the given mode + double OutputBpp[DML2_MAX_PLANES]; + enum dml2_output_type_and_rate__type OutputType[DML2_MAX_PLANES]; + enum dml2_output_type_and_rate__rate OutputRate[DML2_MAX_PLANES]; + unsigned int AlignedYPitch[DML2_MAX_PLANES]; + unsigned int AlignedCPitch[DML2_MAX_PLANES]; + bool g6_temp_read_support; + bool temp_read_or_ppt_support; + bool qos_bandwidth_support; + bool dcfclk_support; +}; // dml2_mode_support_info + +struct dml2_display_cfg_programming { + struct dml2_display_cfg display_config; + + union { + struct { + unsigned long dcfclk_khz; + unsigned long fclk_khz; + unsigned long uclk_khz; + unsigned long socclk_khz; + unsigned long dispclk_khz; + unsigned long dcfclk_deepsleep_khz; + unsigned long dpp_ref_khz; + } dcn32x; + struct { + struct { + unsigned long uclk_khz; + unsigned long fclk_khz; + unsigned long dcfclk_khz; + } active; + struct { + unsigned long uclk_khz; + unsigned long fclk_khz; + unsigned long dcfclk_khz; + } idle; + struct { + unsigned long uclk_khz; + unsigned long fclk_khz; + unsigned long dcfclk_khz; + } svp_prefetch; + struct { + unsigned long uclk_khz; + unsigned long fclk_khz; + unsigned long dcfclk_khz; + } svp_prefetch_no_throttle; + + unsigned long deepsleep_dcfclk_khz; + unsigned long dispclk_khz; + unsigned long dpprefclk_khz; + unsigned long dtbrefclk_khz; + unsigned long socclk_khz; + + struct { + uint32_t dispclk_did; + uint32_t dpprefclk_did; + uint32_t dtbrefclk_did; + } divider_ids; + } dcn4x; + } min_clocks; + + bool uclk_pstate_supported; + bool fclk_pstate_supported; + + /* indicates this configuration requires FW to support */ + bool fams2_required; + struct dmub_cmd_fams2_global_config fams2_global_config; + + struct { + bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition + uint8_t base_percent_efficiency; //LP1 + uint8_t low_power_percent_efficiency; //LP2 + } stutter; + + struct { + bool meets_eco; // Stutter cycles will meet Z8 ECO criteria + bool supported_in_blank; // Changing to configurations where this is false requires Z8 to be disabled during the transition + } z8_stutter; + + struct dml2_dchub_global_register_set global_regs; + + struct dml2_per_plane_programming plane_programming[DML2_MAX_PLANES]; + struct dml2_per_stream_programming stream_programming[DML2_MAX_PLANES]; + + // Don't access this structure directly, access it through plane_programming.pipe_regs + struct dml2_dchub_per_pipe_register_set pipe_regs[DML2_MAX_PLANES]; + + struct { + struct { + double urgent_us; + double writeback_urgent_us; + double writeback_pstate_us; + double writeback_fclk_pstate_us; + double cstate_exit_us; + double cstate_enter_plus_exit_us; + double z8_cstate_exit_us; + double z8_cstate_enter_plus_exit_us; + double pstate_change_us; + double fclk_pstate_change_us; + double usr_retraining_us; + double temp_read_or_ppt_watermark_us; + } watermarks; + + struct { + unsigned int swath_width_plane0; + unsigned int swath_height_plane0; + unsigned int swath_height_plane1; + unsigned int dpte_row_height_plane0; + unsigned int dpte_row_height_plane1; + unsigned int meta_row_height_plane0; + unsigned int meta_row_height_plane1; + } plane_info[DML2_MAX_PLANES]; + + struct 
{ + unsigned int total_num_dpps_required; + } dpp; + + struct { + unsigned long long total_surface_size_in_mall_bytes; + unsigned int subviewport_lines_needed_in_mall[DML2_MAX_PLANES]; + } mall; + + struct { + double urgent_latency_us; // urgent ramp latency + double max_non_urgent_latency_us; + double max_urgent_latency_us; + double avg_non_urgent_latency_us; + double avg_urgent_latency_us; + double wm_memory_trip_us; + double meta_trip_memory_us; + double fraction_of_urgent_bandwidth; // nom + double fraction_of_urgent_bandwidth_immediate_flip; + double fraction_of_urgent_bandwidth_mall; + double max_active_fclk_change_latency_supported; + unsigned int min_return_latency_in_dcfclk; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + double dram_vm_only_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + double dram_vm_only_bw_mbps; + } sys_active; + } urg_bw_available; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } avg_bw_available; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } non_urg_bw_required; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } non_urg_bw_required_with_flip; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + + } urg_bw_required; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } urg_bw_required_with_flip; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } avg_bw_required; + } qos; + + struct { + unsigned long long det_size_in_kbytes[DML2_MAX_PLANES]; + unsigned long long DETBufferSizeY[DML2_MAX_PLANES]; + unsigned long long comp_buffer_size_kbytes; + bool UnboundedRequestEnabled; + unsigned int compbuf_reserved_space_64b; + } crb; + + struct { + unsigned int max_uncompressed_block_plane0; + unsigned int max_compressed_block_plane0; + unsigned int independent_block_plane0; + unsigned int max_uncompressed_block_plane1; + unsigned int max_compressed_block_plane1; + unsigned int independent_block_plane1; + } dcc_control[DML2_MAX_PLANES]; + + struct { + double stutter_efficiency; + double stutter_efficiency_with_vblank; + double stutter_num_bursts; + + struct { + double stutter_efficiency; + double stutter_efficiency_with_vblank; + double stutter_num_bursts; + double stutter_period; + + struct { + double stutter_efficiency; + double stutter_num_bursts; + double stutter_period; + } bestcase; + } z8; + } power_management; + + struct { + double min_ttu_vblank_us[DML2_MAX_PLANES]; + bool vready_at_or_after_vsync[DML2_MAX_PLANES]; + double min_dst_y_next_start[DML2_MAX_PLANES]; + bool cstate_max_cap_mode; + bool hw_debug5; + unsigned int dcfclk_deep_sleep_hysteresis; + unsigned int dst_x_after_scaler[DML2_MAX_PLANES]; + unsigned int dst_y_after_scaler[DML2_MAX_PLANES]; + unsigned int prefetch_source_lines_plane0[DML2_MAX_PLANES]; + unsigned int prefetch_source_lines_plane1[DML2_MAX_PLANES]; + bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES]; + 
bool UsesMALLForStaticScreen[DML2_MAX_PLANES]; + unsigned int CursorDstXOffset[DML2_MAX_PLANES]; + unsigned int CursorDstYOffset[DML2_MAX_PLANES]; + unsigned int CursorChunkHDLAdjust[DML2_MAX_PLANES]; + unsigned int dpte_group_bytes[DML2_MAX_PLANES]; + unsigned int vm_group_bytes[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeLuma[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeChroma[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + double TimePerVMGroupVBlank[DML2_MAX_PLANES]; + double TimePerVMGroupFlip[DML2_MAX_PLANES]; + double TimePerVMRequestVBlank[DML2_MAX_PLANES]; + double TimePerVMRequestFlip[DML2_MAX_PLANES]; + double Tdmdl_vm[DML2_MAX_PLANES]; + double Tdmdl[DML2_MAX_PLANES]; + unsigned int VStartup[DML2_MAX_PLANES]; + unsigned int VUpdateOffsetPix[DML2_MAX_PLANES]; + unsigned int VUpdateWidthPix[DML2_MAX_PLANES]; + unsigned int VReadyOffsetPix[DML2_MAX_PLANES]; + + double DST_Y_PER_PTE_ROW_NOM_L[DML2_MAX_PLANES]; + double DST_Y_PER_PTE_ROW_NOM_C[DML2_MAX_PLANES]; + double time_per_pte_group_nom_luma[DML2_MAX_PLANES]; + double time_per_pte_group_nom_chroma[DML2_MAX_PLANES]; + double time_per_pte_group_vblank_luma[DML2_MAX_PLANES]; + double time_per_pte_group_vblank_chroma[DML2_MAX_PLANES]; + double time_per_pte_group_flip_luma[DML2_MAX_PLANES]; + double time_per_pte_group_flip_chroma[DML2_MAX_PLANES]; + double VRatioPrefetchY[DML2_MAX_PLANES]; + double VRatioPrefetchC[DML2_MAX_PLANES]; + double DestinationLinesForPrefetch[DML2_MAX_PLANES]; + double DestinationLinesToRequestVMInVBlank[DML2_MAX_PLANES]; + double DestinationLinesToRequestRowInVBlank[DML2_MAX_PLANES]; + double DestinationLinesToRequestVMInImmediateFlip[DML2_MAX_PLANES]; + double DestinationLinesToRequestRowInImmediateFlip[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeLuma[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeChroma[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + + double WritebackRequiredBandwidth; + double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES]; + double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES]; + double DSCCLK_calculated[DML2_MAX_PLANES]; + unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES]; + bool PTE_BUFFER_MODE[DML2_MAX_PLANES]; + double DSCDelay[DML2_MAX_PLANES]; + double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; + unsigned int PrefetchMode[DML2_MAX_PLANES]; // LEGACY_ONLY + bool ROBUrgencyAvoidance; + double LowestPrefetchMargin; + + unsigned int pstate_recout_reduction_lines[DML2_MAX_PLANES]; + } misc; + + struct dml2_mode_support_info mode_support_info; + unsigned int voltage_level; // LEGACY_ONLY + + // For DV only + // This is what dml core calculated, only on the full_vp width and assume we have + // unlimited # of mcache + struct dml2_mcache_surface_allocation non_optimized_mcache_allocation[DML2_MAX_PLANES]; + + bool failed_prefetch; + bool failed_uclk_pstate; + bool failed_mcache_validation; + bool failed_dpmm; + bool failed_mode_programming; + bool failed_mode_programming_dcfclk; + bool failed_mode_programming_prefetch; + bool failed_mode_programming_flip; + bool failed_map_watermarks; + } informative; +}; + +struct dml2_build_mode_programming_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; + const struct dml2_display_cfg *display_config; + + /* + * Outputs + */ + struct 
dml2_display_cfg_programming *programming; +}; + +struct dml2_build_mcache_programming_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; + + struct dml2_plane_mcache_configuration_descriptor mcache_configurations[DML2_MAX_PLANES]; + char num_configurations; + + /* + * Outputs + */ + // per_plane_pipe_mcache_regs[i][j] refers to the proper programming for the j-th pipe of the + // i-th plane (from mcache_configurations) + struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; + + // It's not a good idea to reference this directly, better to use the pointer structure above instead + struct dml2_hubp_pipe_mcache_regs mcache_regs_set[DML2_MAX_DCN_PIPES]; +}; + +struct dml2_unit_test_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; +}; + + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c new file mode 100644 index 000000000000..eba948e187c1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c @@ -0,0 +1,661 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_internal_shared_types.h" +#include "dml2_core_shared_types.h" +#include "dml2_core_dcn4.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +struct dml2_core_ip_params core_dcn4_ip_caps_base = { + // Hardcoded values for DCN3x + .vblank_nom_default_us = 668, + .remote_iommu_outstanding_translations = 256, + .rob_buffer_size_kbytes = 128, + .config_return_buffer_size_in_kbytes = 1280, + .config_return_buffer_segment_size_in_kbytes = 64, + .compressed_buffer_segment_size_in_kbytes = 64, + .dpte_buffer_size_in_pte_reqs_luma = 68, + .dpte_buffer_size_in_pte_reqs_chroma = 36, + .pixel_chunk_size_kbytes = 8, + .alpha_pixel_chunk_size_kbytes = 4, + .min_pixel_chunk_size_bytes = 1024, + .writeback_chunk_size_kbytes = 8, + .line_buffer_size_bits = 1171920, + .max_line_buffer_lines = 32, + .writeback_interface_buffer_size_kbytes = 90, + //Number of pipes after DCN Pipe harvesting + .max_num_dpp = 4, + .max_num_opp = 4, + .max_num_otg = 4, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .dppclk_delay_subtotal = 47, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 28, + .dppclk_delay_cnvc_cursor = 6, + .cursor_buffer_size = 24, + .cursor_chunk_size = 2, + .dispclk_delay_subtotal = 125, + .max_inter_dcn_tile_repeaters = 8, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .writeback_line_buffer_buffer_size = 0, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 12, + .maximum_pixels_per_line_per_dsc_unit = 5760, + .dsc422_native_support = true, + .dcc_supported = true, + .ptoi_supported = false, + + .cursor_64bpp_support = true, + .dynamic_metadata_vm_enabled = false, + + .max_num_dp2p0_outputs = 4, + .max_num_dp2p0_streams = 4, + .imall_supported = 1, + .max_flip_time_us = 80, + .max_flip_time_lines = 32, + .words_per_channel = 16, + + .subvp_fw_processing_delay_us = 15, + 
.subvp_pstate_allow_width_us = 20, + .subvp_swath_height_margin_lines = 16, +}; + +static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *ip_caps, const struct dml2_core_ip_params *ip_params) +{ + ip_caps->pipe_count = ip_params->max_num_dpp; + ip_caps->otg_count = ip_params->max_num_otg; + ip_caps->num_dsc = ip_params->num_dsc; + ip_caps->max_num_dp2p0_streams = ip_params->max_num_dp2p0_streams; + ip_caps->max_num_dp2p0_outputs = ip_params->max_num_dp2p0_outputs; + ip_caps->max_num_hdmi_frl_outputs = ip_params->max_num_hdmi_frl_outputs; + ip_caps->rob_buffer_size_kbytes = ip_params->rob_buffer_size_kbytes; + ip_caps->config_return_buffer_size_in_kbytes = ip_params->config_return_buffer_size_in_kbytes; + ip_caps->config_return_buffer_segment_size_in_kbytes = ip_params->config_return_buffer_segment_size_in_kbytes; + ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries; + ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes; + ip_caps->cursor_buffer_size = ip_params->cursor_buffer_size; + ip_caps->max_flip_time_us = ip_params->max_flip_time_us; + ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines; + ip_caps->hostvm_mode = ip_params->hostvm_mode; + + // FIXME_STAGE2: cleanup after adding all dv override to ip_caps + ip_caps->subvp_drr_scheduling_margin_us = 100; + ip_caps->subvp_prefetch_end_to_mall_start_us = 15; + ip_caps->subvp_fw_processing_delay = 16; + +} + +static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, const struct dml2_ip_capabilities *ip_caps) +{ + ip_params->max_num_dpp = ip_caps->pipe_count; + ip_params->max_num_otg = ip_caps->otg_count; + ip_params->num_dsc = ip_caps->num_dsc; + ip_params->max_num_dp2p0_streams = ip_caps->max_num_dp2p0_streams; + ip_params->max_num_dp2p0_outputs = ip_caps->max_num_dp2p0_outputs; + ip_params->max_num_hdmi_frl_outputs = ip_caps->max_num_hdmi_frl_outputs; + ip_params->rob_buffer_size_kbytes = ip_caps->rob_buffer_size_kbytes; + ip_params->config_return_buffer_size_in_kbytes = ip_caps->config_return_buffer_size_in_kbytes; + ip_params->config_return_buffer_segment_size_in_kbytes = ip_caps->config_return_buffer_segment_size_in_kbytes; + ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries; + ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes; + ip_params->cursor_buffer_size = ip_caps->cursor_buffer_size; + ip_params->max_flip_time_us = ip_caps->max_flip_time_us; + ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines; + ip_params->hostvm_mode = ip_caps->hostvm_mode; +} + +bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out) +{ + struct dml2_core_instance *core = in_out->instance; + + if (!in_out->minimum_clock_table) + return false; + else + core->minimum_clock_table = in_out->minimum_clock_table; + + if (in_out->explicit_ip_bb && in_out->explicit_ip_bb_size > 0) { + memcpy(&core->clean_me_up.mode_lib.ip, in_out->explicit_ip_bb, in_out->explicit_ip_bb_size); + + // FIXME_STAGE2: + // DV still uses stage1 ip_param_st for each variant, need to patch the ip_caps with ip_param info + // Should move DV to use ip_caps but need move more overrides to ip_caps + patch_ip_caps_with_explicit_ip_params(in_out->ip_caps, in_out->explicit_ip_bb); + core->clean_me_up.mode_lib.ip.subvp_pstate_allow_width_us = core_dcn4_ip_caps_base.subvp_pstate_allow_width_us; + core->clean_me_up.mode_lib.ip.subvp_fw_processing_delay_us = 
core_dcn4_ip_caps_base.subvp_pstate_allow_width_us; + core->clean_me_up.mode_lib.ip.subvp_swath_height_margin_lines = core_dcn4_ip_caps_base.subvp_swath_height_margin_lines; + } else { + memcpy(&core->clean_me_up.mode_lib.ip, &core_dcn4_ip_caps_base, sizeof(struct dml2_core_ip_params)); + patch_ip_params_with_ip_caps(&core->clean_me_up.mode_lib.ip, in_out->ip_caps); + core->clean_me_up.mode_lib.ip.imall_supported = false; + } + + memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb)); + memcpy(&core->clean_me_up.mode_lib.ip_caps, in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + + return true; +} + +static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; + phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +static void expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + 
svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_instance *core, const struct display_configuation_with_meta *display_cfg, + const struct dml2_display_cfg *svp_expanded_display_cfg, struct dml2_display_cfg_programming *programming, struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index, pipe_offset, stream_already_populated_mask, main_plane_index, mcache_index; + unsigned int total_main_mcaches_required = 0; + int total_pipe_regs_copied = 0; + int dml_internal_pipe_index = 0; + const struct dml2_plane_parameters *main_plane; + const struct dml2_plane_parameters *phantom_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + // Copy the unexpanded display config to output + memcpy(&programming->display_config, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + + // Set the global register values + dml2_core_calcs_get_arb_params(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.arb_regs); + // Get watermarks uses display config 
for ref clock override, so it doesn't matter whether we pass the pre or post expansion + // display config + dml2_core_calcs_get_watermarks(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.wm_regs[0]); + + // Check if FAMS2 is required + if (display_cfg->stage3.performed && display_cfg->stage3.success) { + programming->fams2_required = display_cfg->stage3.fams2_required; + + dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); + } + + // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) + for (stream_index = 0; stream_index < programming->display_config.num_streams; stream_index++) { + main_stream = &svp_expanded_display_cfg->stream_descriptors[stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[stream_index]]; + + // Set the descriptor + programming->stream_programming[stream_index].stream_descriptor = &programming->display_config.stream_descriptors[stream_index]; + + // Set the odm combine factor + programming->stream_programming[stream_index].num_odms_required = display_cfg->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used; + + // Check if the stream has implicit SVP enabled + if (main_stream != phantom_stream) { + // If so, copy the phantom stream descriptor + programming->stream_programming[stream_index].phantom_stream.enabled = true; + memcpy(&programming->stream_programming[stream_index].phantom_stream.descriptor, phantom_stream, sizeof(struct dml2_stream_parameters)); + } else { + programming->stream_programming[stream_index].phantom_stream.enabled = false; + } + + // Due to the way DML indexes data internally, it's easier to populate the rest of the display + // stream programming in the next stage + } + + dml_internal_pipe_index = 0; + total_pipe_regs_copied = 0; + stream_already_populated_mask = 0x0; + + // Loop over all main planes + for (plane_index = 0; plane_index < programming->display_config.num_planes; plane_index++) { + main_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index]; + + // Set the descriptor + programming->plane_programming[plane_index].plane_descriptor = &programming->display_config.plane_descriptors[plane_index]; + + // Set the mpc combine factor + programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; + + // Setup the appropriate p-state strategy + if (display_cfg->stage3.performed && display_cfg->stage3.success) { + programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index]; + } else { + programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na; + } + + dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); + + memcpy(&programming->plane_programming[plane_index].mcache_allocation, + &display_cfg->stage2.mcache_allocations[plane_index], + sizeof(struct dml2_mcache_surface_allocation)); + total_main_mcaches_required += programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane0 + + programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane1 - + (programming->plane_programming[plane_index].mcache_allocation.last_slice_sharing.plane0_plane1 ? 
1 : 0); + + for (pipe_offset = 0; pipe_offset < programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) { + // Assign storage for this pipe's register values + programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; + memset(programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); + total_pipe_regs_copied++; + + // Populate the main plane regs + dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index); + + // Multiple planes can refer to the same stream index, so it's only necessary to populate it once + if (!(stream_already_populated_mask & (0x1 << main_plane->stream_index))) { + dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index], dml_internal_pipe_index); + + programming->stream_programming[main_plane->stream_index].uclk_pstate_method = programming->plane_programming[plane_index].uclk_pstate_support_method; + + /* unconditionally populate fams2 params */ + dml2_core_calcs_get_stream_fams2_programming(&core->clean_me_up.mode_lib, + display_cfg, + &programming->stream_programming[main_plane->stream_index].fams2_base_params, + &programming->stream_programming[main_plane->stream_index].fams2_sub_params, + programming->stream_programming[main_plane->stream_index].uclk_pstate_method, + plane_index); + + stream_already_populated_mask |= (0x1 << main_plane->stream_index); + } + dml_internal_pipe_index++; + } + } + + for (plane_index = programming->display_config.num_planes; plane_index < svp_expanded_display_cfg->num_planes; plane_index++) { + phantom_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index]; + main_plane_index = scratch->phantom_plane_index_to_main_plane_index[plane_index]; + main_plane = &svp_expanded_display_cfg->plane_descriptors[main_plane_index]; + + programming->plane_programming[main_plane_index].phantom_plane.valid = true; + memcpy(&programming->plane_programming[main_plane_index].phantom_plane.descriptor, phantom_plane, sizeof(struct dml2_plane_parameters)); + + dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[main_plane_index].svp_size_mall_bytes, dml_internal_pipe_index); + + /* generate mcache allocation, phantoms use identical mcache configuration, but in the MALL set and unique mcache ID's beginning after all main ID's */ + memcpy(&programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation, + &programming->plane_programming[main_plane_index].mcache_allocation, + sizeof(struct dml2_mcache_surface_allocation)); + for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane0; mcache_index++) { + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index] += total_main_mcaches_required; + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane0[mcache_index] = + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index]; + } + for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane1; mcache_index++) { + 
programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index] += total_main_mcaches_required; + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane1[mcache_index] = + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index]; + } + + for (pipe_offset = 0; pipe_offset < programming->plane_programming[main_plane_index].num_dpps_required; pipe_offset++) { + // Assign storage for this pipe's register values + programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; + memset(programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); + total_pipe_regs_copied++; + + // Populate the phantom plane regs + dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], dml_internal_pipe_index); + // Populate the phantom stream specific programming + if (!(stream_already_populated_mask & (0x1 << phantom_plane->stream_index))) { + dml2_core_calcs_get_global_sync_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index].phantom_stream.global_sync, dml_internal_pipe_index); + + stream_already_populated_mask |= (0x1 << phantom_plane->stream_index); + } + + dml_internal_pipe_index++; + } + } +} + +bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) +{ + struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance; + struct dml2_core_mode_support_locals *l = &core->scratch.mode_support_locals; + + bool result; + unsigned int i, stream_index, stream_bitmask; + int unsigned odm_count, num_odm_output_segments, dpp_count; + + expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch); + + l->mode_support_ex_params.mode_lib = &core->clean_me_up.mode_lib; + l->mode_support_ex_params.in_display_cfg = &l->svp_expanded_display_cfg; + l->mode_support_ex_params.min_clk_table = in_out->min_clk_table; + l->mode_support_ex_params.min_clk_index = in_out->min_clk_index; + l->mode_support_ex_params.out_evaluation_info = &in_out->mode_support_result.cfg_support_info.clean_me_up.support_info; + + result = dml2_core_calcs_mode_support_ex(&l->mode_support_ex_params); + + in_out->mode_support_result.cfg_support_info.is_supported = result; + + if (result) { + in_out->mode_support_result.global.dispclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDISPCLK * 1000); + in_out->mode_support_result.global.dcfclk_deepsleep_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.dcfclk_deepsleep * 1000); + in_out->mode_support_result.global.socclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.SOCCLK * 1000); + + in_out->mode_support_result.global.fclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_fclk_change_supported; + in_out->mode_support_result.global.uclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_dram_clock_change_supported; + + in_out->mode_support_result.global.active.fclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.FabricClock * 1000); + in_out->mode_support_result.global.active.dcfclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.DCFCLK * 1000); + + + 
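The bandwidth outputs that follow use the same conversion pattern as the clock values above: the core works in MB/s doubles, while the result structure stores integer kB/s rounded up. A condensed restatement of what the surrounding assignments do inline (math_ceil2 comes from lib_float_math, already included by this file):

	/* Same conversion the surrounding code performs inline:
	 * MB/s (double) -> kB/s (unsigned long), rounded up. */
	static unsigned long mbps_to_kbps_ceil(double bw_mbps)
	{
		return (unsigned long)math_ceil2(bw_mbps * 1000.0, 1.0);
	}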
in_out->mode_support_result.global.svp_prefetch.fclk_khz = (unsigned long)core->clean_me_up.mode_lib.ms.FabricClock * 1000; + in_out->mode_support_result.global.svp_prefetch.dcfclk_khz = (unsigned long)core->clean_me_up.mode_lib.ms.DCFCLK * 1000; + + in_out->mode_support_result.global.active.average_bw_sdp_kbps = 0; + in_out->mode_support_result.global.active.urgent_bw_dram_kbps = 0; + in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = 0; + in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = 0; + + in_out->mode_support_result.global.active.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0); + in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0); + + in_out->mode_support_result.global.active.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); + in_out->mode_support_result.global.active.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); + + for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) { + in_out->mode_support_result.per_plane[i].dppclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDPPCLK[i] * 1000); + } + + stream_bitmask = 0; + for (i = 0; i < 
l->svp_expanded_display_cfg.num_planes; i++) { + odm_count = 1; + dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; + num_odm_output_segments = 1; + + switch (l->mode_support_ex_params.out_evaluation_info->ODMMode[i]) { + case dml2_odm_mode_bypass: + odm_count = 1; + dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; + break; + case dml2_odm_mode_combine_2to1: + odm_count = 2; + dpp_count = 2; + break; + case dml2_odm_mode_combine_3to1: + odm_count = 3; + dpp_count = 3; + break; + case dml2_odm_mode_combine_4to1: + odm_count = 4; + dpp_count = 4; + break; + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + num_odm_output_segments = 2; + break; + case dml2_odm_mode_mso_1to4: + num_odm_output_segments = 4; + break; + case dml2_odm_mode_auto: + default: + odm_count = 1; + dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; + break; + } + + in_out->mode_support_result.cfg_support_info.plane_support_info[i].dpps_used = dpp_count; + + dml2_core_calcs_get_plane_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.plane_support_info[i], i); + + stream_index = l->svp_expanded_display_cfg.plane_descriptors[i].stream_index; + + in_out->mode_support_result.per_stream[stream_index].dscclk_khz = (unsigned int)core->clean_me_up.mode_lib.ms.required_dscclk_freq_mhz[i] * 1000; + DML_LOG_VERBOSE("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); + + if (!((stream_bitmask >> stream_index) & 0x1)) { + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used = odm_count; + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_odm_output_segments = num_odm_output_segments; + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].dsc_enable = l->mode_support_ex_params.out_evaluation_info->DSCEnabled[i]; + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_dsc_slices = l->mode_support_ex_params.out_evaluation_info->NumberOfDSCSlices[i]; + dml2_core_calcs_get_stream_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index], i); + in_out->mode_support_result.per_stream[stream_index].dtbclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDTBCLK[i] * 1000); + stream_bitmask |= 0x1 << stream_index; + } + } + } + + return result; +} + +static int lookup_uclk_dpm_index_by_freq(unsigned long uclk_freq_khz, struct dml2_soc_bb *soc_bb) +{ + int i; + + for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { + if (uclk_freq_khz == soc_bb->clk_table.uclk.clk_values_khz[i]) + return i; + } + return 0; +} + +bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out) +{ + struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance; + struct dml2_core_mode_programming_locals *l = &core->scratch.mode_programming_locals; + + bool result = false; + unsigned int pipe_offset; + int dml_internal_pipe_index; + int total_pipe_regs_copied = 0; + int stream_already_populated_mask = 0; + + int main_stream_index; + unsigned int plane_index; + + expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch); + + 
l->mode_programming_ex_params.mode_lib = &core->clean_me_up.mode_lib; + l->mode_programming_ex_params.in_display_cfg = &l->svp_expanded_display_cfg; + l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table; + l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info; + l->mode_programming_ex_params.programming = in_out->programming; + l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4x.active.uclk_khz, + &core->clean_me_up.mode_lib.soc); + + result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params); + + if (result) { + // If the input display configuration contains implicit SVP, we need to use a special packer + if (in_out->display_cfg->display_config.overrides.enable_subvp_implicit_pmo) { + pack_mode_programming_params_with_implicit_subvp(core, in_out->display_cfg, &l->svp_expanded_display_cfg, in_out->programming, &core->scratch); + } else { + memcpy(&in_out->programming->display_config, in_out->display_cfg, sizeof(struct dml2_display_cfg)); + + dml2_core_calcs_get_arb_params(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.arb_regs); + dml2_core_calcs_get_watermarks(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.wm_regs[0]); + + dml_internal_pipe_index = 0; + + for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) { + in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; + + if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; + else { + if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vactive; + else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vblank; + else + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na; + } + + dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); + + memcpy(&in_out->programming->plane_programming[plane_index].mcache_allocation, + &in_out->display_cfg->stage2.mcache_allocations[plane_index], + sizeof(struct dml2_mcache_surface_allocation)); + + for (pipe_offset = 0; pipe_offset < 
in_out->programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) { + in_out->programming->plane_programming[plane_index].plane_descriptor = &in_out->programming->display_config.plane_descriptors[plane_index]; + + // Assign storage for this pipe's register values + in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &in_out->programming->pipe_regs[total_pipe_regs_copied]; + memset(in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); + total_pipe_regs_copied++; + + // Populate + dml2_core_calcs_get_pipe_regs(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index); + + main_stream_index = in_out->programming->display_config.plane_descriptors[plane_index].stream_index; + + // Multiple planes can refer to the same stream index, so it's only necessary to populate it once + if (!(stream_already_populated_mask & (0x1 << main_stream_index))) { + in_out->programming->stream_programming[main_stream_index].stream_descriptor = &in_out->programming->display_config.stream_descriptors[main_stream_index]; + in_out->programming->stream_programming[main_stream_index].num_odms_required = in_out->cfg_support_info->stream_support_info[main_stream_index].odms_used; + dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &in_out->programming->stream_programming[main_stream_index], dml_internal_pipe_index); + + stream_already_populated_mask |= (0x1 << main_stream_index); + } + dml_internal_pipe_index++; + } + } + } + } + + return result; +} + +bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out) +{ + struct dml2_core_internal_display_mode_lib *mode_lib = &in_out->instance->clean_me_up.mode_lib; + + if (in_out->mode_is_supported) + in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_programming_locals.mode_programming_ex_params.min_clk_index; + else + in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_support_locals.mode_support_ex_params.min_clk_index; + + dml2_core_calcs_get_informative(mode_lib, in_out->programming); + return true; +} + +bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out) +{ + memset(in_out->mcache_allocation, 0, sizeof(struct dml2_mcache_surface_allocation)); + + dml2_core_calcs_get_mcache_allocation(&in_out->instance->clean_me_up.mode_lib, in_out->mcache_allocation, in_out->plane_index); + + if (in_out->mcache_allocation->num_mcaches_plane0 > 0) + in_out->mcache_allocation->mcache_x_offsets_plane0[in_out->mcache_allocation->num_mcaches_plane0 - 1] = in_out->plane_descriptor->surface.plane0.width; + + if (in_out->mcache_allocation->num_mcaches_plane1 > 0) + in_out->mcache_allocation->mcache_x_offsets_plane1[in_out->mcache_allocation->num_mcaches_plane1 - 1] = in_out->plane_descriptor->surface.plane1.width; + + in_out->mcache_allocation->requires_dedicated_mall_mcache = false; + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h new file mode 100644 index 000000000000..a68bb001a346 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#ifndef __DML2_CORE_DCN4_H__ +#define __DML2_CORE_DCN4_H__ +bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out); +bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out); +bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out); +bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out); +bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out); +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c new file mode 100644 index 000000000000..a02e9fd6b5ca --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -0,0 +1,13342 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_internal_shared_types.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_debug.h" +#include "lib_float_math.h" +#include "dml_top_types.h" + +#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 +#define DML_MAX_NUM_OF_SLICES_PER_DSC 4 +#define DML_MAX_COMPRESSION_RATIO 4 +//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW +//#define DML_GLOBAL_PREFETCH_CHECK +#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE +#define DML_MAX_VSTARTUP_START 1023 + +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + +static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; +} + +static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + DML_LOG_VERBOSE("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || 
support->LinkRateForMultistreamNotIndicated == 1) + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->P2IWith420 == 1) + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->NotEnoughDSCUnits == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ROBSupport == 0) + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->OutstandingRequestsSupport == 0) + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + 
DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) + DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + + if (!fail_only || support->ModeSupport == 0) + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== 
\n"); +} + +static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); + } +} + +static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); + return num_active_pipes; +} + +static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) +{ + unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + + bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]); + DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); + return is_phantom; +} + +#define 
dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \ +{ \ +unsigned int plane_idx; \ +plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \ +return (type) interval_var[plane_idx]; \ +} + +dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes); +dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes); +dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY); +dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC); +dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear); +dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma); + +dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup); +dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix); +dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix); +dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix); +dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines); +dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY); +dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC); +dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte); +dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL); + +#define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \ +{ \ +return (type) interval_var[plane_idx]; \ +} + +dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l); +dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l); +dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l); +dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c); +dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c); +dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c); +dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l); +dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c); +dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache); +dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL); +dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines); + +#define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \ +{ \ +return (type) interval_var[plane_idx][array_idx]; \ +} + +dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l); 
+dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c); + +#define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \ +{ \ +return (type) internal_var; \ +} + +dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark); +dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark); +dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark); +dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark); +dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark); +dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency); +dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory); + +dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark); +dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark); +dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us); +dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark); +dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth); +dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip); +dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL); +dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); +dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); +dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency); +dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank); +dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame); +dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency); +dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame); +dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod); +dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase); +dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase); +dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase); +dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported); +dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0); + +dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + +dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + 
+dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + +dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + +dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]); + +dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency); +dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us); +dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us); +dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us); +dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us); + +dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + 
+dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte); + +dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled); +dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark); +dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); +dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b); +dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5); +dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + +static void CalculateMaxDETAndMinCompressedBufferSize( + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int ConfigReturnBufferSegmentSizeInKByte, + unsigned int ROBBufferSizeInKByte, + unsigned int MaxNumDPP, + unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size + unsigned int nomDETInKByteOverrideValue, // VBA_DELTA + bool is_mrq_present, + + // Output + unsigned int *MaxTotalDETInKByte, + unsigned int *nomDETInKByte, + unsigned int *MinCompressedBufferSizeInKByte) +{ + if (is_mrq_present) + *MaxTotalDETInKByte = (unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64); + else + *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; + + *nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); + *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; + + DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); + 
DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); + + if (nomDETInKByteOverrideEnable) { + *nomDETInKByte = nomDETInKByteOverrideValue; + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); + } +} + +static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd) +{ + //unsigned int num_active_planes = display_cfg->num_planes; + + //Progressive To Interlace Unit Effect + for (unsigned int k = 0; k < display_cfg->num_planes; ++k) { + PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) { + // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly + //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz; + } + } +} + +static bool dml_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + default: + DML_ASSERT(0); + break; + } + return val; +} + +static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) +{ + if (sw_mode == dml2_sw_linear) + return 256; + else if (sw_mode == dml2_sw_256b_2d) + return 256; + else if (sw_mode == dml2_sw_4kb_2d) + return 4096; + else if (sw_mode == dml2_sw_64kb_2d) + return 65536; + else if (sw_mode == dml2_sw_256kb_2d) + return 262144; + else if (sw_mode == dml2_gfx11_sw_linear) + return 256; + else if (sw_mode == dml2_gfx11_sw_64kb_d) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_t) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_r_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_256kb_d_x) + return 262144; + else if (sw_mode == dml2_gfx11_sw_256kb_r_x) + return 262144; + else { + DML_ASSERT(0); + return 256; + } +} + +static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + +static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) { + version = 12; + } else if 
(sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) { + version = 11; + } else { + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML_ASSERT(0); + } + + return version; +} + +static void CalculateBytePerPixelAndBlockSizes( + enum dml2_source_format_class SourcePixelFormat, + enum dml2_swizzle_mode SurfaceTiling, + unsigned int pitch_y, + unsigned int pitch_c, + + // Output + unsigned int *BytePerPixelY, + unsigned int *BytePerPixelC, + double *BytePerPixelDETY, + double *BytePerPixelDETC, + unsigned int *BlockHeight256BytesY, + unsigned int *BlockHeight256BytesC, + unsigned int *BlockWidth256BytesY, + unsigned int *BlockWidth256BytesC, + unsigned int *MacroTileHeightY, + unsigned int *MacroTileHeightC, + unsigned int *MacroTileWidthY, + unsigned int *MacroTileWidthC, + bool *surf_linear128_l, + bool *surf_linear128_c) +{ + *BytePerPixelDETY = 0; + *BytePerPixelDETC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 1; + + if (SourcePixelFormat == dml2_444_64) { + *BytePerPixelDETY = 8; + *BytePerPixelDETC = 0; + *BytePerPixelY = 8; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 0; + *BytePerPixelY = 4; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 0; + *BytePerPixelY = 2; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_rgbe_alpha) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 1; + *BytePerPixelY = 4; + *BytePerPixelC = 1; + } else if (SourcePixelFormat == dml2_420_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 2; + *BytePerPixelY = 1; + *BytePerPixelC = 2; + } else if (SourcePixelFormat == dml2_420_12) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 4; + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else if (SourcePixelFormat == dml2_420_10) { + *BytePerPixelDETY = (double)(4.0 / 3); + *BytePerPixelDETC = (double)(8.0 / 3); + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else { + DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); + DML_ASSERT(0); + } + + DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y); + DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c); + DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); + DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); + + if (dml_get_gfx_version(SurfaceTiling) == 11) { + *surf_linear128_l = 0; + *surf_linear128_c = 0; + } else { + if (SurfaceTiling == dml2_sw_linear) { + *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0); + + if (dml_is_420(SourcePixelFormat) 
|| SourcePixelFormat == dml2_rgbe_alpha) + *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0); + } + } + + if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) { + if (SurfaceTiling == dml2_sw_linear) { + *BlockHeight256BytesY = 1; + } else if (SourcePixelFormat == dml2_444_64) { + *BlockHeight256BytesY = 4; + } else if (SourcePixelFormat == dml2_444_8) { + *BlockHeight256BytesY = 16; + } else { + *BlockHeight256BytesY = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockHeight256BytesC = 0; + *BlockWidth256BytesC = 0; + } else { // dual plane + if (SurfaceTiling == dml2_sw_linear) { + *BlockHeight256BytesY = 1; + *BlockHeight256BytesC = 1; + } else if (SourcePixelFormat == dml2_rgbe_alpha) { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 16; + } else if (SourcePixelFormat == dml2_420_8) { + *BlockHeight256BytesY = 16; + *BlockHeight256BytesC = 8; + } else { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; + } + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); + + if (dml_get_gfx_version(SurfaceTiling) == 11) { + if (SurfaceTiling == dml2_gfx11_sw_linear) { + *MacroTileHeightY = *BlockHeight256BytesY; + *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; + } + } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) { + *MacroTileHeightY = 16 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 16 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; + } + } else { + *MacroTileHeightY = 32 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 32 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; + } + } + } else { + unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling); + unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling + + if (SurfaceTiling == dml2_sw_linear) { + macro_tile_scale = 1; + } else if (SurfaceTiling == dml2_sw_4kb_2d) { + macro_tile_scale = 4; + } else if (SurfaceTiling == dml2_sw_64kb_2d) { + macro_tile_scale = 16; + } else if (SurfaceTiling == dml2_sw_256kb_2d) { + macro_tile_scale = 32; + } else { + DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! 
val=%u\n", SurfaceTiling); + DML_ASSERT(0); + } + + *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY; + *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC; + } + } + + DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); + DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); +} + +static void CalculateSinglePipeDPPCLKAndSCLThroughput( + double HRatio, + double HRatioChroma, + double VRatio, + double VRatioChroma, + double MaxDCHUBToPSCLThroughput, + double MaxPSCLToLBThroughput, + double PixelClock, + enum dml2_source_format_class SourcePixelFormat, + unsigned int HTaps, + unsigned int HTapsChroma, + unsigned int VTaps, + unsigned int VTapsChroma, + + // Output + double *PSCL_THROUGHPUT, + double *PSCL_THROUGHPUT_CHROMA, + double *DPPCLKUsingSingleDPP) +{ + double DPPCLKUsingSingleDPPLuma; + double DPPCLKUsingSingleDPPChroma; + + if (HRatio > 1) { + *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + + DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1); + + if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) + DPPCLKUsingSingleDPPLuma = 2 * PixelClock; + + if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) { + *PSCL_THROUGHPUT_CHROMA = 0; + *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; + } else { + if (HRatioChroma > 1) { + *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma), + HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); + if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) + DPPCLKUsingSingleDPPChroma = 2 * PixelClock; + *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); + } +} + +static void CalculateSwathWidth( + const struct dml2_display_cfg *display_cfg, + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + enum dml2_odm_mode ODMMode[], + unsigned int BytePerPixY[], + unsigned int BytePerPixC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + bool surf_linear128_l[], + bool surf_linear128_c[], + unsigned int DPPPerSurface[], + + // Output + unsigned int req_per_swath_ub_l[], + unsigned int req_per_swath_ub_c[], + unsigned int SwathWidthSingleDPPY[], // post-rotated plane width + unsigned int SwathWidthSingleDPPC[], + unsigned int SwathWidthY[], // per-pipe + unsigned int SwathWidthC[], // per-pipe + unsigned int MaximumSwathHeightY[], + unsigned int MaximumSwathHeightC[], + unsigned int swath_width_luma_ub[], // 
per-pipe + unsigned int swath_width_chroma_ub[]) // per-pipe +{ + enum dml2_odm_mode MainSurfaceODMMode; + double odm_hactive_factor = 1.0; + unsigned int req_width_horz_y; + unsigned int req_width_horz_c; + unsigned int surface_width_ub_l; + unsigned int surface_height_ub_l; + unsigned int surface_width_ub_c; + unsigned int surface_height_ub_c; + + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + } else { + SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + } + + DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); + + MainSurfaceODMMode = ODMMode[k]; + + if (ForceSingleDPP) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } else { + if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1) + odm_hactive_factor = 4.0; + else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1) + odm_hactive_factor = 3.0; + else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1) + odm_hactive_factor = 2.0; + + if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) { + SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio))); + } else if (DPPPerSurface[k] == 2) { + SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } + } + + DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); + DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); + + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + SwathWidthC[k] = SwathWidthY[k] / 2; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthC[k] = SwathWidthY[k]; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; + } + + if (ForceSingleDPP == true) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + SwathWidthC[k] = SwathWidthSingleDPPC[k]; + } + + req_width_horz_y = Read256BytesBlockWidthY[k]; + req_width_horz_c = Read256BytesBlockWidthC[k]; + + if (surf_linear128_l[k]) + req_width_horz_y = req_width_horz_y / 2; + + if (surf_linear128_c[k]) + req_width_horz_c = req_width_horz_c / 2; + + surface_width_ub_l = (unsigned 
int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y); + surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]); + surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c); + surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]); + + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); + + req_per_swath_ub_l[k] = 0; + req_per_swath_ub_c[k] = 0; + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y))); + } else { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y)); + } + req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y; + + if (BytePerPixC[k] > 0) { + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c))); + } else { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c)); + } + req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c; + } else { + 
swath_width_chroma_ub[k] = 0; + } + } else { + MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; + + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k]))); + } else { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k])); + } + req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k]; + if (BytePerPixC[k] > 0) { + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k]))); + } else { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k])); + } + req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k]; + } else { + swath_width_chroma_ub[k] = 0; + } + } + + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); + } +} + +static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear) +{ + bool unb_req_ok = false; + bool unb_req_en = false; + + unb_req_ok = (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear); + unb_req_en = unb_req_ok; + + if (unb_req_force_en) { + unb_req_en = unb_req_force_val && unb_req_ok; + } + DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); + DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); + DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); + DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); + return unb_req_en; +} + +static void CalculateDETBufferSize( + struct dml2_core_shared_CalculateDETBufferSize_locals *l, + const struct dml2_display_cfg *display_cfg, + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + bool UnboundedRequestEnabled, + unsigned int nomDETInKByte, + unsigned int MaxTotalDETInKByte, + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int MinCompressedBufferSizeInKByte, + unsigned int ConfigReturnBufferSegmentSizeInkByte, + unsigned int CompressedBufferSegmentSizeInkByte, + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + 
unsigned int full_swath_bytes_l[], + unsigned int full_swath_bytes_c[], + unsigned int DPPPerSurface[], + // Output + unsigned int DETBufferSizeInKByte[], + unsigned int *CompressedBufferSizeInkByte) +{ + memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals)); + + bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES]; + bool NextPotentialSurfaceToAssignDETPieceFound; + bool MinimizeReallocationSuccess = false; + + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte); + + // Note: Will use default det size if that fits 2 swaths + if (UnboundedRequestEnabled) { + if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) { + DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb; + } else { + DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)); + } + *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; + } else { + l->DETBufferSizePoolInKByte = MaxTotalDETInKByte; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + DETBufferSizeInKByte[k] = 0; + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte; + } else { + l->max_minDET = nomDETInKByte; + } + l->minDET = 128; + l->minDET_pipe = 0; + + // add DET resource until can hold 2 full swaths + while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) { + if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET) + l->minDET_pipe = l->minDET; + l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte; + } + + DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET); + DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET); + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]); + + if (l->minDET_pipe == 0) { + l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte))); + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe); + } + + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + DETBufferSizeInKByte[k] = 0; + } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) { + DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb; + l->DETBufferSizePoolInKByte = 
l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb; + } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) { + DETBufferSizeInKByte[k] = l->minDET_pipe; + l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe; + } + + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte); + } + + if (display_cfg->minimize_det_reallocation) { + MinimizeReallocationSuccess = true; + // To minimize det reallocation, we don't distribute based on each surface's bandwidth proportional to the global total, + // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on + // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only occurs on a + // stream count and/or pixel rate change, which is much less likely than general bandwidth changes per plane. + + // Calculate total pixel rate + for (unsigned int k = 0; k < display_cfg->num_streams; ++k) { + l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz; + } + + // Calculate per stream DET budget + for (unsigned int k = 0; k < display_cfg->num_streams; ++k) { + l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate); + l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k]; + } + + // Calculate the per stream total bandwidth + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + + // Check that the minimum can be satisfied by the budget + if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { + l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ?
1 : DPPPerSurface[k]); + } else { + MinimizeReallocationSuccess = false; + break; + } + } + } + + if (MinimizeReallocationSuccess) { + // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each streams + // budget proportionally across its planes + l->ResidualDETAfterRounding = MaxTotalDETInKByte; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index]) + * l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]); + + if (l->IdealDETBudget > DETBufferSizeInKByte[k]) { + l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k]; + if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]) + l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]; + + /* split the additional budgeted DET among the pipes per plane */ + DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k])); + l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget; + } + + // Round down to segment size + DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte; + + l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]); + } + } + } + } + + if (!MinimizeReallocationSuccess) { + l->TotalBandwidth = 0; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } + } + DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__); + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + } + DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth); + l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 
1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; + } else { + DETPieceAssignedToThisSurfaceAlready[k] = false; + } + DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece); + } + + for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) { + NextPotentialSurfaceToAssignDETPieceFound = false; + l->NextSurfaceToAssignDETPiece = 0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece); + if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || + ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) { + l->NextSurfaceToAssignDETPiece = k; + NextPotentialSurfaceToAssignDETPieceFound = true; + } + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); + } + + if (NextPotentialSurfaceToAssignDETPieceFound) { + l->NextDETBufferPieceInKByte = (unsigned int)(math_min2( + math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece / + ((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)) + * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte, + math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 
1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))); + + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece); + DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); + + DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); + + l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte; + DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true; + l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + } + } + } + *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; + } + *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte; + + DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + } +} + +static double CalculateRequiredDispclk( + enum dml2_odm_mode ODMMode, + double PixelClock, + bool isTMDS420) +{ + double DispClk; + + if (ODMMode == dml2_odm_mode_combine_4to1) { + DispClk = PixelClock / 4.0; + } else if (ODMMode == dml2_odm_mode_combine_3to1) { + DispClk = PixelClock / 3.0; + } else if (ODMMode == dml2_odm_mode_combine_2to1) { + DispClk = PixelClock / 2.0; + } else { + DispClk = PixelClock; + } + + if (isTMDS420) { + double TMDS420MinPixClock = PixelClock / 2.0; + DispClk = math_max2(DispClk, TMDS420MinPixClock); + } + + return DispClk; +} + +static double TruncToValidBPP( + struct dml2_core_shared_TruncToValidBPP_locals *l, + double LinkBitRate, + unsigned int Lanes, + unsigned int HTotal, + unsigned int HActive, + double PixelClock, + double DesiredBPP, + bool DSCEnable, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class Format, + unsigned int DSCInputBitPerComponent, + unsigned int DSCSlices, + unsigned int AudioRate, + unsigned int AudioLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum dml2_odm_mode ODMModeDSC, + + // Output + unsigned int *RequiredSlots) +{ + double MaxLinkBPP; + unsigned 
int MinDSCBPP; + double MaxDSCBPP; + unsigned int NonDSCBPP0; + unsigned int NonDSCBPP1; + unsigned int NonDSCBPP2; + enum dml2_odm_mode ODMMode; + + if (Format == dml2_420) { + NonDSCBPP0 = 12; + NonDSCBPP1 = 15; + NonDSCBPP2 = 18; + MinDSCBPP = 6; + MaxDSCBPP = 16; + } else if (Format == dml2_444) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 30; + NonDSCBPP2 = 36; + MinDSCBPP = 8; + MaxDSCBPP = 16; + } else { + + if (Output == dml2_hdmi || Output == dml2_hdmifrl) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 24; + NonDSCBPP2 = 24; + } else { + NonDSCBPP0 = 16; + NonDSCBPP1 = 20; + NonDSCBPP2 = 24; + } + if (Format == dml2_n422 || Output == dml2_hdmifrl) { + MinDSCBPP = 7; + MaxDSCBPP = 16; + } else { + MinDSCBPP = 8; + MaxDSCBPP = 16; + } + } + + if (Output == dml2_dp2p0) { + MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0; + } else if (DSCEnable && Output == dml2_dp) { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100); + } else { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock; + } + + ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC; + + if (ODMMode == dml2_odm_mode_split_1to2) { + MaxLinkBPP = 2 * MaxLinkBPP; + } + + if (DesiredBPP == 0) { + if (DSCEnable) { + if (MaxLinkBPP < MinDSCBPP) { + return __DML2_CALCS_DPP_INVALID__; + } else if (MaxLinkBPP >= MaxDSCBPP) { + return MaxDSCBPP; + } else { + return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0; + } + } else { + if (MaxLinkBPP >= NonDSCBPP2) { + return NonDSCBPP2; + } else if (MaxLinkBPP >= NonDSCBPP1) { + return NonDSCBPP1; + } else if (MaxLinkBPP >= NonDSCBPP0) { + return NonDSCBPP0; + } else { + return __DML2_CALCS_DPP_INVALID__; + } + } + } else { + if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) || + (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { + return __DML2_CALCS_DPP_INVALID__; + } else { + return DesiredBPP; + } + } +} + +// updated for dcn4 +static unsigned int dscceComputeDelay( + unsigned int bpc, + double BPP, + unsigned int sliceWidth, + unsigned int numSlices, + enum dml2_output_format_class pixelFormat, + enum dml2_output_encoder_class Output) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified; + + if (pixelFormat == dml2_420) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else if (pixelFormat == dml2_444) + pixelsPerClock = 1; + else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl) + pixelsPerClock = 2; + else + pixelsPerClock = 1; + + //initial transmit delay as 
per PPS + initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock)); + + //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format) + slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth; + + padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0; + + if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) { + if ((initial_xmit_delay + padding_pixels) % 3 == 1) { + initial_xmit_delay++; + } + } + + //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard + if (bpc == 8) + ssm_group_priming_delay = 83; + else if (bpc == 10) + ssm_group_priming_delay = 91; + else if (bpc == 12) + ssm_group_priming_delay = 115; + else if (bpc == 14) + ssm_group_priming_delay = 123; + else + ssm_group_priming_delay = 128; + + //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice + slice_width_groups = (slice_width_modified + 2) / 3; + + //determine number of padded pixels in the last group of a slice line, computed as + slice_padded_pixels = 3 * slice_width_groups - slice_width_modified; + + //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered + number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified; + + //increase initial transmit delay by the number of padded pixels added to a slice line multiplied by the integer number of complete lines to reach initial transmit delay + //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay + ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd; + + //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels + ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3; + + //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay + groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay; + + //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice + //needs to be rounded up as complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice + lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next + + //determine if there is a non-zero number of pixels reached in the group where initial transmit delay is reached + //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay + additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ?
1 : 0; + + //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block) + ssm_pipeline_delay = 2; + + //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block) + obsm_pipeline_delay = 1; + + //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes + if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl) + cycles_per_group = 6; + else + cycles_per_group = 3; + //delay of the bit stream construction layer in pixels is the sum of: + //1. number of pixel containers in a slice line multiplied by the number of lines required to reach initial transmit delay multiplied by number of slices to the left of the last horizontal slice + //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice) + //3. additional group of delay if initial transmit delay is reached exactly in a group + //4. ssm and obsm pipeline delay (i.e., clock cycles of delay) + group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay; + pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay; + + //pixel delay is group_delay (converted to pixels) + pipeline, however, the first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format) + pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay; + + DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc); + DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP); + DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); + DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices); + DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); + DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels); + return pixels; +} + +//updated in dcn4 +static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output) +{ + unsigned int Delay = 0; + unsigned int dispclk_per_dscclk = 3; + + // sfr + Delay = Delay + 2; + + if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) { + dispclk_per_dscclk = 3 * 2; + } + + if (pixelFormat == dml2_420) { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + + // dscc - input deserializer + Delay = Delay + 5; + + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + + + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else if (pixelFormat == dml2_s422) { + //dscc top delay for pixel compression layer + Delay = Delay + 17 * dispclk_per_dscclk; + + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay =
Delay + 1 + 4 * dispclk_per_dscclk; + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } + + // sft + Delay = Delay + 1; + DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); + DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, Delay); + + return Delay; +} + +static unsigned int CalculateHostVMDynamicLevels( + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels) +{ + unsigned int HostVMDynamicLevels = 0; + + if (GPUVMEnable && HostVMEnable) { + if (HostVMMinPageSize < 2048) + HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; + else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) + HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 1); + else + HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 2); + } else { + HostVMDynamicLevels = 0; + } + return HostVMDynamicLevels; +} + +static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p) +{ + unsigned int extra_dpde_bytes; + unsigned int extra_mpde_bytes; + unsigned int MacroTileSizeBytes; + unsigned int vp_height_dpte_ub; + + unsigned int meta_surface_bytes; + unsigned int vm_bytes; + unsigned int vp_height_meta_ub; + unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this + + *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes; + *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes; + if (p->SurfaceTiling == dml2_sw_linear) { + *p->meta_row_height = 32; + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways? 
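As an illustration of the meta row sizing just computed: meta_row_width is the swath width rounded out to whole meta requests (exact start/end alignment when the viewport is stationary on a single DPP, worst-case alignment otherwise), and meta_row_bytes follows at one byte of DCC meta per 256 bytes of surface data. Below is a minimal standalone sketch, not part of the patch, using hypothetical values (a 128x128 meta request, 3840-pixel swath) and local floor2()/ceil2() helpers standing in for math_floor2()/math_ceil2(); the rotation-dependent branches of the function continue right after it.

#include <math.h>
#include <stdio.h>

/* Round x down/up to a multiple of m (stand-ins for math_floor2()/math_ceil2()). */
static double floor2(double x, double m) { return floor(x / m) * m; }
static double ceil2(double x, double m)  { return ceil(x / m) * m; }

int main(void)
{
	/* Hypothetical plane: meta request of 128x128 pixels, 1 byte per pixel. */
	unsigned int meta_req_w = 128, meta_req_h = 128, byte_per_pixel = 1;
	unsigned int swath_width = 3840, vp_x_start = 0;

	/* Stationary viewport on one DPP: align to the actual start/end positions. */
	unsigned int meta_row_width_stationary = (unsigned int)(floor2(vp_x_start + swath_width + meta_req_w - 1, meta_req_w) - floor2(vp_x_start, meta_req_w));
	/* General case: worst-case alignment of the swath. */
	unsigned int meta_row_width_general = (unsigned int)(ceil2(swath_width - 1, meta_req_w) + meta_req_w);

	printf("stationary=%u general=%u meta_row_bytes(general)=%u\n",
	       meta_row_width_stationary, meta_row_width_general,
	       meta_row_width_general * meta_req_h * byte_per_pixel / 256);
	return 0;
}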
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + *p->meta_row_height = *p->MetaRequestHeight; + if (p->ViewportStationary && p->NumberOfDPPs == 1) { + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); + } else { + *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth); + } + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); + } else { + *p->meta_row_height = *p->MetaRequestWidth; + if (p->ViewportStationary && p->NumberOfDPPs == 1) { + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight)); + } else { + *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight); + } + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0); + } + + if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { + vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes)); + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); + } else { + vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); + } + + meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0); + DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); + DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); + if (p->GPUVMEnable == true) { + double meta_vmpg_bytes = 4.0 * 1024.0; + *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64); + extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1); + } else { + *p->meta_pte_bytes_per_frame_ub = 0; + extra_mpde_bytes = 0; + } + + if (!p->DCCEnable || !p->mrq_present) { + *p->meta_pte_bytes_per_frame_ub = 0; + extra_mpde_bytes = 0; + *p->meta_row_bytes = 0; + } + + if (!p->GPUVMEnable) { + *p->PixelPTEBytesPerRow = 0; + *p->PixelPTEBytesPerRowStorage = 0; + *p->dpte_row_width_ub = 0; + *p->dpte_row_height = 0; + *p->dpte_row_height_linear = 0; + *p->PixelPTEBytesPerRow_one_row_per_frame = 0; + *p->dpte_row_width_ub_one_row_per_frame = 0; + *p->dpte_row_height_one_row_per_frame = 0; + *p->vmpg_width = 0; + *p->vmpg_height = 0; + *p->PixelPTEReqWidth = 0; + *p->PixelPTEReqHeight = 0; + *p->PTERequestSize = 0; + *p->dpde0_bytes_per_frame_ub = 0; + return 0; + } + + MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight; + + if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { + vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight)); + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + 
vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight); + } else { + vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight); + } + + if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) { + *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1)); + extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2); + } else { + *p->dpde0_bytes_per_frame_ub = 0; + extra_dpde_bytes = 0; + } + + vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes; + + DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); + DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); + DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); + DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); + DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); + DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); + DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); + DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); + DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); + DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); + DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); + DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); + DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); + + if (p->SurfaceTiling == dml2_sw_linear) { + *p->PixelPTEReqHeight = 1; + *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel; + PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel; + *p->PTERequestSize = 64; + + *p->vmpg_height = 1; + *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel; + } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE + *p->PixelPTEReqHeight = p->MacroTileHeight; + *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + *p->PTERequestSize = 64; + + *p->vmpg_height = p->MacroTileHeight; + *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + + } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile + // one 64KB tile, is 16x16x256B req + *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes; + *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes; + *p->PTERequestSize = 128; + + *p->vmpg_height = *p->PixelPTEReqHeight; + *p->vmpg_width = *p->PixelPTEReqWidth; + } else { + // default for rest of calculation to go through, when vm is disable, the calulated pte related values 
shouldnt be used anyways + *p->PixelPTEReqHeight = p->MacroTileHeight; + *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + *p->PTERequestSize = 64; + + *p->vmpg_height = p->MacroTileHeight; + *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + + if (p->GPUVMEnable == true) { + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", + __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling)); + DML_ASSERT(0); + } + } + + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); + DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); + DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch); + DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); + DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); + + *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub; + *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth); + *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize); + *p->dpte_row_height_linear = 0; + + if (p->SurfaceTiling == dml2_sw_linear) { + *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1)))); + *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth); + *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize); + + // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 
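For the linear case handled above, dpte_row_height is the largest power of two (capped at 128) whose worth of PTE fetches fits the PTE request buffer, and dpte_row_width_ub and PixelPTEBytesPerRow follow from it; the dpte_row_height_linear programming value is derived the same way just below. Here is a rough standalone sketch of that arithmetic, not part of the patch, with hypothetical inputs (64-entry PTE buffer, 8192-pixel PTE request width, 4096-pixel pitch, 64-byte PTE requests) and a local ceil2() helper in place of math_ceil2().

#include <math.h>
#include <stdio.h>

/* Round x up to a multiple of m (stand-in for math_ceil2()). */
static double ceil2(double x, double m) { return ceil(x / m) * m; }

static void linear_dpte_row_sketch(unsigned int pte_buf_reqs, unsigned int pte_req_width,
		unsigned int pitch, unsigned int pte_req_size)
{
	/* Largest power-of-two row height whose PTE fetches fit the request buffer, capped at 128. */
	unsigned int row_height = 1u << (unsigned int)floor(log2((double)(pte_buf_reqs * pte_req_width / pitch)));
	unsigned int row_width_ub, pte_bytes_per_row;

	if (row_height > 128)
		row_height = 128;

	row_width_ub = (unsigned int)(ceil2((double)pitch * row_height - 1, pte_req_width) + pte_req_width);
	pte_bytes_per_row = row_width_ub / pte_req_width * pte_req_size;

	printf("dpte_row_height=%u dpte_row_width_ub=%u PixelPTEBytesPerRow=%u\n",
			row_height, row_width_ub, pte_bytes_per_row);
}

int main(void)
{
	/* Hypothetical: 4 KiB GPUVM pages and a 32bpp linear surface. */
	linear_dpte_row_sketch(64, 8192, 4096, 64);
	return 0;
}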
+ *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1); + if (*p->dpte_row_height_linear > 128) + *p->dpte_row_height_linear = 128; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); +#endif + + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + *p->dpte_row_height = *p->PixelPTEReqHeight; + + if (p->GPUVMMinPageSizeKBytes > 64) { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth); + } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { + *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth)); + } else { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth); + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); +#endif + + *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize; + } else { + *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth)); + + if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { + *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight)); + } else { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight); + } + + *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); +#endif + } + + if (p->GPUVMEnable != true) { + *p->PixelPTEBytesPerRow = 0; + *p->PixelPTEBytesPerRow_one_row_per_frame = 0; + } + + *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); + DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, 
*p->PixelPTEBytesPerRow_one_row_per_frame); +#endif + + return vm_bytes; +} // CalculateVMAndRowBytes + +static unsigned int CalculatePrefetchSourceLines( + double VRatio, + unsigned int VTaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + enum dml2_rotation_angle RotationAngle, + bool mirrored, + bool ViewportStationary, + unsigned int SwathWidth, + unsigned int ViewportHeight, + unsigned int ViewportXStart, + unsigned int ViewportYStart, + + // Output + unsigned int *VInitPreFill, + unsigned int *MaxNumSwath) +{ + + unsigned int vp_start_rot = 0; + unsigned int sw0_tmp = 0; + unsigned int MaxPartialSwath = 0; + double numLines = 0; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps); + DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); + DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); + DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); + DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); +#endif + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1)); + else + *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1)); + + if (ViewportStationary) { + if (RotationAngle == dml2_rotation_180) { + vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); + } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) { + vp_start_rot = ViewportXStart; + } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) { + vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); + } else { + vp_start_rot = ViewportYStart; + } + sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); + if (sw0_tmp < *VInitPreFill) { + *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1); + } else { + *MaxNumSwath = 1; + } + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight)); + } else { + *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1); + if (*VInitPreFill > 1) { + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight)); + } else { + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight)); + } + } + numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); + DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); + DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); + DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); + DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); +#endif + return (unsigned int)(numLines); + +} + +static void CalculateRowBandwidth( + bool GPUVMEnable, + bool use_one_row_for_frame, + enum dml2_source_format_class SourcePixelFormat, + double VRatio, + double VRatioChroma, + bool DCCEnable, + double LineTime, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned 
int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + + bool mrq_present, + unsigned int meta_row_bytes_per_row_ub_l, + unsigned int meta_row_bytes_per_row_ub_c, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + + // Output + double *dpte_row_bw, + double *meta_row_bw) +{ + if (!DCCEnable || !mrq_present) { + *meta_row_bw = 0; + } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { + *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime) + + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } +} + +static void CalculateMALLUseForStaticScreen( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int SurfaceSizeInMALL[], + bool one_row_per_frame_fits_in_buffer[], + + // Output + bool is_using_mall_for_ss[]) +{ + + unsigned int SurfaceToAddToMALL; + bool CanAddAnotherSurfaceToMALL; + unsigned int TotalSurfaceSizeInMALL; + + TotalSurfaceSizeInMALL = 0; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable); + if (is_using_mall_for_ss[k]) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); +#endif + } + + SurfaceToAddToMALL = 0; + CanAddAnotherSurfaceToMALL = true; + while (CanAddAnotherSurfaceToMALL) { + CanAddAnotherSurfaceToMALL = false; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 && + !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] && + (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { + CanAddAnotherSurfaceToMALL = true; + SurfaceToAddToMALL = k; + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); + } + } + if (CanAddAnotherSurfaceToMALL) { + is_using_mall_for_ss[SurfaceToAddToMALL] = true; + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); +#endif + } + } +} + +static void CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + enum dml2_source_format_class SourcePixelFormat, + unsigned int 
SurfaceWidthLuma, + unsigned int SurfaceWidthChroma, + unsigned int SurfaceHeightLuma, + unsigned int SurfaceHeightChroma, + unsigned int nomDETInKByte, + unsigned int RequestHeight256ByteLuma, + unsigned int RequestHeight256ByteChroma, + enum dml2_swizzle_mode TilingFormat, + unsigned int BytePerPixelY, + unsigned int BytePerPixelC, + double BytePerPixelDETY, + double BytePerPixelDETC, + enum dml2_rotation_angle RotationAngle, + + // Output + enum dml2_core_internal_request_type *RequestLuma, + enum dml2_core_internal_request_type *RequestChroma, + unsigned int *MaxUncompressedBlockLuma, + unsigned int *MaxUncompressedBlockChroma, + unsigned int *MaxCompressedBlockLuma, + unsigned int *MaxCompressedBlockChroma, + unsigned int *IndependentBlockLuma, + unsigned int *IndependentBlockChroma) +{ + unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; + + unsigned int segment_order_horz_contiguous_luma; + unsigned int segment_order_horz_contiguous_chroma; + unsigned int segment_order_vert_contiguous_luma; + unsigned int segment_order_vert_contiguous_chroma; + + unsigned int req128_horz_wc_l; + unsigned int req128_horz_wc_c; + unsigned int req128_vert_wc_l; + unsigned int req128_vert_wc_c; + + unsigned int yuv420; + unsigned int horz_div_l; + unsigned int horz_div_c; + unsigned int vert_div_l; + unsigned int vert_div_c; + + unsigned int swath_buf_size; + double detile_buf_vp_horz_limit; + double detile_buf_vp_vert_limit; + + unsigned int MAS_vp_horz_limit; + unsigned int MAS_vp_vert_limit; + unsigned int max_vp_horz_width; + unsigned int max_vp_vert_height; + unsigned int eff_surf_width_l; + unsigned int eff_surf_width_c; + unsigned int eff_surf_height_l; + unsigned int eff_surf_height_c; + + unsigned int full_swath_bytes_horz_wc_l; + unsigned int full_swath_bytes_horz_wc_c; + unsigned int full_swath_bytes_vert_wc_l; + unsigned int full_swath_bytes_vert_wc_c; + + if (dml_is_420(SourcePixelFormat)) + yuv420 = 1; + else + yuv420 = 0; + horz_div_l = 1; + horz_div_c = 1; + vert_div_l = 1; + vert_div_c = 1; + + if (BytePerPixelY == 1) + vert_div_l = 0; + if (BytePerPixelC == 1) + vert_div_c = 0; + + if (BytePerPixelC == 0) { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; + detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); + detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); + } else { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; + detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); + detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); + } + + if (SourcePixelFormat == dml2_420_10) { + detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; + detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; + } + + detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16); + detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16); + + MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144; + MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 
3072 : 6144); + max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit)); + max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit)); + eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); + eff_surf_width_c = eff_surf_width_l / (1 + yuv420); + eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); + eff_surf_height_c = eff_surf_height_l / (1 + yuv420); + + full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; + full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; + if (BytePerPixelC > 0) { + full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; + full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; + } else { + full_swath_bytes_horz_wc_c = 0; + full_swath_bytes_vert_wc_c = 0; + } + + if (SourcePixelFormat == dml2_420_10) { + full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0)); + } + + if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 0; + } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 1; + } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 1; + req128_horz_wc_c = 0; + } else { + req128_horz_wc_l = 1; + req128_horz_wc_c = 1; + } + + if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 0; + } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 1; + } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 1; + req128_vert_wc_c = 0; + } else { + req128_vert_wc_l = 1; + req128_vert_wc_c = 1; + } + + if (BytePerPixelY == 2) { + segment_order_horz_contiguous_luma = 0; + segment_order_vert_contiguous_luma = 1; + } else { + segment_order_horz_contiguous_luma = 1; + segment_order_vert_contiguous_luma = 0; + } + + if (BytePerPixelC == 2) { + segment_order_horz_contiguous_chroma = 0; + segment_order_vert_contiguous_chroma = 1; + } else { + segment_order_horz_contiguous_chroma = 1; + segment_order_vert_contiguous_chroma = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); + DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); + 
DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); +#endif + if (DCCProgrammingAssumesScanDirectionUnknown == true) { + if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } else if (!dml_is_vertical_rotation(RotationAngle)) { + if (req128_horz_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_horz_contiguous_luma == 0) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_horz_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_horz_contiguous_chroma == 0) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } else { + if (req128_vert_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_vert_contiguous_luma == 0) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_vert_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_vert_contiguous_chroma == 0) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } + + if (*RequestLuma == dml2_core_internal_request_type_256_bytes) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 256; + *IndependentBlockLuma = 0; + } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 128; + *IndependentBlockLuma = 128; + } else { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 64; + *IndependentBlockLuma = 64; + } + + if (*RequestChroma == dml2_core_internal_request_type_256_bytes) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 256; + *IndependentBlockChroma = 0; + } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) { + *MaxUncompressedBlockChroma = 256; + 
*MaxCompressedBlockChroma = 128; + *IndependentBlockChroma = 128; + } else { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 64; + *IndependentBlockChroma = 64; + } + + if (DCCEnabled != true || BytePerPixelC == 0) { + *MaxUncompressedBlockChroma = 0; + *MaxCompressedBlockChroma = 0; + *IndependentBlockChroma = 0; + } + + if (DCCEnabled != true) { + *MaxUncompressedBlockLuma = 0; + *MaxCompressedBlockLuma = 0; + *IndependentBlockLuma = 0; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); + DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); + DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); + DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); + DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); + DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); +#endif + +} + +static void calculate_mcache_row_bytes( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_mcache_row_bytes_params *p) +{ + unsigned int vmpg_bytes = 0; + unsigned int blk_bytes = 0; + float meta_per_mvmpg_per_channel = 0; + unsigned int est_blk_per_vmpg = 2; + unsigned int mvmpg_per_row_ub = 0; + unsigned int full_vp_width_mvmpg_aligned = 0; + unsigned int full_vp_height_mvmpg_aligned = 0; + unsigned int meta_per_mvmpg_per_channel_ub = 0; + unsigned int mvmpg_per_mcache; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans); + DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); + DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); + DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); + DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); + DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); + DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); + DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); + DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width); + DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height); + DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); + DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); +#endif + DML_ASSERT(p->mcache_line_size_bytes != 0); + DML_ASSERT(p->mcache_size_bytes != 0); + + *p->mvmpg_width = 0; + *p->mvmpg_height = 0; + + if (p->full_vp_height == 0 && p->full_vp_width == 0) { + *p->num_mcaches = 0; + *p->mcache_row_bytes = 0; + *p->mcache_row_bytes_per_channel = 0; + } else { + blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode); + + // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size + vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; + + //With vmpg_bytes >= tile blk_bytes, the 
meta_row_width alignment equations are relative to the vmpg_width/height. + // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block. + // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end. + *p->mvmpg_width = p->blk_width; + *p->mvmpg_height = p->blk_height; + if (p->gpuvm_enable) { + if (vmpg_bytes >= blk_bytes) { + *p->mvmpg_width = p->vmpg_width; + *p->mvmpg_height = p->vmpg_height; + } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) { + DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); + DML_ASSERT(0); + } + } + + //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c + full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width)); + full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height)); + + *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned; + + //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes. + if (!p->surf_vert) { //horizontal access + if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes) + *p->meta_row_width_ub = full_vp_width_mvmpg_aligned; + else + *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width; + mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width; + } else { //vertical access + if (p->vp_stationary == 1) + *p->meta_row_width_ub = full_vp_height_mvmpg_aligned; + else + *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height; + mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height; + } + + if (p->gpuvm_enable) { + meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; + + //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic + if (p->surf_vert && vmpg_bytes > blk_bytes) { + meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans; + } + + *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom + } else { + meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans; + + if (!p->surf_vert) + *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; + else + *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); + } + + meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes); + + //but for 4KB vmpg with 64KB tile blk + if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096)) + meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub; + + // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes, + // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes. 
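+ // Illustrative sketch with hypothetical numbers: if mvmpg_per_row_ub = 40 and meta_per_mvmpg_per_channel_ub = 64 bytes,
+ // the gpuvm/vertical path below gives mcache_row_bytes_per_channel = 40 * 64 = 2560 bytes; with mcache_size_bytes = 2048
+ // that works out to num_mcaches = ceil(2560 / 2048) = 2.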
+ if (p->gpuvm_enable || p->surf_vert) { + *p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub; + *p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans; + } else { // horizontal and gpuvm disable + *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256; + if (p->mcache_line_size_bytes != 0) + *p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes); + } + + *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref + if (p->mcache_size_bytes != 0) + *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1); + + mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub; + *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); + DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); + DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); + DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); + DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); + DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); + DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel); + DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); +#endif + DML_ASSERT(*p->num_mcaches > 0); +} + +static void calculate_mcache_setting( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_mcache_setting_params *p) +{ + unsigned int n; + + struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals; + memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals)); + + *p->num_mcaches_l = 0; + *p->mcache_row_bytes_l = 0; + *p->mcache_row_bytes_per_channel_l = 0; + *p->dcc_dram_bw_nom_overhead_factor_l = 1.0; + *p->dcc_dram_bw_pref_overhead_factor_l = 1.0; + + *p->num_mcaches_c = 0; + *p->mcache_row_bytes_c = 0; + *p->mcache_row_bytes_per_channel_c = 0; + *p->dcc_dram_bw_nom_overhead_factor_c = 1.0; + *p->dcc_dram_bw_pref_overhead_factor_c = 1.0; + + *p->mall_comb_mcache_l = 0; + *p->mall_comb_mcache_c = 0; + *p->lc_comb_mcache = 0; + + if (!p->dcc_enable) + return; + + l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha; + + l->l_p.num_chans = p->num_chans; + l->l_p.mem_word_bytes = p->mem_word_bytes; + l->l_p.mcache_size_bytes = p->mcache_size_bytes; + l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes; + l->l_p.gpuvm_enable = p->gpuvm_enable; + l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; + l->l_p.surf_vert = p->surf_vert; + l->l_p.vp_stationary = p->vp_stationary; 
+ l->l_p.tiling_mode = p->tiling_mode; + l->l_p.vp_start_x = p->vp_start_x_l; + l->l_p.vp_start_y = p->vp_start_y_l; + l->l_p.full_vp_width = p->full_vp_width_l; + l->l_p.full_vp_height = p->full_vp_height_l; + l->l_p.blk_width = p->blk_width_l; + l->l_p.blk_height = p->blk_height_l; + l->l_p.vmpg_width = p->vmpg_width_l; + l->l_p.vmpg_height = p->vmpg_height_l; + l->l_p.full_swath_bytes = p->full_swath_bytes_l; + l->l_p.bytes_per_pixel = p->bytes_per_pixel_l; + + // output + l->l_p.num_mcaches = p->num_mcaches_l; + l->l_p.mcache_row_bytes = p->mcache_row_bytes_l; + l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l; + l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l; + l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l; + l->l_p.mvmpg_width = &l->mvmpg_width_l; + l->l_p.mvmpg_height = &l->mvmpg_height_l; + l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l; + l->l_p.meta_row_width_ub = &l->meta_row_width_l; + l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l; + + calculate_mcache_row_bytes(scratch, &l->l_p); + DML_ASSERT(*p->num_mcaches_l > 0); + + if (l->is_dual_plane) { + l->c_p.num_chans = p->num_chans; + l->c_p.mem_word_bytes = p->mem_word_bytes; + l->c_p.mcache_size_bytes = p->mcache_size_bytes; + l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes; + l->c_p.gpuvm_enable = p->gpuvm_enable; + l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; + l->c_p.surf_vert = p->surf_vert; + l->c_p.vp_stationary = p->vp_stationary; + l->c_p.tiling_mode = p->tiling_mode; + l->c_p.vp_start_x = p->vp_start_x_c; + l->c_p.vp_start_y = p->vp_start_y_c; + l->c_p.full_vp_width = p->full_vp_width_c; + l->c_p.full_vp_height = p->full_vp_height_c; + l->c_p.blk_width = p->blk_width_c; + l->c_p.blk_height = p->blk_height_c; + l->c_p.vmpg_width = p->vmpg_width_c; + l->c_p.vmpg_height = p->vmpg_height_c; + l->c_p.full_swath_bytes = p->full_swath_bytes_c; + l->c_p.bytes_per_pixel = p->bytes_per_pixel_c; + + // output + l->c_p.num_mcaches = p->num_mcaches_c; + l->c_p.mcache_row_bytes = p->mcache_row_bytes_c; + l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c; + l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c; + l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c; + l->c_p.mvmpg_width = &l->mvmpg_width_c; + l->c_p.mvmpg_height = &l->mvmpg_height_c; + l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c; + l->c_p.meta_row_width_ub = &l->meta_row_width_c; + l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c; + + calculate_mcache_row_bytes(scratch, &l->c_p); + DML_ASSERT(*p->num_mcaches_c > 0); + } + + // Sharing for iMALL access + l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes; + l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes; + l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l; + l->mvmpg_access_width_c = p->surf_vert ? 
l->mvmpg_height_c : l->mvmpg_width_c;
+
+ if (p->imall_enable) {
+ *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
+
+ if (l->is_dual_plane)
+ *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
+ }
+
+ if (!p->surf_vert) // horizontal access
+ l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
+ else // vertical access
+ l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
+
+ // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c:
+ if (*p->num_mcaches_l) {
+ l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
+ }
+ if (l->is_dual_plane) {
+ l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
+
+ /* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */
+ if (l->mcache_remainder_l && l->mcache_remainder_c) {
+ if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
+ l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
+ (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
+ }
+ *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
+ DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
+ DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
+ DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
+ DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
+ DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
+ DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
+
+ if (l->is_dual_plane) {
+ DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
+ DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
+ DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
+ DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
+ DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
+ DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
+ DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
+ DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
+ }
+#endif
+ // calculate split_coordinate
+ l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
+ l->full_vp_access_width_c = p->surf_vert ?
p->full_vp_height_c : p->full_vp_width_c; + + for (n = 0; n < *p->num_mcaches_l - 1; n++) { + p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l; + } + p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; + + if (l->is_dual_plane) { + for (n = 0; n < *p->num_mcaches_c - 1; n++) { + p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c; + } + p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; + } +#ifdef __DML_VBA_DEBUG__ + for (n = 0; n < *p->num_mcaches_l; n++) + DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + + if (l->is_dual_plane) { + for (n = 0; n < *p->num_mcaches_c; n++) + DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); + } +#endif + + // Luma/Chroma combine in the last mcache + // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary + if (*p->lc_comb_mcache && l->is_dual_plane) { + for (n = 0; n < *p->num_mcaches_l - 1; n++) + p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l; + p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; + + for (n = 0; n < *p->num_mcaches_c - 1; n++) + p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c; + p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; + +#ifdef __DML_VBA_DEBUG__ + for (n = 0; n < *p->num_mcaches_l; n++) + DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + + for (n = 0; n < *p->num_mcaches_c; n++) + DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); +#endif + } + + *p->mcache_shift_granularity_l = l->mvmpg_access_width_l; + *p->mcache_shift_granularity_c = l->mvmpg_access_width_c; +} + +static void calculate_mall_bw_overhead_factor( + double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref + double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref + + // input + const struct dml2_display_cfg *display_cfg, + unsigned int num_active_planes) +{ + for (unsigned int k = 0; k < num_active_planes; ++k) { + mall_prefetch_sdp_overhead_factor[k] = 1.0; + mall_prefetch_dram_overhead_factor[k] = 1.0; + + // SDP - on the return side + if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return + mall_prefetch_sdp_overhead_factor[k] = 1.25; + else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) + mall_prefetch_sdp_overhead_factor[k] = 0.25; + + // DRAM + if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) + mall_prefetch_dram_overhead_factor[k] = 2.0; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); +#endif + } +} + +static double dml_get_return_bandwidth_available( + const struct dml2_soc_bb *soc, + enum dml2_core_internal_soc_state_type state_type, + enum 
dml2_core_internal_bw_type bw_type, + bool is_avg_bw, + bool is_hvm_en, + bool is_hvm_only, + double dcfclk_mhz, + double fclk_mhz, + double dram_bw_mbps) +{ + double return_bw_mbps = 0.; + double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz; + double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; + double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; + + double derate_sdp_factor; + double derate_fabric_factor; + double derate_dram_factor; + + double derate_sdp_bandwidth; + double derate_fabric_bandwidth; + double derate_dram_bandwidth; + + if (is_avg_bw) { + if (state_type == dml2_core_internal_soc_state_svp_prefetch) { + derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0; + } else { // just assume sys_active + derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0; + } + } else { // urgent bw + if (state_type == dml2_core_internal_soc_state_svp_prefetch) { + derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; + + if (is_hvm_en) { + if (is_hvm_only) + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0; + else + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0; + } else { + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; + } + } else { // just assume sys_active + derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0; + + if (is_hvm_en) { + if (is_hvm_only) + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0; + else + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0; + } else { + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0; + } + } + } + + derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor; + derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor; + derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor; + + if (bw_type == dml2_core_internal_bw_sdp) + return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth); + else // dml2_core_internal_bw_dram + return_bw_mbps = derate_dram_bandwidth; + + DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); + DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", 
__func__, is_hvm_en);
+ DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
+ DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
+ DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
+ DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
+ DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
+ DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
+ DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor);
+ DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor);
+ DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor);
+ DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps);
+ return return_bw_mbps;
+}
+
+static noinline_for_stack void calculate_bandwidth_available(
+ double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
+ double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+ double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
+ double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+ double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
+ double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
+
+ const struct dml2_soc_bb *soc,
+ bool HostVMEnable,
+ double dcfclk_mhz,
+ double fclk_mhz,
+ double dram_bw_mbps)
+{
+ unsigned int n, m;
+
+ DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
+
+ // Calculate all the bandwidth available
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) {
+ avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
+ m, // soc_state
+ n, // bw_type
+ 1, // avg_bw
+ HostVMEnable,
+ 0, // hvm_only
+ dcfclk_mhz,
+ fclk_mhz,
+ dram_bw_mbps);
+
+ urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
+#endif
+
+ // urg_bandwidth_available_vm_only is indexed by soc_state
+ if (n == dml2_core_internal_bw_dram) {
+ urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+ urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+ }
+ }
+
+ avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
+ urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[m]);
+#endif
+ }
+}
+
+static void calculate_avg_bandwidth_required(
+ double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+
+ // input
+ const struct dml2_display_cfg *display_cfg,
+ unsigned int num_active_planes,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double cursor_bw[],
+ double dcc_dram_bw_nom_overhead_factor_p0[],
+ double dcc_dram_bw_nom_overhead_factor_p1[],
+ double mall_prefetch_dram_overhead_factor[],
+ double mall_prefetch_sdp_overhead_factor[])
+{
+ unsigned int n, m, k;
+ double sdp_overhead_factor;
+ double dram_overhead_factor_p0;
+ double dram_overhead_factor_p1;
+
+ // Average BW support check
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram
+ avg_bandwidth_required[m][n] = 0;
+ }
+ }
+
+ // SysActive and SVP Prefetch AVG bandwidth Check
+ for (k = 0; k < num_active_planes; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
+ DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
+ DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
+ DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
+#endif
+
+ sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k];
+ dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
+ dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
+
+ // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
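+ // Note: as written, cursor_bw[k] is included in the per-plane sums below; tdlut bandwidth is not accounted for here.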
+ // active avg bw does not include phantom, but svp_prefetch avg bw should include phantom pipes
+ if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
+ avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
+ }
+ avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
+ avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+#endif
+ }
+}
+
+static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
+{
+ struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
+
+ s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->display_cfg->gpuvm_enable == true) {
+ p->vm_group_bytes[k] = 512;
+ p->dpte_group_bytes[k] = 512;
+ } else {
+ p->vm_group_bytes[k] = 0;
+ p->dpte_group_bytes[k] = 0;
+ }
+
+ if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
+ if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
+ s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
+ s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
+ s->PTEBufferSizeInRequestsForChroma[k] =
p->PTEBufferSizeInRequestsChroma; + } + + scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; + scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; + scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; + scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC; + scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC; + scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; + scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; + scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC; + scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; + scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k]; + scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC; + scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC; + scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC; + scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; + scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; + scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k]; + scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC; + scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC; + scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC; + scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); + scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC; + scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; + + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k]; + scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k]; + 
scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k]; + + scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k]; + + s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); + + p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatioChroma, + p->myPipe[k].VTapsChroma, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightC, + p->myPipe[k].RotationAngle, + p->myPipe[k].mirrored, + p->myPipe[k].ViewportStationary, + p->SwathWidthC[k], + p->myPipe[k].ViewportHeightC, + p->myPipe[k].ViewportXStartC, + p->myPipe[k].ViewportYStartC, + + // Output + &p->VInitPreFillC[k], + &p->MaxNumSwathC[k]); + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; + s->PTEBufferSizeInRequestsForChroma[k] = 0; + s->PixelPTEBytesPerRowC[k] = 0; + s->PixelPTEBytesPerRowStorageC[k] = 0; + s->vm_bytes_c = 0; + p->MaxNumSwathC[k] = 0; + p->PrefetchSourceLinesC[k] = 0; + s->dpte_row_height_chroma_one_row_per_frame[k] = 0; + s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + } + + scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; + scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; + scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; + scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY; + scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY; + scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; + scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; + scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY; + scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; + scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k]; + scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight; + scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart; + scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart; + scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; + scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; + scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k]; + scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY; + scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = 
p->myPipe[k].BlockWidthY; + scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY; + scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); + scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY; + scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; + + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k]; + scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k]; + scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k]; + + scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k]; + + s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); + + p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatio, + p->myPipe[k].VTaps, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightY, + p->myPipe[k].RotationAngle, + p->myPipe[k].mirrored, + p->myPipe[k].ViewportStationary, + p->SwathWidthY[k], + p->myPipe[k].ViewportHeight, + p->myPipe[k].ViewportXStart, + p->myPipe[k].ViewportYStart, + + // Output + &p->VInitPreFillY[k], + &p->MaxNumSwathY[k]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); +#endif + p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels); + 
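+ // The (1 + 8 * HostVMDynamicLevels) factor accounts for the additional host VM page-table requests per dynamic
+ // level (it is simply 1 when HostVMDynamicLevels is 0); the same scaling is applied to PixelPTEBytesPerRowY/C below.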
p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k]; + p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k]; + p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); +#endif + if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { + p->PTEBufferSizeNotExceeded[k] = true; + } else { + p->PTEBufferSizeNotExceeded[k] = false; + } + + s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] && + s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); +#ifdef __DML_VBA_DEBUG__ + if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) { + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + + DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); + } +#endif + } + + CalculateMALLUseForStaticScreen( + p->display_cfg, + p->NumberOfActiveSurfaces, + p->MALLAllocatedForDCN, + p->SurfaceSizeInMALL, + s->one_row_per_frame_fits_in_buffer, + // Output + p->is_using_mall_for_ss); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->gpuvm_enable) { + if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) { + p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value; + } + p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || + dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64); + p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12); + } else { + 
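+ // gpuvm disabled: PTE buffer mode and BIGK fragment size do not apply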
p->PTE_BUFFER_MODE[k] = 0; + p->BIGK_FRAGMENT_SIZE[k] = 0; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DCCMetaBufferSizeNotExceeded[k] = true; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); +#endif + p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || + (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)); + + p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame); + + if (p->use_one_row_for_frame[k]) { + p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k]; + p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k]; + s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k]; + p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k]; + p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k]; + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k]; + p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k]; + } + + if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) { + p->DCCMetaBufferSizeNotExceeded[k] = true; + } else { + p->DCCMetaBufferSizeNotExceeded[k] = false; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); + DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); +#endif + } + + s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels); + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels); + p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k]; + p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k]; + p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k]; + + // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs + if (p->use_one_row_for_frame[k]) + p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2; + + CalculateRowBandwidth( + p->display_cfg->gpuvm_enable, + p->use_one_row_for_frame[k], + p->myPipe[k].SourcePixelFormat, + p->myPipe[k].VRatio, + p->myPipe[k].VRatioChroma, + p->myPipe[k].DCCEnable, + p->myPipe[k].HTotal / p->myPipe[k].PixelClock, + s->PixelPTEBytesPerRowY[k], + s->PixelPTEBytesPerRowC[k], + p->dpte_row_height_luma[k], + p->dpte_row_height_chroma[k], + + p->mrq_present, + p->meta_row_bytes_per_row_ub_l[k], + p->meta_row_bytes_per_row_ub_c[k], + p->meta_row_height_luma[k], + p->meta_row_height_chroma[k], + + // Output + &p->dpte_row_bw[k], + &p->meta_row_bw[k]); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + 
DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); +#endif + } +} + +static double CalculateUrgentLatency( + double UrgentLatencyPixelDataOnly, + double UrgentLatencyPixelMixedWithVMData, + double UrgentLatencyVMDataOnly, + bool DoUrgentLatencyAdjustment, + double UrgentLatencyAdjustmentFabricClockComponent, + double UrgentLatencyAdjustmentFabricClockReference, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int urgent_ramp_uclk_cycles, + unsigned int df_qos_response_time_fclk_cycles, + unsigned int max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_urgent_ramp_latency_margin, + double fabric_max_transport_latency_margin) +{ + double urgent_latency = 0; + if (qos_type == dml2_qos_param_type_dcn4x) { + urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); + } else { + urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); + if (DoUrgentLatencyAdjustment == true) { + urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); + } + } +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4x) { + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); + } else { + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = 
%f\n", __func__, UrgentLatencyVMDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); + } + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); +#endif + return urgent_latency; +} + +static double CalculateTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int trip_to_memory_uclk_cycles, + unsigned int max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin) +{ + double trip_to_memory_us; + if (qos_type == dml2_qos_param_type_dcn4x) { + trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); + } else { + trip_to_memory_us = UrgLatency; + } + +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4x) { + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); + DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); + } else { + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + } + DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); +#endif + + + return trip_to_memory_us; +} + +static double CalculateMetaTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int meta_trip_to_memory_uclk_cycles, + unsigned int meta_trip_to_memory_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin) +{ + double meta_trip_to_memory_us; + if (qos_type == dml2_qos_param_type_dcn4x) { + meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); + } else { + meta_trip_to_memory_us = UrgLatency; + } + +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4x) { + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + } else { + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + } + 
DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); +#endif + + + return meta_trip_to_memory_us; +} + +static void calculate_cursor_req_attributes( + unsigned int cursor_width, + unsigned int cursor_bpp, + + // output + unsigned int *cursor_lines_per_chunk, + unsigned int *cursor_bytes_per_line, + unsigned int *cursor_bytes_per_chunk, + unsigned int *cursor_bytes) +{ + unsigned int cursor_bytes_per_req = 0; + unsigned int cursor_width_bytes = 0; + unsigned int cursor_height = 0; + + //SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply. + //- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B + //- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width + + //The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows. + + cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1); + if (cursor_width_bytes <= 64) + cursor_bytes_per_req = 64; + else if (cursor_width_bytes <= 128) + cursor_bytes_per_req = 128; + else + cursor_bytes_per_req = 256; + + //If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line. + *cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req); + + //Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines. + if (cursor_bpp == 2) { + *cursor_lines_per_chunk = 16; + } else if (cursor_bpp == 32) { + if (cursor_width <= 32) + *cursor_lines_per_chunk = 16; + else if (cursor_width <= 64) + *cursor_lines_per_chunk = 8; + else if (cursor_width <= 128) + *cursor_lines_per_chunk = 4; + else + *cursor_lines_per_chunk = 2; + } else if (cursor_bpp == 64) { + if (cursor_width <= 16) + *cursor_lines_per_chunk = 16; + else if (cursor_width <= 32) + *cursor_lines_per_chunk = 8; + else if (cursor_width <= 64) + *cursor_lines_per_chunk = 4; + else if (cursor_width <= 128) + *cursor_lines_per_chunk = 2; + else + *cursor_lines_per_chunk = 1; + } else { + if (cursor_width > 0) { + DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); + DML_ASSERT(0); + } + } + + *cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk; + + // For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize. 
+ // Only cursor_width is provided for worst case sizing so assume that the cursor is square + cursor_height = cursor_width; + *cursor_bytes = *cursor_bytes_per_line * cursor_height; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); + DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width); + DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1)); +#endif +} + +static void calculate_cursor_urgent_burst_factor( + unsigned int CursorBufferSize, + unsigned int CursorWidth, + unsigned int cursor_bytes_per_chunk, + unsigned int cursor_lines_per_chunk, + double LineTime, + double UrgentLatency, + + double *UrgentBurstFactorCursor, + bool *NotEnoughUrgentLatencyHiding) +{ + unsigned int LinesInCursorBuffer = 0; + double CursorBufferSizeInTime = 0; + + if (CursorWidth > 0) { + LinesInCursorBuffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk; + + CursorBufferSizeInTime = LinesInCursorBuffer * LineTime; + if (CursorBufferSizeInTime - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorCursor = 1; + } else { + *NotEnoughUrgentLatencyHiding = 0; + *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); + DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); + DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); +#endif + + } +} + +static void CalculateUrgentBurstFactor( + const struct dml2_plane_parameters *plane_cfg, + unsigned int swath_width_luma_ub, + unsigned int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double LineTime, + double UrgentLatency, + double VRatio, + double VRatioC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + unsigned int DETBufferSizeY, + unsigned int DETBufferSizeC, + // Output + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorChroma, + bool *NotEnoughUrgentLatencyHiding) +{ + double LinesInDETLuma; + double LinesInDETChroma; + double DETBufferSizeInTimeLuma; + double DETBufferSizeInTimeChroma; + + *NotEnoughUrgentLatencyHiding = 0; + *UrgentBurstFactorLuma = 0; + *UrgentBurstFactorChroma = 0; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + 
DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); +#endif + DML_ASSERT(VRatio > 0); + + LinesInDETLuma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; + + DETBufferSizeInTimeLuma = math_floor2(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorLuma = 1; + } else { + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); + } + + if (BytePerPixelInDETC > 0) { + LinesInDETChroma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub; + + DETBufferSizeInTimeChroma = math_floor2(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC; + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorChroma = 1; + } else { + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); + } + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); +#endif +} + +static void CalculateDCFCLKDeepSleepTdlut( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + unsigned int DPPPerSurface[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + double dispclk, + unsigned int tdlut_bytes_to_deliver[], + double prefetch_swath_time_us[], + + // Output + double *DCFClkDeepSleep) +{ + double DisplayPipeLineDeliveryTimeLuma; + double DisplayPipeLineDeliveryTimeChroma; + double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES]; + double ReadBandwidth = 0.0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz; + } else { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma = 0; + } else { + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) 
{ + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz; + } else { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (BytePerPixelC[k] > 0) { + DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, + __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); + } else { + DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; + } + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16); + + // adjust for 3dlut delivery time + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) { + double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k]; + + DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); + + // increase the deepsleep dcfclk to match the original dispclk throughput rate + if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) { + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk); + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0); + } + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); + DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); +#endif + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } + + *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); + DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); +#endif + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); + } + + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); +} + +static noinline_for_stack void CalculateDCFCLKDeepSleep( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + unsigned int DPPPerSurface[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + // Output + double *DCFClkDeepSleep) +{ + double 
zero_double[DML2_MAX_PLANES]; + unsigned int zero_integer[DML2_MAX_PLANES]; + + memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double)); + memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int)); + + CalculateDCFCLKDeepSleepTdlut( + display_cfg, + NumberOfActiveSurfaces, + BytePerPixelY, + BytePerPixelC, + SwathWidthY, + SwathWidthC, + DPPPerSurface, + PSCL_THROUGHPUT, + PSCL_THROUGHPUT_CHROMA, + Dppclk, + ReadBandwidthLuma, + ReadBandwidthChroma, + ReturnBusWidth, + 0, + zero_integer, //tdlut_bytes_to_deliver, + zero_double, //prefetch_swath_time_us, + + // Output + DCFClkDeepSleep); +} + +static double CalculateWriteBackDelay( + enum dml2_source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackVTaps, + unsigned int WritebackDestinationWidth, + unsigned int WritebackDestinationHeight, + unsigned int WritebackSourceHeight, + unsigned int HTotal) +{ + double CalculateWriteBackDelay; + double Line_length; + double Output_lines_last_notclamped; + double WritebackVInit; + + WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; + Line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); + Output_lines_last_notclamped = WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)WritebackVInit) / (double)WritebackVRatio, 1.0); + if (Output_lines_last_notclamped < 0) { + CalculateWriteBackDelay = 0; + } else { + CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; + } + return CalculateWriteBackDelay; +} + +static unsigned int CalculateMaxVStartup( + bool ptoi_supported, + unsigned int vblank_nom_default_us, + const struct dml2_timing_cfg *timing, + double write_back_delay_us) +{ + unsigned int vblank_size = 0; + unsigned int max_vstartup_lines = 0; + + double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000); + unsigned int vblank_actual = timing->v_total - timing->v_active; + unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0); + unsigned int vblank_avail = (timing->vblank_nom == 0) ? 
vblank_nom_default_in_line : (unsigned int)timing->vblank_nom; + + vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail); + + if (timing->interlaced && !ptoi_supported) + max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0)); + else + max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom); + DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); + DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us); + DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); + DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); + DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); +#endif + max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START); + return max_vstartup_lines; +} + +static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p) +{ + unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 }; + unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 }; + unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 }; + unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 }; + unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 }; + unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 }; + + unsigned int TotalActiveDPP = 0; + bool NoChromaOrLinear = true; + unsigned int SurfaceDoingUnboundedRequest = 0; + unsigned int DETBufferSizeInKByteForSwathCalculation; + + const long TTUFIFODEPTH = 8; + const long MAXIMUMCOMPRESSION = 4; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); + } +#endif + CalculateSwathWidth( + p->display_cfg, + p->ForceSingleDPP, + p->NumberOfActiveSurfaces, + p->ODMMode, + p->BytePerPixY, + p->BytePerPixC, + p->Read256BytesBlockHeightY, + p->Read256BytesBlockHeightC, + p->Read256BytesBlockWidthY, + p->Read256BytesBlockWidthC, + p->surf_linear128_l, + p->surf_linear128_c, + p->DPPPerSurface, + + // Output + p->req_per_swath_ub_l, + p->req_per_swath_ub_c, + SwathWidthSingleDPP, + SwathWidthSingleDPPChroma, + p->SwathWidth, + p->SwathWidthChroma, + MaximumSwathHeightY, + MaximumSwathHeightC, + p->swath_width_luma_ub, + p->swath_width_chroma_ub); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); + p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u 
swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); +#endif + if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) { + p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256)); + p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256)); + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]); + if (p->DPPPerSurface[k] > 0) + SurfaceDoingUnboundedRequest = k; + if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha + || p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { + NoChromaOrLinear = false; + } + } + + *p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear); + + CalculateDETBufferSize( + &scratch->CalculateDETBufferSize_locals, + p->display_cfg, + p->ForceSingleDPP, + p->NumberOfActiveSurfaces, + *p->UnboundedRequestEnabled, + p->nomDETInKByte, + p->MaxTotalDETInKByte, + p->ConfigReturnBufferSizeInKByte, + p->MinCompressedBufferSizeInKByte, + p->ConfigReturnBufferSegmentSizeInkByte, + p->CompressedBufferSegmentSizeInkByte, + p->ReadBandwidthLuma, + p->ReadBandwidthChroma, + p->full_swath_bytes_l, + p->full_swath_bytes_c, + p->DPPPerSurface, + + // Output + p->DETBufferSizeInKByte, // per hubp pipe + p->CompressedBufferSizeInkByte); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); +#endif + + *p->ViewportSizeSupport = true; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + + DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 
1024 : p->DETBufferSizeInKByte[k]); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); +#endif + if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + + if (p->surf_linear128_l[k]) + p->request_size_bytes_luma[k] = 128; + else + p->request_size_bytes_luma[k] = 256; + + if (p->surf_linear128_c[k]) + p->request_size_bytes_chroma[k] = 128; + else + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + p->request_size_bytes_luma[k] = 256; + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; + p->request_size_bytes_luma[k] = 256; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + + } else { + p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 
128 : 64; + } + + if (p->SwathHeightC[k] == 0) + p->request_size_bytes_chroma[k] = 0; + + if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || + p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { + *p->ViewportSizeSupport = false; + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); + p->ViewportSizeSupportPerSurface[k] = false; + } else { + p->ViewportSizeSupportPerSurface[k] = true; + } + + if (p->SwathHeightC[k] == 0) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; + p->DETBufferSizeC[k] = 0; + } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + } else { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); +#endif + + } + + *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; + if (*p->UnboundedRequestEnabled) { + *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, + (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH 
/ (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); +#endif + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); +#endif + + *p->hw_debug5 = false; +#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE + if (p->NumberOfActiveSurfaces > 1) + *p->hw_debug5 = true; +#else + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) + && p->display_cfg->plane_descriptors[k].surface.dcc.enable + && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) + + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) + *p->hw_debug5 = true; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); +#endif + } +#endif +} + +static enum dml2_odm_mode DecideODMMode(unsigned int HActive, + double MaxDispclk, + unsigned int MaximumPixelsPerLinePerDSCUnit, + enum dml2_output_format_class OutFormat, + bool UseDSC, + unsigned int NumberOfDSCSlices, + double SurfaceRequiredDISPCLKWithoutODMCombine, + double SurfaceRequiredDISPCLKWithODMCombineTwoToOne, + double SurfaceRequiredDISPCLKWithODMCombineThreeToOne, + double SurfaceRequiredDISPCLKWithODMCombineFourToOne) +{ + enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock; + enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive; + enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive; + enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass; + + MinimumRequiredODMModeForMaxDispClock = + (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk) ? dml2_odm_mode_bypass : + (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ? dml2_odm_mode_combine_2to1 : + (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; + if (ODMMode < MinimumRequiredODMModeForMaxDispClock) + ODMMode = MinimumRequiredODMModeForMaxDispClock; + + if (UseDSC) { + MinimumRequiredODMModeForMaxDSCHActive = + (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_bypass : + (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_2to1 : + (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; + if (ODMMode < MinimumRequiredODMModeForMaxDSCHActive) + ODMMode = MinimumRequiredODMModeForMaxDSCHActive; + } + + if (OutFormat == dml2_420) { + MinimumRequiredODMModeForMax420HActive = + (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_bypass : + (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? 
dml2_odm_mode_combine_2to1 : + (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; + if (ODMMode < MinimumRequiredODMModeForMax420HActive) + ODMMode = MinimumRequiredODMModeForMax420HActive; + } + + if (UseDSC) { + if (ODMMode == dml2_odm_mode_bypass && NumberOfDSCSlices > 4) + ODMMode = dml2_odm_mode_combine_2to1; + if (ODMMode == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8) + ODMMode = dml2_odm_mode_combine_3to1; + if (ODMMode == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12) + ODMMode = dml2_odm_mode_combine_4to1; + } + + return ODMMode; +} + +static void CalculateODMConstraints( + enum dml2_odm_mode ODMUse, + double SurfaceRequiredDISPCLKWithoutODMCombine, + double SurfaceRequiredDISPCLKWithODMCombineTwoToOne, + double SurfaceRequiredDISPCLKWithODMCombineThreeToOne, + double SurfaceRequiredDISPCLKWithODMCombineFourToOne, + unsigned int MaximumPixelsPerLinePerDSCUnit, + /* Output */ + double *DISPCLKRequired, + unsigned int *NumberOfDPPRequired, + unsigned int *MaxHActiveForDSC, + unsigned int *MaxDSCSlices, + unsigned int *MaxHActiveFor420) +{ + switch (ODMUse) { + case dml2_odm_mode_combine_2to1: + *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + *NumberOfDPPRequired = 2; + break; + case dml2_odm_mode_combine_3to1: + *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne; + *NumberOfDPPRequired = 3; + break; + case dml2_odm_mode_combine_4to1: + *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne; + *NumberOfDPPRequired = 4; + break; + case dml2_odm_mode_auto: + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + case dml2_odm_mode_bypass: + default: + *DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine; + *NumberOfDPPRequired = 1; + break; + } + *MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit; + *MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC; + *MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH; +} + +static bool ValidateODMMode(enum dml2_odm_mode ODMMode, + double MaxDispclk, + unsigned int HActive, + enum dml2_output_format_class OutFormat, + bool UseDSC, + unsigned int NumberOfDSCSlices, + unsigned int TotalNumberOfActiveDPP, + unsigned int TotalNumberOfActiveOPP, + unsigned int MaxNumDPP, + unsigned int MaxNumOPP, + double DISPCLKRequired, + unsigned int NumberOfDPPRequired, + unsigned int MaxHActiveForDSC, + unsigned int MaxDSCSlices, + unsigned int MaxHActiveFor420) +{ + bool are_odm_segments_symmetrical = (ODMMode == dml2_odm_mode_combine_3to1) ? UseDSC : true; + bool is_max_dsc_slice_required = (ODMMode == dml2_odm_mode_combine_3to1); + unsigned int pixels_per_clock_cycle = (OutFormat == dml2_420 || OutFormat == dml2_n422) ? 2 : 1; + unsigned int h_timing_div_mode = + (ODMMode == dml2_odm_mode_combine_4to1 || ODMMode == dml2_odm_mode_combine_3to1) ? 4 : + (ODMMode == dml2_odm_mode_combine_2to1) ? 2 : pixels_per_clock_cycle; + + if (DISPCLKRequired > MaxDispclk) + return false; + if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP || (TotalNumberOfActiveOPP + NumberOfDPPRequired) > MaxNumOPP) + return false; + if (are_odm_segments_symmetrical) { + if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle)) + return false; + } + if (HActive % h_timing_div_mode) + /* + * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and + * OTG_H_SYNC_A_START/END all need to be visible by h timing div + * mode. This logic only checks H active. 
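+		 * (For example, ODM combine 2to1 with a 444 output gives
+		 * h_timing_div_mode = 2, so this check requires HActive to be even.)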
+ */ + return false; + + if (UseDSC) { + if (HActive > MaxHActiveForDSC) + return false; + if (NumberOfDSCSlices > MaxDSCSlices) + return false; + if (HActive % NumberOfDSCSlices) + return false; + if (NumberOfDSCSlices % NumberOfDPPRequired) + return false; + if (is_max_dsc_slice_required) { + if (NumberOfDSCSlices != MaxDSCSlices) + return false; + } + } + + if (OutFormat == dml2_420) { + if (HActive > MaxHActiveFor420) + return false; + } + + return true; +} + +static noinline_for_stack void CalculateODMMode( + unsigned int MaximumPixelsPerLinePerDSCUnit, + unsigned int HActive, + enum dml2_output_format_class OutFormat, + enum dml2_output_encoder_class Output, + enum dml2_odm_mode ODMUse, + double MaxDispclk, + bool DSCEnable, + unsigned int TotalNumberOfActiveDPP, + unsigned int TotalNumberOfActiveOPP, + unsigned int MaxNumDPP, + unsigned int MaxNumOPP, + double PixelClock, + unsigned int NumberOfDSCSlices, + + // Output + bool *TotalAvailablePipesSupport, + unsigned int *NumberOfDPP, + enum dml2_odm_mode *ODMMode, + double *RequiredDISPCLKPerSurface) +{ + double SurfaceRequiredDISPCLKWithoutODMCombine; + double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + double SurfaceRequiredDISPCLKWithODMCombineThreeToOne; + double SurfaceRequiredDISPCLKWithODMCombineFourToOne; + double DISPCLKRequired; + unsigned int NumberOfDPPRequired; + unsigned int MaxHActiveForDSC; + unsigned int MaxDSCSlices; + unsigned int MaxHActiveFor420; + bool success; + bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0); + enum dml2_odm_mode DecidedODMMode; + bool isTMDS420 = (OutFormat == dml2_420 && Output == dml2_hdmi); + + SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, isTMDS420); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse); + DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); + DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); +#endif + if (ODMUse == dml2_odm_mode_auto) + DecidedODMMode = DecideODMMode(HActive, + MaxDispclk, + MaximumPixelsPerLinePerDSCUnit, + OutFormat, + UseDSC, + NumberOfDSCSlices, + SurfaceRequiredDISPCLKWithoutODMCombine, + SurfaceRequiredDISPCLKWithODMCombineTwoToOne, + SurfaceRequiredDISPCLKWithODMCombineThreeToOne, + SurfaceRequiredDISPCLKWithODMCombineFourToOne); + else + DecidedODMMode = ODMUse; + CalculateODMConstraints(DecidedODMMode, + 
SurfaceRequiredDISPCLKWithoutODMCombine, + SurfaceRequiredDISPCLKWithODMCombineTwoToOne, + SurfaceRequiredDISPCLKWithODMCombineThreeToOne, + SurfaceRequiredDISPCLKWithODMCombineFourToOne, + MaximumPixelsPerLinePerDSCUnit, + &DISPCLKRequired, + &NumberOfDPPRequired, + &MaxHActiveForDSC, + &MaxDSCSlices, + &MaxHActiveFor420); + success = ValidateODMMode(DecidedODMMode, + MaxDispclk, + HActive, + OutFormat, + UseDSC, + NumberOfDSCSlices, + TotalNumberOfActiveDPP, + TotalNumberOfActiveOPP, + MaxNumDPP, + MaxNumOPP, + DISPCLKRequired, + NumberOfDPPRequired, + MaxHActiveForDSC, + MaxDSCSlices, + MaxHActiveFor420); + + *ODMMode = DecidedODMMode; + *TotalAvailablePipesSupport = success; + *NumberOfDPP = NumberOfDPPRequired; + *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode); + DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); + DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); +#endif +} + +static noinline_for_stack void CalculateOutputLink( + struct dml2_core_internal_scratch *s, + double PHYCLK, + double PHYCLKD18, + double PHYCLKD32, + double Downspreading, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class OutputFormat, + unsigned int HTotal, + unsigned int HActive, + double PixelClockBackEnd, + double ForcedOutputLinkBPP, + unsigned int DSCInputBitPerComponent, + unsigned int NumberOfDSCSlices, + double AudioSampleRate, + unsigned int AudioSampleLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum dml2_odm_mode ODMModeDSC, + enum dml2_dsc_enable_option DSCEnable, + unsigned int OutputLinkDPLanes, + enum dml2_output_link_dp_rate OutputLinkDPRate, + + // Output + bool *RequiresDSC, + bool *RequiresFEC, + double *OutBpp, + enum dml2_core_internal_output_type *OutputType, + enum dml2_core_internal_output_type_rate *OutputRate, + unsigned int *RequiredSlots) +{ + bool LinkDSCEnable; + unsigned int dummy; + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = 0; + + *OutputType = dml2_core_internal_output_type_unknown; + *OutputRate = dml2_core_internal_output_rate_unknown; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); + DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); + DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); + DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); +#endif + { + if (Output == dml2_hdmi) { + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, 
ODMModeDSC, &dummy); + //OutputTypeAndRate = "HDMI"; + *OutputType = dml2_core_internal_output_type_hdmi; + } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) { + if (DSCEnable == dml2_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp || Output == dml2_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + if (Output == dml2_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } + if (Output == dml2_dp2p0) { + *OutBpp = 0; + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR10"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR13p5"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = 
TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR20"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20; + } + } else { // output is dp or edp + *OutBpp = 0; + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR"; + *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR2"; + *OutputType = (Output == dml2_dp) ? 
dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR3"; + *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3; + } + } + } else if (Output == dml2_hdmifrl) { + if (DSCEnable == dml2_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + *RequiresFEC = false; + } + *OutBpp = 0; + if (PHYCLKD18 >= 3000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "3x3"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3; + } + if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "6x3"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3; + } + if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "6x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "8x4"; + *OutputType = 
dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + } + //OutputTypeAndRate = Output & "10x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + } + //OutputTypeAndRate = Output & "12x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4; + } + } + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); + DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); + DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp); +#endif +} + +static double CalculateWriteBackDISPCLK( + enum dml2_source_format_class WritebackPixelFormat, + double PixelClock, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackHTaps, + unsigned int WritebackVTaps, + unsigned int WritebackSourceWidth, + unsigned int WritebackDestinationWidth, + unsigned int HTotal, + unsigned int WritebackLineBufferSize) +{ + double DISPCLK_H, DISPCLK_V, DISPCLK_HB; + + DISPCLK_H = PixelClock * math_ceil2((double)WritebackHTaps / 8.0, 1) / WritebackHRatio; + DISPCLK_V = PixelClock * (WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0) / (double)HTotal; + DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (double)WritebackSourceWidth; + return math_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); +} + +static double RequiredDTBCLK( + bool DSCEnable, + double PixelClock, + enum dml2_output_format_class OutputFormat, + double OutputBpp, + unsigned int DSCSlices, + unsigned int HTotal, + unsigned int HActive, + unsigned int AudioRate, + unsigned int AudioLayout) +{ + if (DSCEnable != true) { + return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + } else { + double 
PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2); + double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); + double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + double HActiveTribyteRate = PixelWordRate * HCActive / HActive; + return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; + } +} + +static unsigned int DSCDelayRequirement( + bool DSCEnabled, + enum dml2_odm_mode ODMMode, + unsigned int DSCInputBitPerComponent, + double OutputBpp, + unsigned int HActive, + unsigned int HTotal, + unsigned int NumberOfDSCSlices, + enum dml2_output_format_class OutputFormat, + enum dml2_output_encoder_class Output, + double PixelClock, + double PixelClockBackEnd) +{ + unsigned int DSCDelayRequirement_val = 0; + unsigned int NumberOfDSCSlicesFactor = 1; + + if (DSCEnabled == true && OutputBpp != 0) { + + if (ODMMode == dml2_odm_mode_combine_4to1) + NumberOfDSCSlicesFactor = 4; + else if (ODMMode == dml2_odm_mode_combine_3to1) + NumberOfDSCSlicesFactor = 3; + else if (ODMMode == dml2_odm_mode_combine_2to1) + NumberOfDSCSlicesFactor = 2; + + DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)), + (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output)); + + DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0)); + DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd); + + } else { + DSCDelayRequirement_val = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode); + DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); + DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); + DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); + DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); +#endif + + return DSCDelayRequirement_val; +} + +static void CalculateSurfaceSizeInMall( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int BytesPerPixelY[], + unsigned int BytesPerPixelC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int ReadBlockWidthY[], + unsigned int ReadBlockWidthC[], + unsigned int ReadBlockHeightY[], + unsigned int ReadBlockHeightC[], + + // Output + unsigned int SurfaceSizeInMALL[], + bool *ExceededMALLSize) +{ + unsigned int TotalSurfaceSizeInMALLForSS = 0; + unsigned int TotalSurfaceSizeInMALLForSubVP = 0; 
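+	/* MALLAllocatedForDCN is given in MB; the per-surface totals below are
+	 * accumulated in bytes, so convert once up front for the capacity checks.
+	 */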
+ unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition; + const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface; + + if (composition->viewport.stationary) { + SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]), + math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - + math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) * + math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]), + math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - + math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]); + + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]), + math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - + math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) * + math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]), + math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - + math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]); + } + } else { + SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * + math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]); + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * + math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]); + } + } + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + /* SS and Subvp counted separate as they are never used at the same time */ + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) + TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k]; + else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable) + TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k]; + } + + *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || + (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); + DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); +#endif +} + +static void 
calculate_tdlut_setting( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_tdlut_setting_params *p) +{ + // locals + unsigned int tdlut_bpe = 8; + unsigned int tdlut_width; + unsigned int tdlut_pitch_bytes; + unsigned int tdlut_footprint_bytes; + unsigned int vmpg_bytes; + unsigned int tdlut_vmpg_per_frame; + unsigned int tdlut_pte_req_per_frame; + unsigned int tdlut_bytes_per_line; + double tdlut_drain_rate; + unsigned int tdlut_mpc_width; + unsigned int tdlut_bytes_per_group_simple; + + if (!p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = 0; + *p->tdlut_opt_time = 0; + *p->tdlut_drain_time = 0; + *p->tdlut_bytes_to_deliver = 0; + *p->tdlut_bytes_per_group = 0; + *p->tdlut_pte_bytes_per_frame = 0; + *p->tdlut_bytes_per_frame = 0; + return; + } + + if (p->tdlut_mpc_width_flag) { + tdlut_mpc_width = 33; + tdlut_bytes_per_group_simple = 39*256; + } else { + tdlut_mpc_width = 17; + tdlut_bytes_per_group_simple = 10*256; + } + + vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; + + if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) { + if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) + tdlut_width = 4916; + else + tdlut_width = 35940; + } else { + if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) + tdlut_width = 17; + else // dml2_tdlut_width_33_cube + tdlut_width = 33; + } + + if (p->is_gfx11) + tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment + else + tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment + + if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) + tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width; + else + tdlut_footprint_bytes = tdlut_pitch_bytes; + + if (!p->gpuvm_enable) { + tdlut_vmpg_per_frame = 0; + tdlut_pte_req_per_frame = 0; + } else { + tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1; + tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1; + } + tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request + *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64; + + if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) { + //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice + *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width; + *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; + //the delivery cycles is DispClk cycles per line * number of lines * number of slices + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); + } else { + //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements + *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); + *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple; + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); + tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz; + } + + //the tdlut is fetched during the 2 row times of prefetch. 
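+ /*
+  * Numerical sketch (hypothetical parameters, for illustration): with
+  * tdlut_addressing_mode == dml2_tdlut_sw_linear, tdlut_width_mode ==
+  * dml2_tdlut_width_17_cube and tdlut_mpc_width_flag not set, the assignments above
+  * give tdlut_width = 17, tdlut_mpc_width = 17,
+  * tdlut_bytes_per_line = ceil(17 * 8, 64) = 192,
+  * tdlut_bytes_per_frame = 192 * 17 * 17 = 55,488,
+  * tdlut_bytes_per_group = 192 * 17 = 3,264 and
+  * tdlut_drain_rate = 192 * dispclk_mhz / ceil(17 / 2), i.e. about 21.3 bytes per
+  * DISPCLK cycle, so tdlut_groups_per_2row_ub below works out to
+  * ceil(55,488 / 3,264) = 17.
+  */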
+ if (p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); + if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) + *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + else + *p->tdlut_opt_time = 0; + *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; + *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); + + DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); + DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); + DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); + DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); + DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? 
(unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width : (unsigned int)math_ceil2(tdlut_width/2.0, 1)); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); + DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); +#endif +} + +static void CalculateTarb( + const struct dml2_display_cfg *display_cfg, + unsigned int PixelChunkSizeInKByte, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + unsigned int tdlut_bytes_per_group[], + double HostVMInefficiencyFactor, + double HostVMInefficiencyFactorPrefetch, + unsigned int HostVMMinPageSize, + double ReturnBW, + unsigned int MetaChunkSize, + + // output + double *Tarb, + double *Tarb_prefetch) +{ + double extra_bytes = 0; + double extra_bytes_prefetch = 0; + double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels); + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024); + + if (display_cfg->plane_descriptors[k].surface.dcc.enable) + extra_bytes = extra_bytes + (MetaChunkSize * 1024); + + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + extra_bytes = extra_bytes + tdlut_bytes_per_group[k]; + } + + extra_bytes_prefetch = extra_bytes; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (display_cfg->gpuvm_enable == true) { + extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; + extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch; + } + } + *Tarb = extra_bytes / ReturnBW; + *Tarb_prefetch = extra_bytes_prefetch / ReturnBW; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); + DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); + DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); +#endif +} + +static double CalculateTWait( + long reserved_vblank_time_ns, + double UrgentLatency, + double Ttrip, + double g6_temp_read_blackout_us) +{ + double TWait; + double t_urg_trip = math_max2(UrgentLatency, Ttrip); + TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, TWait); +#endif + return TWait; +} + + +static void CalculateVUpdateAndDynamicMetadataParameters( + unsigned int MaxInterDCNTileRepeaters, + double Dppclk, + double Dispclk, + double DCFClkDeepSleep, + double PixelClock, + unsigned int HTotal, + unsigned int VBlank, + unsigned int DynamicMetadataTransmittedBytes, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int InterlaceEnable, + bool 
ProgressiveToInterlaceUnitInOPP, + + // Output + double *TSetup, + double *Tdmbf, + double *Tdmec, + double *Tdmsks, + unsigned int *VUpdateOffsetPix, + unsigned int *VUpdateWidthPix, + unsigned int *VReadyOffsetPix) +{ + double TotalRepeaterDelayTime; + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); + *VUpdateWidthPix = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0)); + *VReadyOffsetPix = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0)); + *VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0)); + *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; + *Tdmec = HTotal / PixelClock; + + if (DynamicMetadataLinesBeforeActiveRequired == 0) { + *Tdmsks = VBlank * HTotal / PixelClock / 2.0; + } else { + *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; + } + if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { + *Tdmsks = *Tdmsks / 2; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); + DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); + DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); + + DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); + DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); + DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); + + DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); +#endif +} + +static double get_urgent_bandwidth_required( + struct dml2_core_shared_get_urgent_bandwidth_required_locals *l, + const struct dml2_display_cfg *display_cfg, + enum dml2_core_internal_soc_state_type state_type, + enum dml2_core_internal_bw_type bw_type, + bool inc_flip_bw, // including flip bw + bool use_qual_row_bw, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double dcc_dram_bw_pref_overhead_factor_p0[], + double dcc_dram_bw_pref_overhead_factor_p1[], + double mall_prefetch_sdp_overhead_factor[], + double mall_prefetch_dram_overhead_factor[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double PrefetchBandwidthMax[], + double excess_vactive_fill_bw_l[], + double excess_vactive_fill_bw_c[], + double cursor_bw[], + double dpte_row_bw[], + double meta_row_bw[], + double prefetch_cursor_bw[], + double prefetch_vmrow_bw[], + double flip_bw[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[], + /* outputs */ + double surface_required_bw[], + double 
surface_peak_required_bw[]) +{ + // set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS + // set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation + + memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals)); + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0; + l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; + l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; + l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; + l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; + + l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0; + l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1; + l->adj_factor_cur = UrgentBurstFactorCursor[k]; + l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0; + l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1; + l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k]; + + bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]); + bool exclude_this_plane = false; + + // Exclude phantom pipe in bw calculation for non svp prefetch state + if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom) + exclude_this_plane = true; + + // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip. + // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe. 
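+ /*
+  * Decision summary for per_plane_flip_bw below (this only restates the branches that
+  * follow; l was cleared by the memset above, so any case not listed stays 0):
+  *   use_qual_row_bw,  hostvm_enable                            -> 0
+  *   use_qual_row_bw,  !immediate_flip                          -> NumberOfDPP * (dpte_row_bw + meta_row_bw)
+  *   !use_qual_row_bw, (!immediate_flip && !hostvm) or !inc_flip_bw
+  *                                                              -> NumberOfDPP * (dpte_row_bw + meta_row_bw)
+  *   !use_qual_row_bw, otherwise                                -> NumberOfDPP * flip_bw
+  */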
+ if (use_qual_row_bw) { + if (display_cfg->hostvm_enable) + l->per_plane_flip_bw[k] = 0; // qual_row_bw + else if (!display_cfg->plane_descriptors[k].immediate_flip) + l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); + } else { + // the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM) + if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw) + l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); + else + l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k]; + } + + if (!exclude_this_plane) { + l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; + l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; + l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->flip_and_prefetch_bw_max = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthMax[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; + surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_max); + + /* export peak required bandwidth for the surface */ + surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); +#endif + } else { + surface_required_bw[k] = 0.0; + } + + l->required_bandwidth_mbps += surface_required_bw[k]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); + + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", 
__func__, k, l->adj_factor_cur_pre); + + DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); + + DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); +#endif + } + + return l->required_bandwidth_mbps; +} + +static void CalculateExtraLatency( + const struct dml2_display_cfg *display_cfg, + unsigned int ROBBufferSizeInKByte, + unsigned int RoundTripPingLatencyCycles, + unsigned int ReorderingBytes, + double DCFCLK, + double FabricClock, + unsigned int PixelChunkSizeInKByte, + double ReturnBW, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + unsigned int tdlut_bytes_per_group[], + double HostVMInefficiencyFactor, + double HostVMInefficiencyFactorPrefetch, + unsigned int HostVMMinPageSize, + enum dml2_qos_param_type qos_type, + bool max_outstanding_when_urgent_expected, + unsigned int max_outstanding_requests, + unsigned int request_size_bytes_luma[], + unsigned int request_size_bytes_chroma[], + unsigned int MetaChunkSize, + unsigned int dchub_arb_to_ret_delay, + double Ttrip, + unsigned int hostvm_mode, + + // output + double *ExtraLatency, // Tex + double *ExtraLatency_sr, // Tex_sr + double *ExtraLatencyPrefetch) + +{ + double Tarb; + double Tarb_prefetch; + double Tex_trips; + unsigned int max_request_size_bytes = 0; + + CalculateTarb( + display_cfg, + PixelChunkSizeInKByte, + NumberOfActiveSurfaces, + NumberOfDPP, + dpte_group_bytes, + tdlut_bytes_per_group, + HostVMInefficiencyFactor, + HostVMInefficiencyFactorPrefetch, + HostVMMinPageSize, + ReturnBW, + MetaChunkSize, + // output + &Tarb, + &Tarb_prefetch); + + Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? 
(2.0 * Ttrip) : 0.0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (request_size_bytes_luma[k] > max_request_size_bytes) + max_request_size_bytes = request_size_bytes_luma[k]; + if (request_size_bytes_chroma[k] > max_request_size_bytes) + max_request_size_bytes = request_size_bytes_chroma[k]; + } + + if (qos_type == dml2_qos_param_type_dcn4x) { + *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; + *ExtraLatency = *ExtraLatency_sr; + if (max_outstanding_when_urgent_expected) + *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW; + } else { + *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW; + *ExtraLatency = *ExtraLatency_sr; + } + *ExtraLatency = *ExtraLatency + Tex_trips; + *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch; + *ExtraLatency = *ExtraLatency + Tarb; + *ExtraLatency_sr = *ExtraLatency_sr + Tarb; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips); + DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); + DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); + DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb); + DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); + DML_LOG_VERBOSE("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); +#endif +} + +static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p) +{ + struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals; + bool dcc_mrq_enable; + + unsigned int vm_bytes; + unsigned int extra_tdpe_bytes; + unsigned int tdlut_row_bytes; + unsigned int Lo; + + s->NoTimeToPrefetch = false; + s->DPPCycles = 0; + s->DISPCLKCycles = 0; + s->DSTTotalPixelsAfterScaler = 0.0; + s->LineTime = 0.0; + s->dst_y_prefetch_equ = 0.0; + s->prefetch_bw_oto = 0.0; + s->Tvm_oto = 0.0; + s->Tr0_oto = 0.0; + s->Tvm_oto_lines = 0.0; + s->Tr0_oto_lines = 0.0; + s->dst_y_prefetch_oto = 0.0; + s->TimeForFetchingVM = 0.0; + s->TimeForFetchingRowInVBlank = 0.0; + s->LinesToRequestPrefetchPixelData = 0.0; + s->HostVMDynamicLevelsTrips = 0; + s->trip_to_mem = 0.0; + *p->Tvm_trips = 0.0; + *p->Tr0_trips = 0.0; + s->Tvm_trips_rounded = 0.0; + s->Tr0_trips_rounded = 0.0; + s->max_Tsw = 0.0; + s->Lsw_oto = 0.0; + *p->Tpre_rounded = 0.0; + s->prefetch_bw_equ = 0.0; + s->Tvm_equ = 0.0; + s->Tr0_equ = 0.0; + s->Tdmbf = 0.0; + s->Tdmec = 0.0; + s->Tdmsks = 0.0; + *p->prefetch_sw_bytes = 0.0; + s->prefetch_bw_pr = 0.0; + s->bytes_pp = 0.0; + s->dep_bytes = 0.0; + s->min_Lsw_oto = 0.0; + s->min_Lsw_equ = 0.0; + s->Tsw_est1 = 0.0; + s->Tsw_est2 = 0.0; + s->Tsw_est3 = 0.0; + s->cursor_prefetch_bytes = 0; + *p->prefetch_cursor_bw = 0; + *p->RequiredPrefetchBWMax = 0.0; + + dcc_mrq_enable = 
(p->dcc_enable && p->mrq_present); + + s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip) + + if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) { + s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels; + } else { + s->HostVMDynamicLevelsTrips = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); + DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); + DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); + DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup); + DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); +#endif + CalculateVUpdateAndDynamicMetadataParameters( + p->MaxInterDCNTileRepeaters, + p->myPipe->Dppclk, + p->myPipe->Dispclk, + p->myPipe->DCFClkDeepSleep, + p->myPipe->PixelClock, + p->myPipe->HTotal, + p->myPipe->VBlank, + p->DynamicMetadataTransmittedBytes, + p->DynamicMetadataLinesBeforeActiveRequired, + p->myPipe->InterlaceEnable, + p->myPipe->ProgressiveToInterlaceUnitInOPP, + p->TSetup, + + // Output + &s->Tdmbf, + &s->Tdmec, + &s->Tdmsks, + p->VUpdateOffsetPix, + p->VUpdateWidthPix, + p->VReadyOffsetPix); + + s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; + s->trip_to_mem = p->Ttrip; + *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); + if (dcc_mrq_enable) + *p->Tvm_trips_flip = *p->Tvm_trips; + else + *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; + + *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); + *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); + + if (p->DynamicMetadataVMEnabled == true) { + *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; + *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; + } else { + *p->Tdmdl_vm = 0; + *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex + } + + if (p->DynamicMetadataEnable == true) { + if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { + *p->NotEnoughTimeForDynamicMetadata = true; + DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + } 
else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + + if (p->myPipe->ScalerEnabled) + s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL); + else + s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly); + + s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor); + + s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal; + + if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0) + return true; + + *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay); + *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH + + ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) + + ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); + DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); + DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); + DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); + DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); + + DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); +#endif + + if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) + *p->DSTYAfterScaler = 1; + else + *p->DSTYAfterScaler = 0; + + s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler; + *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); + *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal))); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); +#endif + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, 
p->display_cfg->gpuvm_max_page_table_levels); + DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); +#endif + if (p->display_cfg->gpuvm_enable) { + s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; + } else { + if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut) + s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0); + else + s->Tvm_trips_rounded = s->LineTime / 4.0; + *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; + } + + s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0); + *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0); + + if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) { + s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; + } else { + s->Tr0_trips_rounded = s->LineTime / 4.0; + *p->Tr0_trips_flip_rounded = s->LineTime / 4.0; + } + s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0); + *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0); + + if (p->display_cfg->gpuvm_enable == true) { + if (p->display_cfg->gpuvm_max_page_table_levels >= 3) { + *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1)); + } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) { + *p->Tno_bw = p->ExtraLatencyPrefetch; + } else { + *p->Tno_bw = 0; + } + } else { + *p->Tno_bw = 0; + } + + if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3) + *p->Tno_bw_flip = *p->Tno_bw; + else + *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip + + if (dml_is_420(p->myPipe->SourcePixelFormat)) { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; + } else { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC; + } + + *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; + *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; + + vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; + extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); + + if (p->setup_for_tdlut) + vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); + + tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); + + s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; + s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); + + // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto + // Note: in prefetch calculation, acounting is done mostly per-pipe. 
+ // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time + s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface; + + // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1) + s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime; + + if (p->myPipe->BytePerPixelC > 0) { + s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface; + s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime; + } + + /* oto prefetch bw should be always be less than total vactive bw */ + //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); + + s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; + + s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime)); + + s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0; + + s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, + p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); + + /* oto bw needs to be outputted even if the oto schedule isn't being used to avoid ms/mp mismatch. + * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule + * and the required bandwidth increases when going from ms to mp + */ + *p->RequiredPrefetchBWMax = s->prefetch_bw_oto; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); + DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); +#endif + + if (p->display_cfg->gpuvm_enable == true) { + s->Tvm_oto = math_max3( + *p->Tvm_trips, + *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, + s->LineTime / 4.0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); +#endif + } else { + s->Tvm_oto = s->Tvm_trips_rounded; + } + + if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { + s->Tr0_oto = math_max3( + *p->Tr0_trips, + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, + s->LineTime / 4.0); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); +#endif + } else + s->Tr0_oto = s->LineTime / 4.0; + + s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; + s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; + s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; + +#ifdef DML_GLOBAL_PREFETCH_CHECK + DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, 
p->impacted_dst_y_pre); + if (p->impacted_dst_y_pre > 0) { + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); + } +#endif + *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime; + + //To (time for delay after scaler) in line time + Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); + + s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); + //Tpre_equ in line time + if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; + else + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; + +#ifdef DML_GLOBAL_PREFETCH_CHECK + s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ); + + s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + + if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ) + s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ; +#endif + + s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); + DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); + DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); + DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); + DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", 
__func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); + DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); + DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); +#endif + s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; + *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup); + DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); + DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); + DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); + DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); + DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, 
p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); + DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); +#endif + + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + + // Derive bandwidth by finding how much data to move within the time constraint + // Tpre_rounded is Tpre rounding to 2-bit fraction + // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time + // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time + // So that means prefetch bw calculated can be higher since the total time available for prefetch is less + bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + bool tpre_gt_req_latency = true; +#if 0 + // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained. + // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages. + // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary. + tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch); +#endif + + if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) { + s->prefetch_bw1 = 0.; + s->prefetch_bw2 = 0.; + s->prefetch_bw3 = 0.; + s->prefetch_bw4 = 0.; + + // prefetch_bw1: VM + 2*R0 + SW + if (*p->Tpre_rounded - *p->Tno_bw > 0) { + s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + + *p->prefetch_sw_bytes) + / (*p->Tpre_rounded - *p->Tno_bw); + s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1; + } else + s->prefetch_bw1 = 0; + + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); + if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { + s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / + (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); + DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); + DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime 
- *p->Tno_bw));
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
+#endif
+ }
+
+ // prefetch_bw2: VM + SW
+ if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
+ s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
+ s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2;
+ } else
+ s->prefetch_bw2 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
+ if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
+ s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
+ }
+
+ // prefetch_bw3: 2*R0 + SW
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - s->Tvm_trips_rounded);
+ s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3;
+ } else
+ s->prefetch_bw3 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
+ if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
+ }
+
+ // prefetch_bw4: SW
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
+ s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
+ else
+ s->prefetch_bw4 = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
+ DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
+ DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
+ DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
+#endif
+ {
+ bool Case1OK = false;
+ bool Case2OK = false;
+ bool Case3OK = false;
+
+ // get an "equalized" bw among all stages (vm, r0, sw), i.e. ideally all 3 stages end up just above their latency-based requirement
+ // so that no particular stage is favored disproportionately; the later cases are more aggressive for r0, then for vm, and the worst case is aggressive for all stages
+ // compared with the latency-based numbers
+
+ // prefetch_bw1: VM + 2*R0 + SW
+ // so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency based time to fetch the data)
+ // the checks below make sure the equ bw won't be more aggressive than the latency-based requirement.
+ // check vm time >= vm_trips
+ // check r0 time >= r0_trips
+
+ double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
+
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded);
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded);
+
+ if (s->prefetch_bw1 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw1;
+ DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
+ Case1OK = true;
+ }
+ }
+
+ // prefetch_bw2: VM + SW
+ // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - 2*Tr0_trips_rounded - Tno_bw)
+ // check vm time >= vm_trips
+ // check r0 time < r0_trips
+ if (s->prefetch_bw2 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw2;
+ DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) {
+ Case2OK = true;
+ }
+ }
+
+ // prefetch_bw3: 2*R0 + SW
+ // check vm time < vm_trips
+ // check r0 time >= r0_trips
+ if (s->prefetch_bw3 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw3;
+ DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
+ Case3OK = true;
+ }
+ }
+
+ if (Case1OK) {
+ s->prefetch_bw_equ = s->prefetch_bw1;
+ } else if (Case2OK) {
+ s->prefetch_bw_equ = s->prefetch_bw2;
+ } else if (Case3OK) {
+ s->prefetch_bw_equ = s->prefetch_bw3;
+ } else {
+ s->prefetch_bw_equ = s->prefetch_bw4;
+ }
+
+ s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
+ p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK);
+ DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK);
+ DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
+#endif
+
+ if (s->prefetch_bw_equ > 0) {
+ if (p->display_cfg->gpuvm_enable == true) {
+ s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor /
s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4); + } else { + s->Tvm_equ = s->LineTime / 4; + } + + if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) { + s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes + *p->Tr0_trips, + s->LineTime / 4); + } else { + s->Tr0_equ = s->LineTime / 4; + } + } else { + s->Tvm_equ = 0; + s->Tr0_equ = 0; + DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__); + } + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); + DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); +#endif + // Use the more stressful prefetch schedule + if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { + *p->dst_y_prefetch = s->dst_y_prefetch_oto; + s->TimeForFetchingVM = s->Tvm_oto; + s->TimeForFetchingRowInVBlank = s->Tr0_oto; + + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__); +#endif + + } else { + *p->dst_y_prefetch = s->dst_y_prefetch_equ; + + if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted) + *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted; + + s->TimeForFetchingVM = s->Tvm_equ; + s->TimeForFetchingRowInVBlank = s->Tr0_equ; + + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + + /* equ bw should be propagated so a ceiling of the equ bw is accounted for prior to mode programming. + * Overall bandwidth may be lower when going from mode support to mode programming but final pixel data + * bandwidth may end up higher than what was calculated in mode support. 
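+ * The math_max2() just below therefore keeps the largest equ prefetch bandwidth seen in RequiredPrefetchBWMax
+ * rather than overwriting it with a smaller value.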
+ */ + *p->RequiredPrefetchBWMax = math_max2(s->prefetch_bw_equ, *p->RequiredPrefetchBWMax); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__); +#endif + } + + // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) + s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw + + s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); + *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); + *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); + + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); + DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); +#endif + DML_ASSERT(*p->dst_y_prefetch < 64); + + unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime); + if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) { + *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); + DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); +#endif + if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { + *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, + (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); + } else { + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + *p->VRatioPrefetchY = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); +#endif + } + + *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); + DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); +#endif + if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { + *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); + } else { + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + *p->VRatioPrefetchC = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); +#endif + } + + *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime; + *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); +#endif + } else { + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + } + DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data 
and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); + DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); + DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); + DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); + DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); + + } else { + DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); + s->NoTimeToPrefetch = true; + s->TimeForFetchingVM = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + } + + { + double prefetch_vm_bw; + double prefetch_row_bw; + + if (vm_bytes == 0) { + prefetch_vm_bw = 0; + } else if (*p->dst_y_per_vm_vblank > 0) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); +#endif + prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); +#endif + } else { + prefetch_vm_bw = 0; + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + } + + if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { + prefetch_row_bw = 0; + } else if (*p->dst_y_per_row_vblank > 0) { + prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); +#endif + } else { + prefetch_row_bw = 0; + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + } + + *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); + } + + if (s->NoTimeToPrefetch) { + s->TimeForFetchingVM = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + *p->dst_y_prefetch = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + *p->prefetch_vmrow_bw = 0; + } + + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); + DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + + return s->NoTimeToPrefetch; +} + +static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines, + unsigned int line_buffer_size_bits, + unsigned int num_pipes, + unsigned int vp_width, + unsigned int vp_height, + double h_ratio, + enum dml2_rotation_angle rotation_angle) +{ + unsigned int num_lb_source_lines = 0; + double lb_bit_per_pixel = 57.0; + unsigned recin_width = vp_width/num_pipes; + + if (dml_is_vertical_rotation(rotation_angle)) + recin_width = vp_height/num_pipes; + + num_lb_source_lines = (unsigned int) math_min2((double) max_line_buffer_lines, + math_floor2(line_buffer_size_bits / lb_bit_per_pixel / (recin_width / math_max2(h_ratio, 1.0)), 1.0)); + + return num_lb_source_lines; +} + +static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[]) +{ + int max_value = -1; + int max_idx = -1; + for (unsigned int i = 0; i < num_planes; i++) { + if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) { + max_value = Trpd_dcfclk_cycles[i]; + max_idx = i; + } + } + if (max_idx <= 0) { + DML_ASSERT(max_idx >= 0); + max_idx = this_plane_idx; + } + + return max_idx; +} + +static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps) +{ + double sum = 0.; + for (unsigned int i = 0; i < num_planes; i++) { + if (i != exclude_plane_idx) { + sum += prefetch_swath_bytes[i]; + } + } + return sum / bw_mbps; +} + +// a global check against the aggregate effect of the per plane prefetch schedule +static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p) +{ + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals; + unsigned int i, k; + + memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals)); + + *p->recalc_prefetch_schedule = 0; + s->prefetch_global_check_passed = 1; + // worst case if the rob and cdb is fully hogged + s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0); +#ifdef __DML_VBA_DEBUG__ + 
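+ /* Worked example for max_Trpd_dcfclk_cycles above (illustrative numbers only, not taken from a real config):
+ * with rob_buffer_size_kbytes = 128, compressed_buffer_size_kbytes = 64 and DML_MAX_COMPRESSION_RATIO = 4,
+ * the worst case is ceil((128*1024 + 64*4*1024)/64.0) = 6144 dcfclk cycles of 64B returns to drain a fully
+ * hogged rob+cdb. */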
DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); + DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); +#endif + + // calculate the return impact from each plane, request is 256B per dcfclk + for (i = 0; i < p->num_active_planes; i++) { + s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i]; + s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i]; + s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i]; + s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i]; + + if (p->pixel_format[i] == dml2_420_10) { + s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5); + s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5); + s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5); + s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5); + } + + s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l); + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); +#endif + + if (s->src_swath_bytes_c[i] > 0) { // dual_plane + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c); + + if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) { + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]); + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); +#endif + } + + s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate + s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 
64B per DCFClk
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det);
+ DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us);
+ DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]);
+#endif
+ // clamp to the worst-case delay, which is the one that occupies the full rob+cdb
+ if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles)
+ s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles;
+ }
+
+ // Figure out the impacted prefetch time for each plane
+ // if impacted_Tpre is > the equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw
+ for (i = 0; i < p->num_active_planes; i++) {
+ k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i
+ // the rest of the planes (except k) compete for bw
+ p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz;
+ p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps);
+ p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k);
+#endif
+ }
+
+ if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) {
+ for (i = 0; i < p->num_active_planes; i++) {
+ if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) {
+ s->prefetch_global_check_passed = 0;
+ *p->recalc_prefetch_schedule = 1;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]);
+ DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]);
+#endif
+ }
+ } else {
+ // likely a mode programming call; assume support and no recalc - not used anyway
+ s->prefetch_global_check_passed = 1;
+ *p->recalc_prefetch_schedule = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed);
+ DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule);
+#endif
+
+ return s->prefetch_global_check_passed;
+}
+
+static void calculate_peak_bandwidth_required(
+ struct dml2_core_internal_scratch *s,
+ struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
+{
+ unsigned int n;
+ unsigned int m;
+
+ struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
+
+ memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw);
+ DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes);
+#endif
+
+ for (unsigned int k = 0; k < p->num_active_planes; ++k) {
+ l->unity_array[k] = 1.0;
+ l->zero_array[k] = 0.0;
+ }
+
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) {
+ get_urgent_bandwidth_required(
+ &s->get_urgent_bandwidth_required_locals,
+ p->display_cfg,
+ m,
+ n,
+ 0, //inc_flip_bw,
+ 0, //use_qual_row_bw
+ p->num_active_planes,
+ p->num_of_dpp,
+ p->dcc_dram_bw_nom_overhead_factor_p0,
+
p->dcc_dram_bw_nom_overhead_factor_p1, + p->dcc_dram_bw_pref_overhead_factor_p0, + p->dcc_dram_bw_pref_overhead_factor_p1, + p->mall_prefetch_sdp_overhead_factor, + p->mall_prefetch_dram_overhead_factor, + p->surface_read_bandwidth_l, + p->surface_read_bandwidth_c, + l->zero_array, //PrefetchBandwidthLuma, + l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWMax + l->zero_array, + l->zero_array, + l->zero_array, + p->dpte_row_bw, + p->meta_row_bw, + l->zero_array, //prefetch_cursor_bw, + l->zero_array, //prefetch_vmrow_bw, + l->zero_array, //flip_bw, + l->zero_array, + l->zero_array, + l->zero_array, + l->zero_array, + l->zero_array, + l->zero_array, + p->surface_avg_vactive_required_bw[m][n], + p->surface_peak_required_bw[m][n]); + + p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + p->display_cfg, + m, + n, + 0, //inc_flip_bw, + 0, //use_qual_row_bw + p->num_active_planes, + p->num_of_dpp, + p->dcc_dram_bw_nom_overhead_factor_p0, + p->dcc_dram_bw_nom_overhead_factor_p1, + p->dcc_dram_bw_pref_overhead_factor_p0, + p->dcc_dram_bw_pref_overhead_factor_p1, + p->mall_prefetch_sdp_overhead_factor, + p->mall_prefetch_dram_overhead_factor, + p->surface_read_bandwidth_l, + p->surface_read_bandwidth_c, + l->zero_array, //PrefetchBandwidthLuma, + l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWMax + p->excess_vactive_fill_bw_l, + p->excess_vactive_fill_bw_c, + p->cursor_bw, + p->dpte_row_bw, + p->meta_row_bw, + l->zero_array, //prefetch_cursor_bw, + l->zero_array, //prefetch_vmrow_bw, + l->zero_array, //flip_bw, + p->urgent_burst_factor_l, + p->urgent_burst_factor_c, + p->urgent_burst_factor_cursor, + p->urgent_burst_factor_prefetch_l, + p->urgent_burst_factor_prefetch_c, + p->urgent_burst_factor_prefetch_cursor, + l->surface_dummy_bw, + p->surface_peak_required_bw[m][n]); + + p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + p->display_cfg, + m, + n, + p->inc_flip_bw, + 0, //use_qual_row_bw + p->num_active_planes, + p->num_of_dpp, + p->dcc_dram_bw_nom_overhead_factor_p0, + p->dcc_dram_bw_nom_overhead_factor_p1, + p->dcc_dram_bw_pref_overhead_factor_p0, + p->dcc_dram_bw_pref_overhead_factor_p1, + p->mall_prefetch_sdp_overhead_factor, + p->mall_prefetch_dram_overhead_factor, + p->surface_read_bandwidth_l, + p->surface_read_bandwidth_c, + p->prefetch_bandwidth_l, + p->prefetch_bandwidth_c, + p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw + p->excess_vactive_fill_bw_l, + p->excess_vactive_fill_bw_c, + p->cursor_bw, + p->dpte_row_bw, + p->meta_row_bw, + p->prefetch_cursor_bw, + p->prefetch_vmrow_bw, + p->flip_bw, + p->urgent_burst_factor_l, + p->urgent_burst_factor_c, + p->urgent_burst_factor_cursor, + p->urgent_burst_factor_prefetch_l, + p->urgent_burst_factor_prefetch_c, + p->urgent_burst_factor_prefetch_cursor, + l->surface_dummy_bw, + p->surface_peak_required_bw[m][n]); + + p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + p->display_cfg, + m, + n, + 0, //inc_flip_bw + 1, //use_qual_row_bw + p->num_active_planes, + p->num_of_dpp, + p->dcc_dram_bw_nom_overhead_factor_p0, + p->dcc_dram_bw_nom_overhead_factor_p1, + p->dcc_dram_bw_pref_overhead_factor_p0, + p->dcc_dram_bw_pref_overhead_factor_p1, + p->mall_prefetch_sdp_overhead_factor, + p->mall_prefetch_dram_overhead_factor, + p->surface_read_bandwidth_l, + 
p->surface_read_bandwidth_c, + p->prefetch_bandwidth_l, + p->prefetch_bandwidth_c, + p->prefetch_bandwidth_max, // to prevent ms/mp mismatch where mp prefetch bw > ms prefetch bw + p->excess_vactive_fill_bw_l, + p->excess_vactive_fill_bw_c, + p->cursor_bw, + p->dpte_row_bw, + p->meta_row_bw, + p->prefetch_cursor_bw, + p->prefetch_vmrow_bw, + p->flip_bw, + p->urgent_burst_factor_l, + p->urgent_burst_factor_c, + p->urgent_burst_factor_cursor, + p->urgent_burst_factor_prefetch_l, + p->urgent_burst_factor_prefetch_c, + p->urgent_burst_factor_prefetch_cursor, + l->surface_dummy_bw, + p->surface_peak_required_bw[m][n]); + + p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + p->display_cfg, + m, + n, + p->inc_flip_bw, + 0, //use_qual_row_bw + p->num_active_planes, + p->num_of_dpp, + p->dcc_dram_bw_nom_overhead_factor_p0, + p->dcc_dram_bw_nom_overhead_factor_p1, + p->dcc_dram_bw_pref_overhead_factor_p0, + p->dcc_dram_bw_pref_overhead_factor_p1, + p->mall_prefetch_sdp_overhead_factor, + p->mall_prefetch_dram_overhead_factor, + p->surface_read_bandwidth_l, + p->surface_read_bandwidth_c, + p->prefetch_bandwidth_l, + p->prefetch_bandwidth_c, + p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw + p->excess_vactive_fill_bw_l, + p->excess_vactive_fill_bw_c, + p->cursor_bw, + p->dpte_row_bw, + p->meta_row_bw, + p->prefetch_cursor_bw, + p->prefetch_vmrow_bw, + p->flip_bw, + l->unity_array, + l->unity_array, + l->unity_array, + l->unity_array, + l->unity_array, + l->unity_array, + l->surface_dummy_bw, + p->surface_peak_required_bw[m][n]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? 
"_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); +#endif + DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); + } + } +} + +static void check_urgent_bandwidth_support( + double *frac_urg_bandwidth_nom, + double *frac_urg_bandwidth_mall, + bool *vactive_bandwidth_support_ok, // vactive ok + bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok + + unsigned int mall_allocated_for_dcn_mbytes, + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + double frac_urg_bandwidth_mall_sdp; + double frac_urg_bandwidth_mall_dram; + if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0) + frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + else + frac_urg_bandwidth_mall_sdp = 0.0; + if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0) + frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + else + frac_urg_bandwidth_mall_dram = 0.0; + + *bandwidth_support_ok = 1; + *vactive_bandwidth_support_ok = 1; + + // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth + // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram + // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL + // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch + + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + + if (mall_allocated_for_dcn_mbytes > 0) { + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= 
urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + } + + *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram); + *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram); + + *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0); + + if (mall_allocated_for_dcn_mbytes > 0) + *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0); + + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + if (mall_allocated_for_dcn_mbytes > 0) { + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); + + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); + DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); + + for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), + urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]); + } + } +#endif +} + +static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double flip_bw_available_mbps; + double flip_bw_available_sdp_mbps; + double flip_bw_available_dram_mbps; + + flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]; + flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]; + flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? 
flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); +#endif + + return flip_bw_available_mbps; +} + +static void calculate_immediate_flip_bandwidth_support( + // Output + double *frac_urg_bandwidth_flip, + bool *flip_bandwidth_support_ok, + + // Input + enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]; + double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]; + + *flip_bandwidth_support_ok = true; + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram + *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); +#endif + DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); + } + + *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? 
frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram; + *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + + for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), + urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]); + } + } +#endif +} + +static void CalculateFlipSchedule( + struct dml2_core_internal_scratch *s, + bool iflip_enable, + bool use_lb_flip_bw, + double HostVMInefficiencyFactor, + double Tvm_trips_flip, + double Tr0_trips_flip, + double Tvm_trips_flip_rounded, + double Tr0_trips_flip_rounded, + bool GPUVMEnable, + double vm_bytes, // vm_bytes + double DPTEBytesPerRow, // dpte_row_bytes + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum dml2_source_format_class SourcePixelFormat, + double LineTime, + double VRatio, + double VRatioChroma, + double Tno_bw_flip, + unsigned int dpte_row_height, + unsigned int dpte_row_height_chroma, + bool use_one_row_for_frame_flip, + unsigned int max_flip_time_us, + unsigned int max_flip_time_lines, + unsigned int per_pipe_flip_bytes, + unsigned int meta_row_bytes, + unsigned int meta_row_height, + unsigned int meta_row_height_chroma, + bool dcc_mrq_enable, + + // Output + double *dst_y_per_vm_flip, + double *dst_y_per_row_flip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; + + l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; + l->dpte_row_bytes = DPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, 
Tvm_trips_flip_rounded); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); + DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); + DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); + DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); +#endif + + if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) { + if (l->dual_plane) { + if (dcc_mrq_enable & GPUVMEnable) { + l->min_row_height = math_min2(dpte_row_height, meta_row_height); + l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma); + } else if (GPUVMEnable) { + l->min_row_height = dpte_row_height; + l->min_row_height_chroma = dpte_row_height_chroma; + } else { + l->min_row_height = meta_row_height; + l->min_row_height_chroma = meta_row_height_chroma; + } + l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma); + } else { + if (dcc_mrq_enable & GPUVMEnable) + l->min_row_height = math_min2(dpte_row_height, meta_row_height); + else if (GPUVMEnable) + l->min_row_height = dpte_row_height; + else + l->min_row_height = meta_row_height; + + l->min_row_time = l->min_row_height * LineTime / VRatio; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); +#endif + DML_ASSERT(l->min_row_time > 0); + + if (use_lb_flip_bw) { + // For mode check, calculation the flip bw requirement with worst case flip time + l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio), + math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); + + //The lower bound on flip bandwidth + // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required + l->lb_flip_bw = 0; + + if (iflip_enable) { + l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor; + l->num_rows = 2; + l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes); + l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes; + l->lb_flip_bw = math_max3( + l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip), + l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded), + l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); + DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); + DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); + DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 
* Tr0_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); + + if (l->lb_flip_bw > 0) { + DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); + } +#endif + l->lb_flip_bw = math_max3(l->lb_flip_bw, + l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip, + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); +#endif + } + + *final_flip_bw = l->lb_flip_bw; + + *dst_y_per_vm_flip = 1; // not used + *dst_y_per_row_flip = 1; // not used + *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded); + } else { + if (iflip_enable) { + l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); + DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes); +#endif + if (l->ImmediateFlipBW == 0) { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + } else { + l->Tvm_flip = math_max3(Tvm_trips_flip, + Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, + LineTime / 4.0); + + l->Tr0_flip = math_max3(Tr0_trips_flip, + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, + LineTime / 4.0); + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); + + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); +#endif + *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0; + *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 
1.0) / 4.0; + + *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime), + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime)); + + if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) { + *ImmediateFlipSupportedForPipe = false; + } else { + *ImmediateFlipSupportedForPipe = iflip_enable; + } + } else { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + *dst_y_per_vm_flip = 0; + *dst_y_per_row_flip = 0; + *final_flip_bw = 0; + *ImmediateFlipSupportedForPipe = iflip_enable; + } + } + } else { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + *dst_y_per_vm_flip = 0; + *dst_y_per_row_flip = 0; + *final_flip_bw = 0; + *ImmediateFlipSupportedForPipe = iflip_enable; + } + +#ifdef __DML_VBA_DEBUG__ + if (!use_lb_flip_bw) { + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); + } + DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); +#endif +} + +static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p) +{ + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; + + enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy; + double reserved_vblank_time_us; + bool FoundCriticalSurface = false; + + s->TotalActiveWriteback = 0; + p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); +#endif + + p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; + p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) { + p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + 
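+ /* For dcn4x QoS the same adder (max urgent latency + DF response time) is applied to the remaining
+ * stutter and Z8 watermarks below as well, on top of the SR exit components already included above. */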
p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + } + p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); + DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); + DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); +#endif + + s->TotalActiveWriteback = 0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + s->TotalActiveWriteback = s->TotalActiveWriteback + 1; + } + } + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + } + if (p->USRRetrainingRequired) + p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK; + } + + if (p->USRRetrainingRequired) + 
p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (p->USRRetrainingRequired) + p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); + DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); +#endif + + s->TotalPixelBW = 0.0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; + double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k] + * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz); + } + + *p->global_fclk_change_supported = true; + *p->global_dram_clock_change_supported = true; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; + double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio; + double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio; + double LBBitPerPixel = 57; + + s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1)); + s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); + DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); + DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel); + DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, 
h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); +#endif + + s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz); + s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz); + + s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k]; + if (p->UnboundedRequestEnabled) { + s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW; + } + + s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k])); + s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio; + + s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz; + + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio; + } + + if (p->BytePerPixelDETC[k] > 0) { + s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k]; + s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k])); + s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c; + s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz; + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c; + } + s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC); + } else { + s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY; + } + + s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark; + s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark; + s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark; + s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us; + + if (p->VActiveLatencyHidingMargin) + p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; + + if (p->VActiveLatencyHidingUs) + p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding; + + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 + / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * 
(double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width + / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0); + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { + s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2; + } + s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark; + + s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark; + + s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin); + s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin); + } + p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency); + + uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy; + reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000; + + p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported; + if (s->ActiveFCLKChangeLatencyMargin[k] > 0) + p->FCLKChangeSupport[k] = dml2_pstate_change_vactive; + else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency) + p->FCLKChangeSupport[k] = dml2_pstate_change_vblank; + + if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported) + *p->global_fclk_change_supported = false; + + p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported; + if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { + if (p->display_cfg->overrides.all_streams_blanked || + (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)) + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive; + else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive; + else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; + } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0) + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr) + p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp) + p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) + p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame; + + if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported) + *p->global_dram_clock_change_supported = false; + + 
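For the auto strategy, the per-plane UCLK p-state decision above reduces to a small decision table on two tests: whether the active (vactive) latency margin is positive and whether the reserved vblank time covers the DRAM clock-change blackout. A minimal standalone sketch of that table, using stand-in names rather than the dml2 enum and parameter types (illustrative only, not part of this patch):

enum pstate_method {			/* stand-ins for the dml2_pstate_change_* values used above */
	PSTATE_UNSUPPORTED,
	PSTATE_VACTIVE,
	PSTATE_VBLANK,
	PSTATE_VBLANK_AND_VACTIVE,
};

/* Sketch of the auto-strategy branch only; the forced overrides are not modeled here. */
static enum pstate_method pick_auto_uclk_pstate_method(bool all_streams_blanked,
		double vactive_margin_us, double reserved_vblank_us,
		double dram_clk_change_blackout_us)
{
	bool vactive_ok = vactive_margin_us > 0.0;	/* DET/LB latency hiding covers the blackout */
	bool vblank_ok = reserved_vblank_us >= dram_clk_change_blackout_us;

	if (all_streams_blanked || (vactive_ok && vblank_ok))
		return PSTATE_VBLANK_AND_VACTIVE;
	if (vactive_ok)
		return PSTATE_VACTIVE;
	if (vblank_ok)
		return PSTATE_VBLANK;
	return PSTATE_UNSUPPORTED;	/* clears *global_dram_clock_change_supported */
}

The FCLK decision a few lines earlier follows the same shape, minus the combined vblank-and-vactive case and the all-streams-blanked override.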
s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1)); + s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k])); + s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]); + s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); +#endif + p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l; + + if (p->BytePerPixelDETC[k] > 0) { + s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k])); + s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); + s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k]; + + if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) + p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c)); + else + p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); + DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); +#endif + } + } + + *p->g6_temp_read_support = true; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && + (s->g6_temp_read_latency_margin[k] < 0)) { + *p->g6_temp_read_support = false; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface) + || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) { + FoundCriticalSurface = true; + *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency; + } + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported); + DML_LOG_VERBOSE("DML::%s: 
FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported); + DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); + DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); +#endif +} + +static void calculate_bytes_to_fetch_required_to_hide_latency( + struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p) +{ + unsigned int dst_lines_to_hide; + unsigned int src_lines_to_hide_l; + unsigned int src_lines_to_hide_c; + unsigned int plane_index; + unsigned int stream_index; + + for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) { + if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index])) + continue; + + stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index; + + dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us[0] / + ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total / + (double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0)); + + src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide, + p->swath_height_l[plane_index]); + p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * p->byte_per_pix_l[plane_index]; + + src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide, + p->swath_height_c[plane_index]); + p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index]; + + if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) { + p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index]; + if (p->meta_row_height_c[plane_index]) { + p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index]; + } + } + + if (p->display_cfg->gpuvm_enable == true) { + p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index]; + if (p->dpte_row_height_c[plane_index]) { + p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index]; + } + } + } +} + +static noinline_for_stack void calculate_vactive_det_fill_latency( + const struct dml2_display_cfg *display_cfg, + unsigned int num_active_planes, + unsigned int bytes_required_l[], + unsigned int bytes_required_c[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double surface_read_bw_l[], + double surface_read_bw_c[], + double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES], + double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES], + /* output */ + double vactive_det_fill_delay_us[]) +{ + double effective_excess_bandwidth; + double effective_excess_bandwidth_l; + double effective_excess_bandwidth_c; + double adj_factor; + unsigned int plane_index; + unsigned int soc_state; + unsigned int bw_type; + 
+ for (plane_index = 0; plane_index < num_active_planes; plane_index++) { + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index])) + continue; + + vactive_det_fill_delay_us[plane_index] = 0.0; + for (soc_state = 0; soc_state < dml2_core_internal_soc_state_max; soc_state++) { + for (bw_type = 0; bw_type < dml2_core_internal_bw_max; bw_type++) { + effective_excess_bandwidth = (surface_peak_required_bw[soc_state][bw_type][plane_index] - surface_avg_vactive_required_bw[soc_state][bw_type][plane_index]); + + /* luma */ + adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane_index] : 1.0; + + effective_excess_bandwidth_l = effective_excess_bandwidth * surface_read_bw_l[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor; + if (effective_excess_bandwidth_l > 0.0) { + vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_l[plane_index] / effective_excess_bandwidth_l); + } + + /* chroma */ + adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[plane_index] : 1.0; + + effective_excess_bandwidth_c = effective_excess_bandwidth * surface_read_bw_c[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor; + if (effective_excess_bandwidth_c > 0.0) { + vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_c[plane_index] / effective_excess_bandwidth_c); + } + } + } + } +} + +static void calculate_excess_vactive_bandwidth_required( + const struct dml2_display_cfg *display_cfg, + unsigned int num_active_planes, + unsigned int bytes_required_l[], + unsigned int bytes_required_c[], + /* outputs */ + double excess_vactive_fill_bw_l[], + double excess_vactive_fill_bw_c[]) +{ + unsigned int plane_index; + + for (plane_index = 0; plane_index < num_active_planes; plane_index++) { + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index])) + continue; + + excess_vactive_fill_bw_l[plane_index] = 0.0; + excess_vactive_fill_bw_c[plane_index] = 0.0; + + if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] > 0) { + excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk]; + excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk]; + } + } +} + +static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config) +{ + double bw_mbps = 0; + bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0; + + return bw_mbps; +} + +static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config) +{ + double uclk_mhz = 0; + + uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0; + + return uclk_mhz; +} + +static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) +{ + unsigned int i; + unsigned int index = 0; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + DML_LOG_VERBOSE("DML::%s: 
per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); + return index; +} + +static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = false; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = true; + break; + } + } + + if (!clk_entry_found) + DML_ASSERT(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +static unsigned int get_pipe_flip_bytes( + double hostvm_inefficiency_factor, + unsigned int vm_bytes, + unsigned int dpte_row_bytes, + unsigned int meta_row_bytes) +{ + unsigned int flip_bytes = 0; + + flip_bytes += (unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes); + flip_bytes += (unsigned int) (2*dpte_row_bytes * hostvm_inefficiency_factor); + + return flip_bytes; +} + +static void calculate_hostvm_inefficiency_factor( + double *HostVMInefficiencyFactor, + double *HostVMInefficiencyFactorPrefetch, + + bool gpuvm_enable, + bool hostvm_enable, + unsigned int remote_iommu_outstanding_translations, + unsigned int max_outstanding_reqs, + double urg_bandwidth_avail_active_pixel_and_vm, + double urg_bandwidth_avail_active_vm_only) +{ + *HostVMInefficiencyFactor = 1; + *HostVMInefficiencyFactorPrefetch = 1; + + if (gpuvm_enable && hostvm_enable) { + *HostVMInefficiencyFactor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only; + *HostVMInefficiencyFactorPrefetch = *HostVMInefficiencyFactor; + + if ((*HostVMInefficiencyFactorPrefetch < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs)) + *HostVMInefficiencyFactorPrefetch = 4; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); +#endif + } +} + +struct dml2_core_internal_g6_temp_read_blackouts_table { + struct { + unsigned int uclk_khz; + unsigned int blackout_us; + } entries[DML_MAX_CLK_TABLE_SIZE]; +}; + +struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = { + .entries = { + { + .uclk_khz = 96000, + .blackout_us = 23, + }, + { + .uclk_khz = 435000, + .blackout_us = 10, + }, + { + .uclk_khz = 521000, + .blackout_us = 10, + }, + { + .uclk_khz = 731000, + .blackout_us = 8, + }, + { + .uclk_khz = 822000, + .blackout_us = 8, + }, + { + .uclk_khz = 962000, + .blackout_us = 5, + }, + { + .uclk_khz = 1069000, + .blackout_us = 5, + }, + { + .uclk_khz = 1187000, + 
.blackout_us = 5, + }, + }, +}; + +static double get_g6_temp_read_blackout_us( + struct dml2_soc_bb *soc, + unsigned int uclk_freq_khz, + unsigned int min_clk_index) +{ + unsigned int i; + unsigned int blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us; + + if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0) { + /* overrides are present in the SoC BB */ + return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index]; + } + + /* use internal table */ + blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + if (uclk_freq_khz < core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz || + core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz == 0) { + break; + } + + blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[i].blackout_us; + } + + return (double)blackout_us; +} + +static double get_max_urgent_latency_us( + struct dml2_dcn4x_soc_qos_params *dcn4x, + double uclk_freq_mhz, + double FabricClock, + unsigned int min_clk_index) +{ + double latency; + latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz + * (1 + dcn4x->umc_max_latency_margin / 100.0) + + dcn4x->mall_overhead_fclk_cycles / FabricClock + + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock + * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0); + return latency; +} + +static void calculate_pstate_keepout_dst_lines( + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_watermarks *watermarks, + unsigned int pstate_keepout_dst_lines[]) +{ + const struct dml2_stream_parameters *stream_descriptor; + unsigned int i; + + for (i = 0; i < display_cfg->num_planes; i++) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) { + stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index]; + + pstate_keepout_dst_lines[i] = + (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz)); + + if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) { + pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1; + } + } + } +} + +static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib, + const struct dml2_display_cfg *display_cfg) +{ + struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; +#ifdef DML_GLOBAL_PREFETCH_CHECK + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; +#endif + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + + double min_return_bw_for_latency; + unsigned int k; + + mode_lib->ms.TimeCalc = 24 / 
mode_lib->ms.dcfclk_deepsleep; + + calculate_hostvm_inefficiency_factor( + &s->HostVMInefficiencyFactor, + &s->HostVMInefficiencyFactorPrefetch, + + display_cfg->gpuvm_enable, + display_cfg->hostvm_enable, + mode_lib->ip.remote_iommu_outstanding_translations, + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], + mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + + mode_lib->ms.Total3dlutActive = 0; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; + + // Calculate tdlut schedule related terms + calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; + calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; + calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; + calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; + calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; + calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); + + // output + calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; + calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; + calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; + + calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); + } + + min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + + CalculateExtraLatency( + display_cfg, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, + mode_lib->ms.DCFCLK, + mode_lib->ms.FabricClock, + mode_lib->ip.pixel_chunk_size_kbytes, + min_return_bw_for_latency, + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + mode_lib->ms.dpte_group_bytes, + s->tdlut_bytes_per_group, + s->HostVMInefficiencyFactor, + 
s->HostVMInefficiencyFactorPrefetch, + mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.qos_parameters.qos_type, + !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.request_size_bytes_luma, + mode_lib->ms.support.request_size_bytes_chroma, + mode_lib->ip.meta_chunk_size_kbytes, + mode_lib->ip.dchub_arb_to_ret_delay, + mode_lib->ms.TripToMemory, + mode_lib->ip.hostvm_mode, + + // output + &mode_lib->ms.ExtraLatency, + &mode_lib->ms.ExtraLatency_sr, + &mode_lib->ms.ExtraLatencyPrefetch); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + s->impacted_dst_y_pre[k] = 0; + + s->recalc_prefetch_schedule = 0; + s->recalc_prefetch_done = 0; + do { + mode_lib->ms.support.PrefetchSupported = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; + + s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane0.width, + display_cfg->plane_descriptors[k].composition.viewport.plane0.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane1.width, + display_cfg->plane_descriptors[k].composition.viewport.plane1.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; + + mode_lib->ms.TWait[k] = CalculateTWait( + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->ms.UrgLatency, + mode_lib->ms.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? 
+ get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0); + + myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; + myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; + myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; + myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; + myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; + myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; + myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; + myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; + myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + myPipe->ODMMode = mode_lib->ms.ODMMode[k]; + myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); +#endif + CalculatePrefetchSchedule_params->display_cfg = display_cfg; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned 
int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; + CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; + CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; + CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; + CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); + CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; + CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; + CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; + CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; + 
CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; + CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; + + // output + CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; + CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; + CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c + CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &mode_lib->ms.RequiredPrefetchBWMax[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; + CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; + CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; + CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; + CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; + CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; + CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; + CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; + CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; + CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; + + mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + + mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); + } // for k num_planes + + CalculateDCFCLKDeepSleepTdlut( + 
display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.SwathWidthY, + mode_lib->ms.SwathWidthC, + mode_lib->ms.NoOfDPP, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLK, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, + mode_lib->soc.return_bus_width_bytes, + mode_lib->ms.RequiredDISPCLK, + s->tdlut_bytes_to_deliver, + s->prefetch_swath_time_us, + + /* Output */ + &mode_lib->ms.dcfclk_deepsleep); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.dst_y_prefetch[k] < 2.0 + || mode_lib->ms.LinesForVM[k] >= 32.0 + || mode_lib->ms.LinesForDPTERow[k] >= 16.0 + || mode_lib->ms.NoTimeForPrefetch[k] == true + || s->DSTYAfterScaler[k] > 8) { + mode_lib->ms.support.PrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); + } + } + + mode_lib->ms.support.DynamicMetadataSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { + mode_lib->ms.support.DynamicMetadataSupported = false; + } + } + + mode_lib->ms.support.VRatioInPrefetchSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { + mode_lib->ms.support.VRatioInPrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); + } + } + + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; + + // By default, do not recalc prefetch schedule + s->recalc_prefetch_schedule = 0; + + // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok + if (mode_lib->ms.support.PrefetchSupported) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + // Calculate Urgent burst factor for prefetch +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); +#endif + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->ms.swath_width_luma_ub[k], + mode_lib->ms.swath_width_chroma_ub[k], + mode_lib->ms.SwathHeightY[k], + mode_lib->ms.SwathHeightC[k], + s->line_times[k], + mode_lib->ms.UrgLatency, + mode_lib->ms.VRatioPreY[k], + mode_lib->ms.VRatioPreC[k], + mode_lib->ms.BytePerPixelInDETY[k], + 
mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeY[k], + mode_lib->ms.DETBufferSizeC[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorLumaPre[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], + &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + + // Calculate urgent bandwidth required, both urg and non urg peak bandwidth + // assume flip bw is 0 at this point + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + mode_lib->ms.final_flip_bw[k] = 0; + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 0; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + 
calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + // Check urg peak bandwidth against available urg bw + // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) + check_urgent_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth + &s->dummy_single[1], // double* frac_urg_bandwidth_mall + &mode_lib->ms.support.UrgVactiveBandwidthSupport, + &mode_lib->ms.support.PrefetchBandwidthSupported, + + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.support.non_urg_bandwidth_required, + mode_lib->ms.support.urg_vactive_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; + DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { + mode_lib->ms.support.PrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + } + +#ifdef DML_GLOBAL_PREFETCH_CHECK + if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) { + CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; + CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; + CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; + CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; + CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; + CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; + CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; + CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 
mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; + CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; + if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; + + CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / + ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); + + // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible + CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; + CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; + mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); + s->recalc_prefetch_done = 1; + s->recalc_prefetch_schedule = 1; + } +#endif + } // prefetch schedule ok, do urg bw and flip schedule + } while (s->recalc_prefetch_schedule); + + // Flip Schedule + // Both prefetch schedule and BW okay + if (mode_lib->ms.support.PrefetchSupported == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip( + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_qual, // no flip + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( + s->HostVMInefficiencyFactor, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; + + } + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 1, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.Tno_bw_flip[k], + mode_lib->ms.dpte_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, + s->per_pipe_flip_bytes[k], + mode_lib->ms.meta_row_bytes[k], + s->meta_row_height_luma[k], + s->meta_row_height_chroma[k], + 
mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + /* Output */ + &mode_lib->ms.dst_y_per_vm_flip[k], + &mode_lib->ms.dst_y_per_row_flip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 1; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + 
calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + calculate_immediate_flip_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth_flip + &mode_lib->ms.support.ImmediateFlipSupport, + + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + mode_lib->ms.support.urg_bandwidth_available); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + } else { // if prefetch not support, assume iflip is not supported too + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; + s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; + s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; + s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mSOCParameters.USRRetrainingLatency = 0; + s->mSOCParameters.SMNLatency = 0; + s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx); + s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx); + s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; + + CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = 
mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; + CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; + CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; + + // Output + CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; + CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support; + CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; + CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); + DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__); + +} + + +static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) +{ + struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; + const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; + + double outstanding_latency_us = 0; + + struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct dml2_core_calcs_CalculateVMRowAndSwath_params 
*CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; + struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params; + unsigned int k, m, n; + + memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); + memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support)); + + mode_lib->ms.num_active_planes = display_cfg->num_planes; + get_stream_output_bpp(s->OutputBpp, display_cfg); + + mode_lib->ms.state_idx = in_out_params->min_clk_index; + mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000); + mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000); + mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000); + mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000; + mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000; + mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000; + mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000; + mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000; + mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); + mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); + mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); + +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); + DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); + DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK); + DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz); + 
DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); + DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); + DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); +#endif + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ip.config_return_buffer_size_in_kbytes, + mode_lib->ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->ip.max_num_dpp, + display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, + display_cfg->overrides.hw.force_nom_det_size_kbytes.value, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + &mode_lib->ms.MaxTotalDETInKByte, + &mode_lib->ms.NomDETInKByte, + &mode_lib->ms.MinCompressedBufferSizeInKByte); + + PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + mode_lib->ms.support.ScaleRatioAndTapsSupport = true; + // Many core tests are still setting scaling parameters "incorrectly" + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false + && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0 + || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1) + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) + && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps 
< 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 || + (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } + } + + /*Source Format, Pixel Format and Scan Support Check*/ + mode_lib->ms.support.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + mode_lib->ms.support.SourceFormatPixelAndScanSupport = false; + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateBytePerPixelAndBlockSizes( + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.tiling, + display_cfg->plane_descriptors[k].surface.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane1.pitch, + + /* Output */ + &mode_lib->ms.BytePerPixelY[k], + &mode_lib->ms.BytePerPixelC[k], + &mode_lib->ms.BytePerPixelInDETY[k], + &mode_lib->ms.BytePerPixelInDETC[k], + &mode_lib->ms.Read256BlockHeightY[k], + &mode_lib->ms.Read256BlockHeightC[k], + &mode_lib->ms.Read256BlockWidthY[k], + &mode_lib->ms.Read256BlockWidthC[k], + &mode_lib->ms.MacroTileHeightY[k], + &mode_lib->ms.MacroTileHeightC[k], + &mode_lib->ms.MacroTileWidthY[k], + &mode_lib->ms.MacroTileWidthC[k], + &mode_lib->ms.surf_linear128_l[k], + &mode_lib->ms.surf_linear128_c[k]); + } + + /*Bandwidth Support Check*/ + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; + } else { + mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / 
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + + mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * + display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); +#endif + } + + // Writeback bandwidth + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { + mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total + / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total + / 
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0; + } else { + mode_lib->ms.WriteBandwidth[k][0] = 0.0; + } + } + + /*Writeback Latency support check*/ + mode_lib->ms.support.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && + (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) { + mode_lib->ms.support.WritebackLatencySupport = false; + } + } + + + /* Writeback Scale Ratio and Taps Support Check */ + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps + || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateSinglePipeDPPCLKAndSCLThroughput( + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ip.max_pscl_lb_bw_pix_per_clk, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, + /* Output */ + &mode_lib->ms.PSCL_FACTOR[k], + &mode_lib->ms.PSCL_FACTOR_CHROMA[k], + &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]); + } + + // Max Viewport Size support + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { + s->MaximumSwathWidthSupportLuma = 15360; + } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video + s->MaximumSwathWidthSupportLuma = 7680 + 16; + } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video + s->MaximumSwathWidthSupportLuma = 4320 + 16; + } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha + s->MaximumSwathWidthSupportLuma = 5120 + 16; + } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp + s->MaximumSwathWidthSupportLuma = 3072 + 16; + } else { + s->MaximumSwathWidthSupportLuma = 6144 + 16; + } + + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0); + } else { + s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma; + } + + unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits; + unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits; + +/* +#if defined(DV_BUILD) + // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming. 
+ if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { + lb_buffer_size_bits_luma = 34620 * 57; + lb_buffer_size_bits_chroma = 13560 * 57; + } +#endif +*/ + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 / + (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); + if (mode_lib->ms.BytePerPixelC[k] == 0.0) { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; + } else { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 / + (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); + } + + mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); + mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); + + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); + + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); + } + + /* Cursor Support Check */ + mode_lib->ms.support.CursorSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) + mode_lib->ms.support.CursorSupport = false; + } + } + + /* Valid Pitch Check */ + mode_lib->ms.support.PitchSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + + // data pitch + unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k]; + + if (mode_lib->ms.surf_linear128_l[k]) + alignment_l = alignment_l / 2; + + mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l); + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k]; + + if (mode_lib->ms.surf_linear128_c[k]) + alignment_c = alignment_c / 2; + mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c); + } else { + mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch; + } + + if 
(mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch || + mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) { + mode_lib->ms.support.PitchSupport = false; +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); + DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); + DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); +#endif + } + + // meta pitch + if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) { + mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]); + + if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch) + mode_lib->ms.support.PitchSupport = false; + + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch, + display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]); + + if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch) + mode_lib->ms.support.PitchSupport = false; + } + } else { + mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0; + mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0; + } + } + + mode_lib->ms.support.ViewportExceedsSurface = false; + if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || + display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); +#endif + } + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || + 
display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; + } + } + } + } + + CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l; + CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c; + CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2]; + CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3]; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = 
s->dummy_integer_array[4]; + CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5]; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6]; + CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7]; + CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8]; + CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26]; + CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11]; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0]; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1]; + CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2]; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1]; + + // This calls is just to find out if there is enough DET space to support full vp in 1 pipe. + CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); + + mode_lib->ms.TotalNumberOfActiveDPP = 0; + mode_lib->ms.TotalNumberOfActiveOPP = 0; + mode_lib->ms.support.TotalAvailablePipesSupport = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + /*Number Of DSC Slices*/ + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) { + + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) + mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices; + else { + if (s->PixelClockBackEnd[k] > 4800) { + mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4)); + } else if (s->PixelClockBackEnd[k] > 2400) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 8; + } else if (s->PixelClockBackEnd[k] > 1200) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 4; + } else if (s->PixelClockBackEnd[k] > 340) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 2; + } else { + mode_lib->ms.support.NumberOfDSCSlices[k] = 1; + } + } + } else { + mode_lib->ms.support.NumberOfDSCSlices[k] = 0; + } + + CalculateODMMode( + mode_lib->ip.maximum_pixels_per_line_per_dsc_unit, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode, + mode_lib->ms.max_dispclk_freq_mhz, + false, // DSCEnable + mode_lib->ms.TotalNumberOfActiveDPP, + mode_lib->ms.TotalNumberOfActiveOPP, + mode_lib->ip.max_num_dpp, + mode_lib->ip.max_num_opp, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + mode_lib->ms.support.NumberOfDSCSlices[k], + + /* Output */ + &s->TotalAvailablePipesSupportNoDSC, + &s->NumberOfDPPNoDSC, + &s->ODMModeNoDSC, + &s->RequiredDISPCLKPerSurfaceNoDSC); + + CalculateODMMode( + mode_lib->ip.maximum_pixels_per_line_per_dsc_unit, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode, + mode_lib->ms.max_dispclk_freq_mhz, + true, // DSCEnable + mode_lib->ms.TotalNumberOfActiveDPP, + mode_lib->ms.TotalNumberOfActiveOPP, + mode_lib->ip.max_num_dpp, + mode_lib->ip.max_num_opp, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + mode_lib->ms.support.NumberOfDSCSlices[k], + + /* Output */ + &s->TotalAvailablePipesSupportDSC, + &s->NumberOfDPPDSC, + &s->ODMModeDSC, + &s->RequiredDISPCLKPerSurfaceDSC); + + CalculateOutputLink( + &mode_lib->scratch, + ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000), + ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000), + ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000), + mode_lib->soc.phy_downspread_percent, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + s->PixelClockBackEnd[k], + s->OutputBpp[k], + mode_lib->ip.maximum_dsc_bits_per_component, + mode_lib->ms.support.NumberOfDSCSlices[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout, + s->ODMModeNoDSC, + s->ODMModeDSC, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate, + + /* Output */ + &mode_lib->ms.RequiresDSC[k], + &mode_lib->ms.RequiresFEC[k], + &mode_lib->ms.OutputBpp[k], + &mode_lib->ms.OutputType[k], + &mode_lib->ms.OutputRate[k], + &mode_lib->ms.RequiredSlots[k]); + + if (s->OutputBpp[k] == 0.0) { + s->OutputBpp[k] = mode_lib->ms.OutputBpp[k]; + } + + if (mode_lib->ms.RequiresDSC[k] == false) { + mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC; + mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC; + if (!s->TotalAvailablePipesSupportNoDSC) + 
mode_lib->ms.support.TotalAvailablePipesSupport = false; + mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC; + } else { + mode_lib->ms.ODMMode[k] = s->ODMModeDSC; + mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC; + if (!s->TotalAvailablePipesSupportDSC) + mode_lib->ms.support.TotalAvailablePipesSupport = false; + mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC; + } +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); + DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); +#endif + + // ensure the number dsc slices is integer multiple based on ODM mode + mode_lib->ms.support.DSCSlicesODMModeSupported = true; + if (mode_lib->ms.RequiresDSC[k]) { + // fail a ms check if the override num_slices doesn't align with odm mode setting + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) { + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) + mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0); + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) + mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12); + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) + mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0); +#if defined(__DML_VBA_DEBUG__) + if (!mode_lib->ms.support.DSCSlicesODMModeSupported) { + DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices); + DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); + } +#endif + } else { + // safe guard to ensure the dml derived dsc slices and odm setting are compatible + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) + mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0); + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) + mode_lib->ms.support.NumberOfDSCSlices[k] = 12; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) + mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0); + } + + } else { + mode_lib->ms.support.NumberOfDSCSlices[k] = 0; + } + } + + mode_lib->ms.support.incorrect_imall_usage = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) + mode_lib->ms.support.incorrect_imall_usage = 1; + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 1; + mode_lib->ms.NoOfOPP[k] = 1; + + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 4; + mode_lib->ms.NoOfOPP[k] = 4; + } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 3; + mode_lib->ms.NoOfOPP[k] = 3; + } else if 
(mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 2; + mode_lib->ms.NoOfOPP[k] = 2; + } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) { + mode_lib->ms.MPCCombine[k] = true; + mode_lib->ms.NoOfDPP[k] = 2; + } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 1; + if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { + DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); + } + } else { + if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { + mode_lib->ms.MPCCombine[k] = true; + mode_lib->ms.NoOfDPP[k] = 2; + } + } +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); +#endif + } + + mode_lib->ms.TotalNumberOfActiveDPP = 0; + mode_lib->ms.TotalNumberOfActiveOPP = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.TotalNumberOfActiveDPP += mode_lib->ms.NoOfDPP[k]; + mode_lib->ms.TotalNumberOfActiveOPP += mode_lib->ms.NoOfOPP[k]; + } + if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp) + mode_lib->ms.support.TotalAvailablePipesSupport = false; + if (mode_lib->ms.TotalNumberOfActiveOPP > (unsigned int)mode_lib->ip.max_num_opp) + mode_lib->ms.support.TotalAvailablePipesSupport = false; + + + mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0; + for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoOfDPP[k] == 1) + mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1; + } + + //DISPCLK/DPPCLK + mode_lib->ms.WritebackRequiredDISPCLK = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + mode_lib->ip.writeback_line_buffer_buffer_size)); + } + } + + mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDISPCLK = 
math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]); + } + + mode_lib->ms.GlobalDPPCLK = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k]; + mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]); + } + + mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz)); + + /* Total Available OTG, Writeback, HDMIFRL, DP Support Check */ + s->TotalNumberOfActiveOTG = 0; + s->TotalNumberOfActiveHDMIFRL = 0; + s->TotalNumberOfActiveDP2p0 = 0; + s->TotalNumberOfActiveDP2p0Outputs = 0; + s->TotalNumberOfActiveWriteback = 0; + memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) { + s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1; + + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) + s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1; + + s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) + s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) { + s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1; + // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup + //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) { + s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1; + //} + } + } + } + } + + /* Writeback Mode Support Check */ + mode_lib->ms.support.EnoughWritebackUnits = 1; + if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) { + mode_lib->ms.support.EnoughWritebackUnits = false; + } + mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg); + mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs); + mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs); + + + mode_lib->ms.support.ExceededMultistreamSlots = false; + mode_lib->ms.support.LinkCapacitySupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false && + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) { + mode_lib->ms.support.LinkCapacitySupport = false; + } + } + + mode_lib->ms.support.P2IWith420 = false; + mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false; + mode_lib->ms.support.DSC422NativeNotSupported = false; + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false; + mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false; + mode_lib->ms.support.BPPForMultistreamNotIndicated = false; + mode_lib->ms.support.MultistreamWithHDMIOreDP = false; + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false; + mode_lib->ms.support.NotEnoughLanesForMSO = false; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) + mode_lib->ms.support.P2IWith420 = true; + + if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) + mode_lib->ms.support.DSC422NativeNotSupported = true; + + if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) && + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) || + ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) && + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0)) + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true; + + // FIXME_STAGE2 + //if 
(display_cfg->output.OutputMultistreamEn[k] == 1) { + // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na) + // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true; + // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0) + // mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { + // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0) + // mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + // } + //} + + if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) { + // FIXME_STAGE2 + //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k) + // mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { + // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n) + // mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + //} + } + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4)) + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true; + + if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) || + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4)) + mode_lib->ms.support.NotEnoughLanesForMSO = true; + } + } + + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl && + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK( + mode_lib->ms.RequiresDSC[k], + s->PixelClockBackEnd[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + mode_lib->ms.OutputBpp[k], + mode_lib->ms.support.NumberOfDSCSlices[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout); + + if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) { + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; + } + } else { + /* Phantom DTBCLK can be calculated different from main because phantom has no DSC and thus + * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider + * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK + * required - by setting phantom dtbclk to 0 we ignore it. + */ + mode_lib->ms.RequiredDTBCLK[k] = 0; + } + } + + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) { + s->DSCFormatFactor = 2; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) { + s->DSCFormatFactor = 1; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { + s->DSCFormatFactor = 2; + } else { + s->DSCFormatFactor = 1; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); +#endif + if (mode_lib->ms.RequiresDSC[k] == true) { + s->PixelClockBackEndFactor = 3.0; + + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) + s->PixelClockBackEndFactor = 12.0; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) + s->PixelClockBackEndFactor = 9.0; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) + s->PixelClockBackEndFactor = 6.0; + + mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor; + if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) { + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); + DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); +#endif + } + } + } + + /* Check DSC Unit and Slices Support */ + mode_lib->ms.support.NotEnoughDSCSlices = false; + s->TotalDSCUnitsRequired = 0; + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true; + memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if 
(mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) { + s->NumDSCUnitRequired = 1; + + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) + s->NumDSCUnitRequired = 4; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) + s->NumDSCUnitRequired = 3; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) + s->NumDSCUnitRequired = 2; + + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit) + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; + s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired; + + if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired) + mode_lib->ms.support.NotEnoughDSCSlices = true; + } + s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1; + } + + mode_lib->ms.support.NotEnoughDSCUnits = false; + if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) { + mode_lib->ms.support.NotEnoughDSCUnits = true; + } + + /*DSC Delay per state*/ + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], + mode_lib->ms.ODMMode[k], + mode_lib->ip.maximum_dsc_bits_per_component, + s->OutputBpp[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + mode_lib->ms.support.NumberOfDSCSlices[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + s->PixelClockBackEnd[k]); + } + + // Figure out the swath and DET configuration after the num dpp per plane is figured out + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; + CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP; + + // output + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub; + CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC; + CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY; + CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC; + CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma; + CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = 
mode_lib->ms.DETBufferSizeY; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3]; + CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1]; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport; + + CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); + + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + mode_lib->ms.SurfaceSizeInMALL[k] = 0; + mode_lib->ms.support.ExceededMALLSize = 0; + } else { + CalculateSurfaceSizeInMall( + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->soc.mall_allocated_for_dcn_mbytes, + + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.Read256BlockWidthY, + mode_lib->ms.Read256BlockWidthC, + mode_lib->ms.Read256BlockHeightY, + mode_lib->ms.Read256BlockHeightC, + mode_lib->ms.MacroTileWidthY, + mode_lib->ms.MacroTileWidthC, + mode_lib->ms.MacroTileHeightY, + mode_lib->ms.MacroTileHeightC, + + /* Output */ + mode_lib->ms.SurfaceSizeInMALL, + &mode_lib->ms.support.ExceededMALLSize); + } + + mode_lib->ms.TotalNumberOfDCCActiveDPP = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) { + mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k]; + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k]; + s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k]; + s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k]; + s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k]; + s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k]; + s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + s->SurfParameters[k].SourcePixelFormat = 
display_cfg->plane_descriptors[k].pixel_format; + s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling; + s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch; + s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch; + s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; + s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame; + s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k]; + s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k]; + + s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch; + s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch; + } + + CalculateVMRowAndSwath_params->display_cfg = display_cfg; + CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateVMRowAndSwath_params->myPipe = s->SurfParameters; + CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; + CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; + CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY; + CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; + CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded; + CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12]; + CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13]; + CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height; + CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma; + 
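+	/*
+	 * Output wiring for CalculateVMRowAndSwath(): results that the rest of the
+	 * mode-support pass consumes (dpte row height/bytes/bandwidth, vm_bytes,
+	 * meta row geometry, the *NotExceeded flags) are kept in mode_lib->ms or in
+	 * named scratch fields reused below (e.g. dpte_row_bytes_per_row_l/c),
+	 * while outputs that are not needed by this pass are pointed at
+	 * s->dummy_integer_array / s->dummy_boolean_array slots so the helper can
+	 * keep a single signature.
+	 */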
CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA + CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA + CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16]; + CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17]; + CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18]; + CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19]; + CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20]; + CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21]; + CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22]; + CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y; + CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y; + CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c; + CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23]; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24]; + CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY; + CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC; + CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY; + CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC; + CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY; + CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC; + CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow; + CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l; + CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c; + CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes; + CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame; + CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip; + CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0]; + CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1]; + CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25]; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded; + CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw; + CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes; + CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; + CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; + CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26]; + CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27]; + CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28]; + CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29]; + CalculateVMRowAndSwath_params->meta_req_width_chroma = 
s->dummy_integer_array[30]; + CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31]; + CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32]; + CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33]; + + CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params); + + mode_lib->ms.support.PTEBufferSizeNotExceeded = true; + mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false) + mode_lib->ms.support.PTEBufferSizeNotExceeded = false; + + if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false) + mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); +#endif + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); +#endif + + /* VActive bytes to fetch for UCLK P-State */ + calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg; + calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma; + calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma; + calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; + calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; + calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height; + calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma; + calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l; + calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c; + calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY; + calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC; + calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY; + calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC; + calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY; + calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC; + calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = 
mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + + /* outputs */ + calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk]; + calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk]; + + calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params); + + /* Excess VActive bandwidth required to fill DET */ + calculate_excess_vactive_bandwidth_required( + display_cfg, + mode_lib->ms.num_active_planes, + s->pstate_bytes_required_l[dml2_pstate_type_uclk], + s->pstate_bytes_required_c[dml2_pstate_type_uclk], + /* outputs */ + mode_lib->ms.excess_vactive_fill_bw_l, + mode_lib->ms.excess_vactive_fill_bw_c); + + mode_lib->ms.UrgLatency = CalculateUrgentLatency( + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.do_urgent_latency_adjustment, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, + mode_lib->ms.FabricClock, + mode_lib->ms.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); + + mode_lib->ms.TripToMemory = CalculateTripToMemory( + mode_lib->ms.UrgLatency, + mode_lib->ms.FabricClock, + mode_lib->ms.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); + + mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + bool cursor_not_enough_urgent_latency_hiding = false; + + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, + + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + 
display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->ms.UrgLatency, + + // output + &mode_lib->ms.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } + + mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); +#endif + + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->ms.swath_width_luma_ub[k], + mode_lib->ms.swath_width_chroma_ub[k], + mode_lib->ms.SwathHeightY[k], + mode_lib->ms.SwathHeightC[k], + line_time_us, + mode_lib->ms.UrgLatency, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeY[k], + mode_lib->ms.DETBufferSizeC[k], + + // Output + &mode_lib->ms.UrgentBurstFactorLuma[k], + &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]); + + mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding; + } + + CalculateDCFCLKDeepSleep( + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.SwathWidthY, + mode_lib->ms.SwathWidthC, + mode_lib->ms.NoOfDPP, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLK, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, + mode_lib->soc.return_bus_width_bytes, + + /* Output */ + &mode_lib->ms.dcfclk_deepsleep); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK; + } else { + mode_lib->ms.WritebackDelayTime[k] = 0.0; + } + } + + // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide + for (k = 0; k <= 
mode_lib->ms.num_active_planes - 1; k++) { + bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); + s->MaximumVStartup[k] = CalculateMaxVStartup( + mode_lib->ip.ptoi_supported, + mode_lib->ip.vblank_nom_default_us, + &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing, + mode_lib->ms.WritebackDelayTime[k]); + mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]); + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); +#endif + + /* Immediate Flip and MALL parameters */ + s->ImmediateFlipRequired = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip; + } + + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || + ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) && + (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))); + } + + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen || + ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) || + ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)); + } + + s->FullFrameMALLPStateMethod = false; + s->SubViewportMALLPStateMethod = false; + s->PhantomPipeMALLPStateMethod = false; + s->SubViewportMALLRefreshGreaterThan120Hz = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) + s->FullFrameMALLPStateMethod = true; + if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) { + s->SubViewportMALLPStateMethod = true; + if (!display_cfg->overrides.enable_subvp_implicit_pmo) { + // For dv, small frame tests will have very high refresh rate + unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 / + (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + 
(double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + if (refresh_rate > 120) + s->SubViewportMALLRefreshGreaterThan120Hz = true; + } + } + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) + s->PhantomPipeMALLPStateMethod = true; + } + mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) || + (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); + DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); +#endif + + mode_lib->ms.support.OutstandingRequestsSupport = true; + mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; + + mode_lib->ms.support.avg_urgent_latency_us + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); + + mode_lib->ms.support.avg_non_urgent_latency_us + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); + + mode_lib->ms.support.max_non_urgent_latency_us + = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { + 
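+			/*
+			 * DCN4x outstanding-request check. The longest latency that the maximum
+			 * number of in-flight return requests can hide is
+			 *
+			 *   outstanding_latency_us = max_outstanding_reqs * request_size_bytes_luma/chroma
+			 *                            / (DCFCLK_MHz * return_bus_width_bytes)
+			 *
+			 * Falling below avg_urgent_latency_us clears OutstandingRequestsSupport;
+			 * falling below avg_non_urgent_latency_us clears
+			 * OutstandingRequestsUrgencyAvoidance. Both flags feed the final
+			 * mode-support conjunction. Purely illustrative numbers (not tied to any
+			 * particular SoC): 512 requests of 256 bytes at DCFCLK = 1000 MHz on a
+			 * 64-byte return bus hide 512 * 256 / (1000 * 64) = 2.048 us.
+			 */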
outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k] + / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); + + if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsSupport = false; + } + + if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); +#endif + } + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) { + outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] + / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); + + if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsSupport = false; + } + + if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); +#endif + } + } + + memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params)); + if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0; + mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0; + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0; + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0; + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0; + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0; + } + } else { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count; + calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes; + calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes; + calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes; + calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + + calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format; + calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle); + calculate_mcache_setting_params->vp_stationary = 
display_cfg->plane_descriptors[k].composition.viewport.stationary; + calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling; + calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall; + + calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k]; + calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k]; + calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k]; + calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k]; + calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k]; + calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k]; + + calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start; + calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; + calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k]; + calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k]; + calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k]; + calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k]; + calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k]; + calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k]; + + // output + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k]; + + calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k]; + calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k]; + calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k]; + calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k]; + calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k]; + + calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k]; + calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k]; + calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = 
&mode_lib->ms.mcache_row_bytes_per_channel_c[k]; + calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k]; + calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k]; + + calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k]; + calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k]; + calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k]; + + calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params); + } + + calculate_mall_bw_overhead_factor( + mode_lib->ms.mall_prefetch_sdp_overhead_factor, + mode_lib->ms.mall_prefetch_dram_overhead_factor, + + // input + display_cfg, + mode_lib->ms.num_active_planes); + } + + // Calculate all the bandwidth available + // Need anothe bw for latency evaluation + calculate_bandwidth_available( + mode_lib->ms.support.avg_bandwidth_available_min, // not used + mode_lib->ms.support.avg_bandwidth_available, // not used + mode_lib->ms.support.urg_bandwidth_available_min_latency, + mode_lib->ms.support.urg_bandwidth_available, // not used + mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used + + &mode_lib->soc, + display_cfg->hostvm_enable, + mode_lib->ms.DCFCLK, + mode_lib->ms.FabricClock, + mode_lib->ms.dram_bw_mbps); + + calculate_bandwidth_available( + mode_lib->ms.support.avg_bandwidth_available_min, + mode_lib->ms.support.avg_bandwidth_available, + mode_lib->ms.support.urg_bandwidth_available_min, + mode_lib->ms.support.urg_bandwidth_available, + mode_lib->ms.support.urg_bandwidth_available_vm_only, + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, + + &mode_lib->soc, + display_cfg->hostvm_enable, + mode_lib->ms.MaxDCFCLK, + mode_lib->ms.MaxFabricClock, +#ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW + mode_lib->ms.dram_bw_mbps); +#else + mode_lib->ms.max_dram_bw_mbps); +#endif + + // Average BW support check + calculate_avg_bandwidth_required( + mode_lib->ms.support.avg_bandwidth_required, + // input + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, + mode_lib->ms.cursor_bw, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, + mode_lib->ms.mall_prefetch_dram_overhead_factor, + mode_lib->ms.mall_prefetch_sdp_overhead_factor); + + for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram + mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1; + mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]); + mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]); + } + + mode_lib->ms.support.AvgBandwidthSupport = true; + mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) { + mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false; + DML_LOG_VERBOSE("DML::%s: 
k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k); + + } + } + for (m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram + if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) { + mode_lib->ms.support.AvgBandwidthSupport = false; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n)); +#endif + } + } + } + + dml_core_ms_prefetch_check(mode_lib, display_cfg); + + mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; + + //Re-ordering Buffer Support Check + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { + if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 + / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) { + mode_lib->ms.support.ROBSupport = true; + } else { + mode_lib->ms.support.ROBSupport = false; + } + } else { + if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { + mode_lib->ms.support.ROBSupport = true; + } else { + mode_lib->ms.support.ROBSupport = false; + } + } + + /* VActive fill time calculations (informative) */ + calculate_vactive_det_fill_latency( + display_cfg, + mode_lib->ms.num_active_planes, + s->pstate_bytes_required_l[dml2_pstate_type_uclk], + s->pstate_bytes_required_c[dml2_pstate_type_uclk], + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, + mode_lib->ms.surface_avg_vactive_required_bw, + mode_lib->ms.surface_peak_required_bw, + /* outputs */ + mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); +#endif + + /*Mode Support, Voltage State and SOC Configuration*/ + { + if (mode_lib->ms.support.ScaleRatioAndTapsSupport + && mode_lib->ms.support.SourceFormatPixelAndScanSupport + && mode_lib->ms.support.ViewportSizeSupport + && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion + && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated + && !mode_lib->ms.support.BPPForMultistreamNotIndicated + && !mode_lib->ms.support.MultistreamWithHDMIOreDP + && !mode_lib->ms.support.ExceededMultistreamSlots + && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink + && !mode_lib->ms.support.NotEnoughLanesForMSO + && !mode_lib->ms.support.P2IWith420 + && !mode_lib->ms.support.DSC422NativeNotSupported + && mode_lib->ms.support.DSCSlicesODMModeSupported + && !mode_lib->ms.support.NotEnoughDSCUnits + && !mode_lib->ms.support.NotEnoughDSCSlices + && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported + && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport + && 
!mode_lib->ms.support.DTBCLKRequiredMoreThanSupported + && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState + && mode_lib->ms.support.ROBSupport + && mode_lib->ms.support.OutstandingRequestsSupport + && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance + && mode_lib->ms.support.DISPCLK_DPPCLK_Support + && mode_lib->ms.support.TotalAvailablePipesSupport + && mode_lib->ms.support.NumberOfOTGSupport + && mode_lib->ms.support.NumberOfHDMIFRLSupport + && mode_lib->ms.support.NumberOfDP2p0Support + && mode_lib->ms.support.EnoughWritebackUnits + && mode_lib->ms.support.WritebackLatencySupport + && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport + && mode_lib->ms.support.CursorSupport + && mode_lib->ms.support.PitchSupport + && !mode_lib->ms.support.ViewportExceedsSurface + && mode_lib->ms.support.PrefetchSupported + && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport + && mode_lib->ms.support.AvgBandwidthSupport + && mode_lib->ms.support.DynamicMetadataSupported + && mode_lib->ms.support.VRatioInPrefetchSupported + && mode_lib->ms.support.PTEBufferSizeNotExceeded + && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded + && !mode_lib->ms.support.ExceededMALLSize + && mode_lib->ms.support.g6_temp_read_support + && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) { + DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__); + mode_lib->ms.support.ModeSupport = true; + } else { + DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__); + mode_lib->ms.support.ModeSupport = false; + } + } + + // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0). + DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k]; + mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k]; + } + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k]; + mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k]; + mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k]; + mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k]; + mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k]; + mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k]; + +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); +#endif + } + +#if defined(__DML_VBA_DEBUG__) + if (!mode_lib->ms.support.ModeSupport) + dml2_print_mode_support_info(&mode_lib->ms.support, true); + + DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__); +#endif + + return mode_lib->ms.support.ModeSupport; +} + +unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params) +{ + unsigned int result; + + DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); + result = dml_core_mode_support(in_out_params); + + if (result) + *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; + + DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + + for (unsigned 
int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + + DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); + + return result; +} + +static void CalculatePixelDeliveryTimes( + const struct dml2_display_cfg *display_cfg, + const struct core_display_cfg_support_info *cfg_support_info, + unsigned int NumberOfActiveSurfaces, + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + unsigned int BytePerPixelC[], + unsigned int req_per_swath_ub_l[], + unsigned int req_per_swath_ub_c[], + + // Output + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double DisplayPipeRequestDeliveryTimeChromaPrefetch[]) +{ + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); + DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz); + DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); +#endif + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma[k] = 0; + } else { + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) { + 
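+				/*
+				 * Chroma line delivery time, mirroring the luma case above: with
+				 * plane1 v_ratio <= 1 the swath drains at the display rate, so the
+				 * time is swath_width_chroma_ub * DPPs / h_ratio / pixel_clock;
+				 * with vertical upscaling (v_ratio > 1) the scaler throughput at
+				 * DPPCLK is the bottleneck instead (the else branch below).
+				 */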
DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (VRatioPrefetchY[k] <= 1) { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (VRatioPrefetchC[k] <= 1) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); +#endif + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + + DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k]; + DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k]; + if (BytePerPixelC[k] == 0) { + DisplayPipeRequestDeliveryTimeChroma[k] = 0; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; + } else { + DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k]; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k]; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); +#endif + } +} + +static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p) +{ + unsigned int meta_chunk_width; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int 
meta_chunks_per_row_ub; + unsigned int meta_chunk_width_chroma; + unsigned int min_meta_chunk_width_chroma; + unsigned int meta_chunk_per_row_int_chroma; + unsigned int meta_row_remainder_chroma; + unsigned int meta_chunk_threshold_chroma; + unsigned int meta_chunks_per_row_ub_chroma; + unsigned int dpte_group_width_luma; + unsigned int dpte_groups_per_row_luma_ub; + unsigned int dpte_group_width_chroma; + unsigned int dpte_groups_per_row_chroma_ub; + double pixel_clock_mhz; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_META_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) { + meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; + min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; + meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width; + meta_row_remainder = p->meta_row_width[k] % meta_chunk_width; + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k]; + } else { + meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k]; + } + if (meta_row_remainder <= meta_chunk_threshold) { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + } else { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + } + p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio * + p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + if (p->BytePerPixelC[k] == 0) { + p->TimePerChromaMetaChunkNominal[k] = 0; + p->TimePerChromaMetaChunkVBlank[k] = 0; + p->TimePerChromaMetaChunkFlip[k] = 0; + } else { + meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; + min_meta_chunk_width_chroma = 
p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; + meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma); + meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma; + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k]; + } else { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k]; + } + if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; + } else { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; + } + p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + } + } else { + p->TimePerMetaChunkNominal[k] = 0; + p->TimePerMetaChunkVBlank[k] = 0; + p->TimePerMetaChunkFlip[k] = 0; + p->TimePerChromaMetaChunkNominal[k] = 0; + p->TimePerChromaMetaChunkVBlank[k] = 0; + p->TimePerChromaMetaChunkFlip[k] = 0; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); +#endif + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + } + + for (unsigned int k = 0; k < 
p->NumberOfActiveSurfaces; ++k) { + pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k]; + else + p->time_per_tdlut_group[k] = 0; + + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); + + if (p->display_cfg->gpuvm_enable == true) { + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]); + } else { + dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]); + } + if (p->use_one_row_for_frame[k]) { + dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0)); + } else { + dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); + } + if (dpte_groups_per_row_luma_ub <= 2) { + dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; + } + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); + + p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + if (p->BytePerPixelC[k] == 0) { + p->time_per_pte_group_nom_chroma[k] = 0; + p->time_per_pte_group_vblank_chroma[k] = 0; + p->time_per_pte_group_flip_chroma[k] = 0; + } else { + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]); + } else { + dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]); + } + + if 
(p->use_one_row_for_frame[k]) { + dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0)); + } else { + dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); + } + if (dpte_groups_per_row_chroma_ub <= 2) { + dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; + } + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); + + p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + } + } else { + p->time_per_pte_group_nom_luma[k] = 0; + p->time_per_pte_group_vblank_luma[k] = 0; + p->time_per_pte_group_flip_luma[k] = 0; + p->time_per_pte_group_nom_chroma[k] = 0; + p->time_per_pte_group_vblank_chroma[k] = 0; + p->time_per_pte_group_flip_chroma[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); + + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); +#endif + } +} // CalculateMetaAndPTETimes + +static void CalculateVMGroupAndRequestTimes( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelC[], + double dst_y_per_vm_vblank[], + double dst_y_per_vm_flip[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int tdlut_pte_bytes_per_frame[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int 
meta_pte_bytes_per_frame_ub_c[], + bool mrq_present, + + // Output + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]) +{ + unsigned int num_group_per_lower_vm_stage = 0; + unsigned int num_req_per_lower_vm_stage = 0; + unsigned int num_group_per_lower_vm_stage_flip; + unsigned int num_group_per_lower_vm_stage_pref; + unsigned int num_req_per_lower_vm_stage_flip; + unsigned int num_req_per_lower_vm_stage_pref; + double line_time; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); +#endif + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); +#endif + + if (display_cfg->gpuvm_enable) { + if (display_cfg->gpuvm_max_page_table_levels >= 2) { + num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); + + if (BytePerPixelC[k] > 0) + num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); + } + + if (dcc_mrq_enable) { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)); + } else { + num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)); + } + } + + num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage; + num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage; + + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) { + num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1); + if (display_cfg->gpuvm_max_page_table_levels >= 2) + num_group_per_lower_vm_stage_pref += 1; // tdpe0 group + } + + if (display_cfg->gpuvm_max_page_table_levels >= 2) { + num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64; + if (BytePerPixelC[k] > 0) + num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k]; + } + + if (dcc_mrq_enable) { + num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64; + if (BytePerPixelC[k] > 0) + num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64; + } + + num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage; + num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage; + + if 
(display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) { + num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64; + } + + line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; + + if (num_group_per_lower_vm_stage_pref > 0) + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; + else + TimePerVMGroupVBlank[k] = 0; + + if (num_group_per_lower_vm_stage_flip > 0) + TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; + else + TimePerVMGroupFlip[k] = 0; + + if (num_req_per_lower_vm_stage_pref > 0) + TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; + else + TimePerVMRequestVBlank[k] = 0.0; + if (num_req_per_lower_vm_stage_flip > 0) + TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip; + else + TimePerVMRequestFlip[k] = 0.0; + + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip); + + if (display_cfg->gpuvm_max_page_table_levels > 2) { + TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; + TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; + TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; + TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; + } + + } else { + TimePerVMGroupVBlank[k] = 0; + TimePerVMGroupFlip[k] = 0; + TimePerVMRequestVBlank[k] = 0; + TimePerVMRequestFlip[k] = 0; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); +#endif + } +} + +static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateStutterEfficiency_params *p) +{ + struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals; + + unsigned int TotalNumberOfActiveOTG = 0; + double SinglePixelClock = 0; + unsigned int SingleHTotal = 0; + unsigned int SingleVTotal = 0; + bool SameTiming = true; + bool FoundCriticalSurface = false; + + memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) { + if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && 
p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) { + l->MaximumEffectiveCompressionLuma = 2; + } else { + l->MaximumEffectiveCompressionLuma = 4; + } + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); +#endif + l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0; + l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma; + + if (p->ReadBandwidthSurfaceChroma[k] > 0) { + if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) { + l->MaximumEffectiveCompressionChroma = 2; + } else { + l->MaximumEffectiveCompressionChroma = 4; + } + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); +#endif + l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1; + l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma; + } + } else { + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k]; + } + l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]); + } + } + + l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth; + l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / 
p->TotalDataReadBandwidth; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); + DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); + + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); +#endif + if (l->AverageDCCZeroSizeFraction == 1) { + l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; + l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate; + + + } else if (l->AverageDCCZeroSizeFraction > 0) { + l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; + l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, + (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) + + (p->rob_alloc_compressed ? 
math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate, + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)) + : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); +#endif + } else { + l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, + (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) + + ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); + DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); + DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); +#endif + + *p->StutterPeriod = 0; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? 
l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]); + l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); + DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); +#endif + + if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) { + bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP; + + FoundCriticalSurface = true; + *p->StutterPeriod = l->DETBufferingTimeY; + l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + l->VActiveTimeCriticalSurface = (isInterlaceTiming ? 
math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+				l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
+				l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
+				l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
+				l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
+				l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
+				l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
+				l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
+				l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
+				DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, l->FrameTimeCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
+#endif
+			}
+		}
+	}
+
+	// For bounded requests, the stutter period is calculated based on the DET size only, but during the burst some of the return can land in the ROB/compressed buffer:
+	// if (cdb + rob >= det), the stutter burst is absorbed entirely by the cdb + rob, which sit before the decompressor;
+	// else
+	//    the cdb + rob portion is returned at the compressed rate with urg bw (ideal bw),
+	//    and the det portion is returned at the uncompressed rate of 64B/dcfclk.
+	//
+	// For unbounded requests, the stutter period should be calculated as the total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
+	// should equal EffectiveCompressedBufferSize, which is returned at the compressed rate; the rest of the stutter period comes from the DET and is returned at the uncompressed rate of 64B/dcfclk.
+
+	l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
+	DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * 
p->TotalDataReadBandwidth) / 1024.0); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); + DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); + DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); +#endif + + l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer + / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) + / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); + DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); +#endif + l->TotalActiveWriteback = 0; + memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) { + + if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) + l->TotalActiveWriteback = l->TotalActiveWriteback + 1; + + if (TotalNumberOfActiveOTG == 0) { // first otg + SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total; + } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) || + SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total || + SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) { + SameTiming = false; + } + TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1; + } + } + } + + if (l->TotalActiveWriteback == 0) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); + DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); + 
DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); +#endif + *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100; + *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100; + *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + } else { + *p->StutterEfficiencyNotIncludingVBlank = 0.; + *p->Z8StutterEfficiencyNotIncludingVBlank = 0.; + *p->NumberOfStutterBurstsPerFrame = 0; + *p->Z8NumberOfStutterBurstsPerFrame = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + if (*p->StutterEfficiencyNotIncludingVBlank > 0) { + if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { + *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank; + } else { + *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; + } + } else { + *p->StutterEfficiency = 0; + *p->NumberOfStutterBurstsPerFrame = 0; + } + + if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { + //LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; + if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { + *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; + } else { + *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; + } + } else { + *p->Z8StutterEfficiency = 0.; + *p->Z8NumberOfStutterBurstsPerFrame = 0; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG); + DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming); + DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); + DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); +#endif +} + +static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params) +{ + const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; + const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info; + struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; + struct dml2_display_cfg_programming *programming = in_out_params->programming; + + struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals; + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; + struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; + struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params; + struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; + struct 
dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params; + + unsigned int k; + bool must_support_iflip; + const long min_return_uclk_cycles = 83; + const long min_return_fclk_cycles = 75; + const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0; + double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0; + double max_uclk_mhz = 0; + double min_return_latency_in_DCFCLK_cycles = 0; + + DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); + + memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); + memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program)); + + s->num_active_planes = display_cfg->num_planes; + get_stream_output_bpp(s->OutputBpp, display_cfg); + + mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); + dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); + + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); + + for (k = 0; k < s->num_active_planes; ++k) { + unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || + cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 || + cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); + + if (cfg_support_info->stream_support_info[stream_index].odms_used > 1) + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); + + switch (cfg_support_info->stream_support_info[stream_index].odms_used) { + case (4): + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1; + break; + case (3): + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1; + break; + case (2): + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1; + break; + default: + if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4) + mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4; + else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2) + mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2; + else + mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass; + break; + } + } + + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; + DML_ASSERT(mode_lib->mp.Dppclk[k] 
> 0); + } + + for (k = 0; k < s->num_active_planes; ++k) { + unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; + DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); + } + + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; + + DML_ASSERT(mode_lib->mp.Dcfclk > 0); + DML_ASSERT(mode_lib->mp.FabricClock > 0); + DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0); + DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); + DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); + DML_ASSERT(mode_lib->mp.Dispclk > 0); + DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); + DML_ASSERT(s->SOCCLK > 0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); + DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); + for (k = 0; k < s->num_active_planes; ++k) { + DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); + } + DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); + DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); + DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); + DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); + DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); + for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) { + DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]); + DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + } + + for (k = 0; k < s->num_active_planes; k++) + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); +#endif + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ip.config_return_buffer_size_in_kbytes, + mode_lib->ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->ip.max_num_dpp, + display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, + display_cfg->overrides.hw.force_nom_det_size_kbytes.value, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + &s->MaxTotalDETInKByte, + &s->NomDETInKByte, + &s->MinCompressedBufferSizeInKByte); + + + PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); + + for (k = 0; k < s->num_active_planes; ++k) { + 
CalculateSinglePipeDPPCLKAndSCLThroughput( + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ip.max_pscl_lb_bw_pix_per_clk, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, + + /* Output */ + &mode_lib->mp.PSCL_THROUGHPUT[k], + &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k], + &mode_lib->mp.DPPCLKUsingSingleDPP[k]); + } + + for (k = 0; k < s->num_active_planes; ++k) { + CalculateBytePerPixelAndBlockSizes( + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.tiling, + display_cfg->plane_descriptors[k].surface.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane1.pitch, + + // Output + &mode_lib->mp.BytePerPixelY[k], + &mode_lib->mp.BytePerPixelC[k], + &mode_lib->mp.BytePerPixelInDETY[k], + &mode_lib->mp.BytePerPixelInDETC[k], + &mode_lib->mp.Read256BlockHeightY[k], + &mode_lib->mp.Read256BlockHeightC[k], + &mode_lib->mp.Read256BlockWidthY[k], + &mode_lib->mp.Read256BlockWidthC[k], + &mode_lib->mp.MacroTileHeightY[k], + &mode_lib->mp.MacroTileHeightC[k], + &mode_lib->mp.MacroTileWidthY[k], + &mode_lib->mp.MacroTileWidthC[k], + &mode_lib->mp.surf_linear128_l[k], + &mode_lib->mp.surf_linear128_c[k]); + } + + CalculateSwathWidth( + display_cfg, + false, // ForceSingleDPP + s->num_active_planes, + mode_lib->mp.ODMMode, + mode_lib->mp.BytePerPixelY, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.Read256BlockHeightY, + mode_lib->mp.Read256BlockHeightC, + mode_lib->mp.Read256BlockWidthY, + mode_lib->mp.Read256BlockWidthC, + mode_lib->mp.surf_linear128_l, + mode_lib->mp.surf_linear128_c, + mode_lib->mp.NoOfDPP, + + /* Output */ + mode_lib->mp.req_per_swath_ub_l, + mode_lib->mp.req_per_swath_ub_c, + mode_lib->mp.SwathWidthSingleDPPY, + mode_lib->mp.SwathWidthSingleDPPC, + mode_lib->mp.SwathWidthY, + mode_lib->mp.SwathWidthC, + s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[] + s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[] + mode_lib->mp.swath_width_luma_ub, + mode_lib->mp.swath_width_chroma_ub); + + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); + mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + } + + CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0]; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1]; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l; + CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c; + CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC; +
CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l; + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0]; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1]; + CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2]; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3]; + CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC; + CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma; + CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b; + CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0]; + + // Calculate DET size, swath height here. 
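+ /* Outputs of interest here are the per-plane DET allocation, swath heights, compressed buffer size and whether unbounded requesting is enabled. */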
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); + + // DSC Delay + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable, + mode_lib->mp.ODMMode[k], + mode_lib->ip.maximum_dsc_bits_per_component, + s->OutputBpp[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + s->PixelClockBackEnd[k]); + } + + // Prefetch + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) { + for (k = 0; k < s->num_active_planes; ++k) + mode_lib->mp.SurfaceSizeInTheMALL[k] = 0; + } else { + CalculateSurfaceSizeInMall( + display_cfg, + s->num_active_planes, + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->mp.BytePerPixelY, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.Read256BlockWidthY, + mode_lib->mp.Read256BlockWidthC, + mode_lib->mp.Read256BlockHeightY, + mode_lib->mp.Read256BlockHeightC, + mode_lib->mp.MacroTileWidthY, + mode_lib->mp.MacroTileWidthC, + mode_lib->mp.MacroTileHeightY, + mode_lib->mp.MacroTileHeightC, + + /* Output */ + mode_lib->mp.SurfaceSizeInTheMALL, + &s->dummy_boolean[0]); /* bool *ExceededMALLSize */ + } + + for (k = 0; k < s->num_active_planes; ++k) { + s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k]; + s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; + s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; + s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; + s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; + s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k]; + s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k]; + s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k]; + s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k]; + s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + s->SurfaceParameters[k].SurfaceTiling = 
display_cfg->plane_descriptors[k].surface.tiling; + s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; + s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k]; + s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch; + s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch; + s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; + s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start; + s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame; + s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k]; + s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k]; + s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch; + s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch; + } + + CalculateVMRowAndSwath_params->display_cfg = display_cfg; + CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters; + CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; + CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; + CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; + CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; + CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; + CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; + CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height; + CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; + CalculateVMRowAndSwath_params->dpte_row_height_linear_luma =
mode_lib->mp.dpte_row_height_linear; + CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma; + CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes; + CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; + CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; + CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; + CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; + CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; + CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; + CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y; + CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y; + CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c; + CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c; + CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY; + CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC; + CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY; + CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC; + CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY; + CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC; + CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow; + CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l; + CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c; + CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes; + CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; + CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip; + CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss; + CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE; + CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1]; + CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw; + CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes; + CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; + CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; + CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width; + CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height; + CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width; + CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l; + 
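/* Chroma-side meta (DCC) request and row geometry outputs follow; CalculateVMRowAndSwath() is invoked once all output pointers are wired up. */ +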
CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; + CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; + CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; + CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c; + + CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params); + + memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params)); + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) { + for (k = 0; k < s->num_active_planes; k++) { + mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0; + mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0; + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0; + mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0; + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0; + mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0; + } + } else { + for (k = 0; k < s->num_active_planes; k++) { + calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count; + calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes; + calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes; + calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes; + calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + + calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format; + calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle); + calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; + calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling; + calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall; + + calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k]; + calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k]; + calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k]; + calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k]; + calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k]; + calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k]; + + 
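/* Chroma (plane1) viewport and block geometry inputs for the per-plane mcache sizing. */ +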
calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start; + calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; + calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k]; + calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k]; + calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k]; + calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k]; + calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k]; + calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k]; + + // output + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k]; + + calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k]; + calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k]; + calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k]; + calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k]; + calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k]; + + calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k]; + calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k]; + calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k]; + calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k]; + calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k]; + + calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k]; + calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k]; + calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k]; + calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params); + } + + calculate_mall_bw_overhead_factor( + mode_lib->mp.mall_prefetch_sdp_overhead_factor, + mode_lib->mp.mall_prefetch_dram_overhead_factor, + + // input + display_cfg, + s->num_active_planes); + } + + // Calculate all the bandwidth available + calculate_bandwidth_available( + mode_lib->mp.avg_bandwidth_available_min, + mode_lib->mp.avg_bandwidth_available, + mode_lib->mp.urg_bandwidth_available_min, + mode_lib->mp.urg_bandwidth_available, + mode_lib->mp.urg_bandwidth_available_vm_only, + mode_lib->mp.urg_bandwidth_available_pixel_and_vm, + + &mode_lib->soc, + display_cfg->hostvm_enable, + mode_lib->mp.Dcfclk, + mode_lib->mp.FabricClock, + mode_lib->mp.dram_bw_mbps); + + + calculate_hostvm_inefficiency_factor( +
&s->HostVMInefficiencyFactor, + &s->HostVMInefficiencyFactorPrefetch, + + display_cfg->gpuvm_enable, + display_cfg->hostvm_enable, + mode_lib->ip.remote_iommu_outstanding_translations, + mode_lib->soc.max_outstanding_reqs, + mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], + mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + + s->TotalDCCActiveDPP = 0; + s->TotalActiveDPP = 0; + for (k = 0; k < s->num_active_planes; ++k) { + s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k]; + if (display_cfg->plane_descriptors[k].surface.dcc.enable) + s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k]; + } + // Calculate tdlut schedule related terms + for (k = 0; k <= s->num_active_planes - 1; k++) { + calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk; + calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; + calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; + calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; + calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + + // output + calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; + calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; + calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; + calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); + } + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + + CalculateExtraLatency( + display_cfg, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, + mode_lib->mp.Dcfclk, + mode_lib->mp.FabricClock, + mode_lib->ip.pixel_chunk_size_kbytes, + mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active], + s->num_active_planes, + mode_lib->mp.NoOfDPP, + mode_lib->mp.dpte_group_bytes, + s->tdlut_bytes_per_group, + s->HostVMInefficiencyFactor, + s->HostVMInefficiencyFactorPrefetch, + mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.qos_parameters.qos_type, + !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), + mode_lib->soc.max_outstanding_reqs, + mode_lib->mp.request_size_bytes_luma, + mode_lib->mp.request_size_bytes_chroma, + 
mode_lib->ip.meta_chunk_size_kbytes, + mode_lib->ip.dchub_arb_to_ret_delay, + mode_lib->mp.TripToMemory, + mode_lib->ip.hostvm_mode, + + // output + &mode_lib->mp.ExtraLatency, + &mode_lib->mp.ExtraLatency_sr, + &mode_lib->mp.ExtraLatencyPrefetch); + + mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep; + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + mode_lib->mp.WritebackDelay[k] = + mode_lib->soc.qos_parameters.writeback.base_latency_us + + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk; + } else + mode_lib->mp.WritebackDelay[k] = 0; + } + + /* VActive bytes to fetch for UCLK P-State */ + calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg; + calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes; + calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP; + calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height; + calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma; + calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; + calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; + calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height; + calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma; + calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l; + calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c; + calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY; + calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC; + calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY; + calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC; + calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY; + 
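/* The latency to hide below is the DRAM clock-change blackout time; the outputs are the per-plane byte requirements for the UCLK p-state case. */ +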
calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC; + calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + + /* outputs */ + calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk]; + calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk]; + + calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params); + + /* Excess VActive bandwidth required to fill DET */ + calculate_excess_vactive_bandwidth_required( + display_cfg, + s->num_active_planes, + s->pstate_bytes_required_l[dml2_pstate_type_uclk], + s->pstate_bytes_required_c[dml2_pstate_type_uclk], + /* outputs */ + mode_lib->mp.excess_vactive_fill_bw_l, + mode_lib->mp.excess_vactive_fill_bw_c); + + mode_lib->mp.UrgentLatency = CalculateUrgentLatency( + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.do_urgent_latency_adjustment, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, + mode_lib->mp.FabricClock, + mode_lib->mp.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); + + mode_lib->mp.TripToMemory = CalculateTripToMemory( + mode_lib->mp.UrgentLatency, + mode_lib->mp.FabricClock, + mode_lib->mp.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); + + mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); + + mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory( + mode_lib->mp.UrgentLatency, + mode_lib->mp.FabricClock, + mode_lib->mp.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); + + for (k = 0; k < s->num_active_planes; ++k) { + bool 
cursor_not_enough_urgent_latency_hiding = false; + s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; + + s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->mp.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane0.width, + display_cfg->plane_descriptors[k].composition.viewport.plane0.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->mp.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane1.width, + display_cfg->plane_descriptors[k].composition.viewport.plane1.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, + + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + s->line_times[k], + mode_lib->mp.UrgentLatency, + + // output + &mode_lib->mp.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } + mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k]; + + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->mp.swath_width_luma_ub[k], + mode_lib->mp.swath_width_chroma_ub[k], + mode_lib->mp.SwathHeightY[k], + mode_lib->mp.SwathHeightC[k], + s->line_times[k], + mode_lib->mp.UrgentLatency, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + mode_lib->mp.DETBufferSizeY[k], + mode_lib->mp.DETBufferSizeC[k], + + /* output */ + &mode_lib->mp.UrgentBurstFactorLuma[k], + &mode_lib->mp.UrgentBurstFactorChroma[k], + &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); + + mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding; + } + + for (k = 0; k < s->num_active_planes; ++k) { + s->MaxVStartupLines[k] = CalculateMaxVStartup( + mode_lib->ip.ptoi_supported, + mode_lib->ip.vblank_nom_default_us, + &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing, + mode_lib->mp.WritebackDelay[k]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); +#endif + } + + s->immediate_flip_required = false; + for (k = 0; k < s->num_active_planes; ++k) { + s->immediate_flip_required = 
s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); +#endif + + if (s->num_active_planes > 1) { + CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes; + CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; + CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY; + CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC; + CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; + CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; + CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care + CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk; + CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; + CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch; + + // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible + CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0]; + CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; + CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming + } + + { + s->DestinationLineTimesForPrefetchLessThan2 = false; + s->VRatioPrefetchMoreThanMax = false; + + DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); + + for (k = 0; k < s->num_active_planes; ++k) { + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; + + DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + mode_lib->mp.TWait[k] = CalculateTWait( + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->mp.UrgentLatency, + mode_lib->mp.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? 
+ get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); + + myPipe->Dppclk = mode_lib->mp.Dppclk[k]; + myPipe->Dispclk = mode_lib->mp.Dispclk; + myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; + myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k]; + myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; + myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; + myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; + myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; + myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; + myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + myPipe->ODMMode = mode_lib->mp.ODMMode[k]; + myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); +#endif + CalculatePrefetchSchedule_params->display_cfg = display_cfg; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + 
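/* Remaining prefetch-schedule inputs: output format, vstartup, dynamic metadata, urgent/extra latencies, row, tdlut and cursor terms. */ +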
CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency; + CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch; + CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc; + CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k]; + CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory; + CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency; + CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; + CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; + CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); + CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; + CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; + CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; + CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k]; + CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = 
mode_lib->mp.mall_prefetch_sdp_overhead_factor[k]; + CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k]; + + // output + CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k]; + CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k]; + CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &s->dummy_single_array[0][k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k]; + CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k]; + CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k]; + CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k]; + CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; + CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k]; + CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k]; + CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; + CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; + CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; + CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0]; + + mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + + if (s->impacted_dst_y_pre[k] > 0) + mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k]; + else + mode_lib->mp.impacted_prefetch_margin_us[k] = 0; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); +#endif + mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k]; + } // for k + + 
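/* Aggregate the per-plane results: prefetch mode is supported only if every plane has a feasible schedule, enough urgent latency hiding and prefetch ratios within limits. */ +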
mode_lib->mp.PrefetchModeSupported = true; + for (k = 0; k < s->num_active_planes; ++k) { + if (mode_lib->mp.NoTimeToPrefetch[k] == true || + mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] || + mode_lib->mp.DSTYAfterScaler[k] > 8) { + DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 8)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); + mode_lib->mp.PrefetchModeSupported = false; + } + if (mode_lib->mp.dst_y_prefetch[k] < 2) + s->DestinationLineTimesForPrefetchLessThan2 = true; + + if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { + s->VRatioPrefetchMoreThanMax = true; + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be > %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be > %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + } + + if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); + mode_lib->mp.PrefetchModeSupported = false; + } + } + + if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) { + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); + mode_lib->mp.PrefetchModeSupported = false; + } + + DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, + mode_lib->mp.PrefetchModeSupported ?
"" : "NOT ", CalculatePrefetchSchedule_params->VStartup); + + // Prefetch schedule OK, now check prefetch bw + if (mode_lib->mp.PrefetchModeSupported == true) { + for (k = 0; k < s->num_active_planes; ++k) { + double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->mp.swath_width_luma_ub[k], + mode_lib->mp.swath_width_chroma_ub[k], + mode_lib->mp.SwathHeightY[k], + mode_lib->mp.SwathHeightC[k], + line_time_us, + mode_lib->mp.UrgentLatency, + mode_lib->mp.VRatioPrefetchY[k], + mode_lib->mp.VRatioPrefetchC[k], + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + mode_lib->mp.DETBufferSizeY[k], + mode_lib->mp.DETBufferSizeC[k], + /* Output */ + &mode_lib->mp.UrgentBurstFactorLumaPre[k], + &mode_lib->mp.UrgentBurstFactorChromaPre[k], + &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); + + DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + + DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); +#endif + } + + for (k = 0; k <= s->num_active_planes - 1; k++) + mode_lib->mp.final_flip_bw[k] = 0; + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; + 
calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 0; + calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0]; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + // Check urg peak bandwidth against available urg bw + // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active) + check_urgent_bandwidth_support( + &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth + &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall + &s->dummy_boolean[1], // vactive bw ok + &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok + + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->mp.non_urg_bandwidth_required, + mode_lib->mp.urg_vactive_bandwidth_required, + mode_lib->mp.urg_bandwidth_required, + 
mode_lib->mp.urg_bandwidth_available); + + if (!mode_lib->mp.PrefetchModeSupported) + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); + + for (k = 0; k < s->num_active_planes; ++k) { + if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) { + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); + mode_lib->mp.PrefetchModeSupported = false; + } + } + } // prefetch schedule ok + + // Prefetch schedule and prefetch bw ok, now check flip bw + if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw + + mode_lib->mp.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip( + dml2_core_internal_soc_state_sys_active, + mode_lib->mp.urg_bandwidth_required_qual, // no flip + mode_lib->mp.urg_bandwidth_available); + mode_lib->mp.TotImmediateFlipBytes = 0; + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor, + mode_lib->mp.vm_bytes[k], + mode_lib->mp.PixelPTEBytesPerRow[k], + mode_lib->mp.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k]; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); +#endif + } + for (k = 0; k < s->num_active_planes; ++k) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 0, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->mp.vm_bytes[k], + mode_lib->mp.PixelPTEBytesPerRow[k], + mode_lib->mp.BandwidthAvailableForImmediateFlip, + mode_lib->mp.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->mp.Tno_bw[k], + mode_lib->mp.dpte_row_height[k], + mode_lib->mp.dpte_row_height_chroma[k], + mode_lib->mp.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, + s->per_pipe_flip_bytes[k], + mode_lib->mp.meta_row_bytes[k], + mode_lib->mp.meta_row_height[k], + mode_lib->mp.meta_row_height_chroma[k], + mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + // Output + &mode_lib->mp.dst_y_per_vm_flip[k], + &mode_lib->mp.dst_y_per_row_flip[k], + &mode_lib->mp.final_flip_bw[k], + &mode_lib->mp.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required 
= s->dummy_bw; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 1; + calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0]; + calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + calculate_immediate_flip_bandwidth_support( + &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip + &mode_lib->mp.ImmediateFlipSupported, // bool* 
flip_bandwidth_support_ok + + dml2_core_internal_soc_state_sys_active, + mode_lib->mp.urg_bandwidth_required_flip, + mode_lib->mp.non_urg_bandwidth_required_flip, + mode_lib->mp.urg_bandwidth_available); + + if (!mode_lib->mp.ImmediateFlipSupported) + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!\n", __func__); + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->mp.ImmediateFlipSupported = false; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); +#endif + } + } + } else { // flip or prefetch not supported + mode_lib->mp.ImmediateFlipSupported = false; + } + + // consider flip support is okay if the flip bw is ok or (when user doesn't require an iflip and there is no host vm) + must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required; + mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); + for (k = 0; k < s->num_active_planes; ++k) + DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); + DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); +#endif + DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); + } + + for (k = 0; k < s->num_active_planes; ++k) + DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + + if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) { + DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); + } else { + DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); + + // DCC Configuration + for (k = 0; k < s->num_active_planes; ++k) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); +#endif + CalculateDCCConfiguration( + display_cfg->plane_descriptors[k].surface.dcc.enable, + display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown, + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.plane0.width, + display_cfg->plane_descriptors[k].surface.plane1.width, + display_cfg->plane_descriptors[k].surface.plane0.height, + display_cfg->plane_descriptors[k].surface.plane1.height, + s->NomDETInKByte, + mode_lib->mp.Read256BlockHeightY[k], + mode_lib->mp.Read256BlockHeightC[k], + display_cfg->plane_descriptors[k].surface.tiling, + mode_lib->mp.BytePerPixelY[k], + mode_lib->mp.BytePerPixelC[k], + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + display_cfg->plane_descriptors[k].composition.rotation_angle, + + /* Output */ + &mode_lib->mp.RequestLuma[k], + &mode_lib->mp.RequestChroma[k], + &mode_lib->mp.DCCYMaxUncompressedBlock[k], + &mode_lib->mp.DCCCMaxUncompressedBlock[k], + &mode_lib->mp.DCCYMaxCompressedBlock[k], + &mode_lib->mp.DCCCMaxCompressedBlock[k], + 
&mode_lib->mp.DCCYIndependentBlock[k], + &mode_lib->mp.DCCCIndependentBlock[k]); + } + + //Watermarks and NB P-State/DRAM Clock Change Support + s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency; + s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency; + s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr; + s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mmSOCParameters.USRRetrainingLatency = 0; + s->mmSOCParameters.SMNLatency = 0; + s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index); + s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock; + s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; + + CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false; + CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = s->SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; + CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC; + 
CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height; + CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma; + CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + + // Output + CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark; + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported; + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL; + CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported; + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport; + CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support; + CalculateWatermarks_params->VActiveLatencyHidingMargin = 0; + CalculateWatermarks_params->VActiveLatencyHidingUs = 0; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); + mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); + } else { + mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0; + mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0; + } + } + + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); + + DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); + DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); + + //Display Pipeline Delivery Time in Prefetch, Groups + CalculatePixelDeliveryTimes( + display_cfg, + cfg_support_info, + s->num_active_planes, + mode_lib->mp.VRatioPrefetchY, + mode_lib->mp.VRatioPrefetchC, + mode_lib->mp.swath_width_luma_ub, 
+ mode_lib->mp.swath_width_chroma_ub, + mode_lib->mp.PSCL_THROUGHPUT, + mode_lib->mp.PSCL_THROUGHPUT_CHROMA, + mode_lib->mp.Dppclk, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.req_per_swath_ub_l, + mode_lib->mp.req_per_swath_ub_c, + + /* Output */ + mode_lib->mp.DisplayPipeLineDeliveryTimeLuma, + mode_lib->mp.DisplayPipeLineDeliveryTimeChroma, + mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch, + mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch, + mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma, + mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma, + mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch, + mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch); + + CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch; + CalculateMetaAndPTETimes_params->display_cfg = display_cfg; + CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; + CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank; + CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip; + CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; + CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC; + CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height; + CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; + CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; + CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; + CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; + CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; + CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; + CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; + CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; + CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; + CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub; + CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes; + CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes; + CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width; + CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; + CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height; + CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; + CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width; + CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; + CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height; + CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; + + CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group; + CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = 
mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L; + CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C; + CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma; + CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma; + CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma; + CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L; + CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C; + CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal; + CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank; + CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip; + + CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params); + + CalculateVMGroupAndRequestTimes( + display_cfg, + s->num_active_planes, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.dst_y_per_vm_vblank, + mode_lib->mp.dst_y_per_vm_flip, + mode_lib->mp.dpte_row_width_luma_ub, + mode_lib->mp.dpte_row_width_chroma_ub, + mode_lib->mp.vm_group_bytes, + mode_lib->mp.dpde0_bytes_per_frame_ub_l, + mode_lib->mp.dpde0_bytes_per_frame_ub_c, + s->tdlut_pte_bytes_per_frame, + mode_lib->mp.meta_pte_bytes_per_frame_ub_l, + mode_lib->mp.meta_pte_bytes_per_frame_ub_c, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + mode_lib->mp.TimePerVMGroupVBlank, + mode_lib->mp.TimePerVMGroupFlip, + mode_lib->mp.TimePerVMRequestVBlank, + mode_lib->mp.TimePerVMRequestFlip); + + // VStartup Adjustment + for (k = 0; k < s->num_active_planes; ++k) { + bool isInterlaceTiming; + + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency; + if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable) + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); +#endif + s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); + DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, 
mode_lib->mp.MinTTUVBlank[k]); +#endif + + mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin; + if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) { + mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin; + } + + isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); + + // The actual positioning of the vstartup + mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]); + + s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) : + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0; + s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k]; + + if (s->blank_lines_remaining < 0) { + DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n"); + s->blank_lines_remaining = 0; + DML_ASSERT(0); + } + mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; + + // debug only + if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <= + (isInterlaceTiming ? 
+ math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) : + (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) { + mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true; + } else { + mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); + DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); +#endif + } + + //Maximum Bandwidth Used + mode_lib->mp.TotalWRBandwidth = 0; + for (k = 0; k < display_cfg->num_streams; ++k) { + s->WRBandwidth = 0; + if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) { + s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width / + (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height + / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000)) + * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 
4.0 : 8.0); + mode_lib->mp.TotalWRBandwidth = mode_lib->mp.TotalWRBandwidth + s->WRBandwidth; + } + } + + mode_lib->mp.TotalDataReadBandwidth = 0; + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k]; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); +#endif + } + + CalculateStutterEfficiency_params->display_cfg = display_cfg; + CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; + CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; + CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries; + CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries; + CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes; + CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth; + CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk; + CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active]; + CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs; + CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank; + CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; + CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC; + CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY; + CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY; + CalculateStutterEfficiency_params->BlockHeight256BytesC = 
mode_lib->mp.Read256BlockHeightC; + CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC; + CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock; + CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c; + CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw; + CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present; + + // output + CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency; + CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod; + CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); + +#ifdef __DML_VBA_ALLOW_DELTA__ + // Calculate z8 stutter eff assuming 0 reserved space + CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0; + + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase; + CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); +#else + mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; + mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency; + mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; + mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod; +#endif + } // PrefetchAndImmediateFlipSupported + + max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0; + min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz; + mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles; + mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles); + 
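As a worked illustration (editor's sketch, not part of the patch): the two values just computed take the SoC's minimum return-path latency, re-expressed in DCFCLK cycles, and subtract it from three quarters of a pixel chunk measured in 64-byte requests, clamped to at least 32. The snippet below plugs in hypothetical clock-table numbers to show the arithmetic; none of the values come from the patch.

#include <math.h>
#include <stdio.h>

int main(void)
{
	double min_return_uclk_cycles = 300.0;  /* hypothetical */
	double min_return_fclk_cycles = 350.0;  /* hypothetical */
	double max_uclk_mhz = 1200.0;           /* hypothetical */
	double max_fclk_mhz = 2500.0;           /* hypothetical */
	double hard_minimum_dcfclk_mhz = 300.0; /* hypothetical */
	double pixel_chunk_size_kbytes = 16.0;  /* hypothetical */

	/* worst-case return latency in us, re-expressed in DCFCLK cycles */
	double min_return_latency_dcfclk_cycles =
		(min_return_uclk_cycles / max_uclk_mhz +
		 min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz;

	/* 3/4 of the pixel chunk in 64-byte units, minus the return latency,
	 * clamped to at least 32 cycles */
	double hysteresis = fmax(32.0,
		pixel_chunk_size_kbytes * 1024.0 * 3.0 / 4.0 / 64.0 -
		min_return_latency_dcfclk_cycles);

	printf("latency = %.0f dcfclk cycles, hysteresis = %.0f\n",
	       min_return_latency_dcfclk_cycles, hysteresis); /* 117 and 75 */
	return 0;
}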
DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); + DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", __func__, min_return_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); + DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__); +#endif + return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported); +} + +bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) +{ + DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); + bool result = dml_core_mode_programming(in_out_params); + + DML_LOG_VERBOSE("DML::%s: result = %0d\n", __func__, result); + DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); + return result; +} + +void dml2_core_calcs_get_dpte_row_height( + unsigned int *dpte_row_height, + struct dml2_core_internal_display_mode_lib *mode_lib, + bool is_plane1, + enum dml2_source_format_class SourcePixelFormat, + enum dml2_swizzle_mode SurfaceTiling, + enum dml2_rotation_angle ScanDirection, + unsigned int pitch, + unsigned int GPUVMMinPageSizeKBytes) +{ + unsigned int BytePerPixelY; + unsigned int BytePerPixelC; + double BytePerPixelInDETY; + double BytePerPixelInDETC; + unsigned int BlockHeight256BytesY; + unsigned int BlockHeight256BytesC; + unsigned int BlockWidth256BytesY; + unsigned int BlockWidth256BytesC; + unsigned int MacroTileWidthY; + unsigned int MacroTileWidthC; + unsigned int MacroTileHeightY; + unsigned int MacroTileHeightC; + bool surf_linear_128_l = false; + bool surf_linear_128_c = false; + + CalculateBytePerPixelAndBlockSizes( + SourcePixelFormat, + SurfaceTiling, + pitch, + pitch, + + /* Output */ + &BytePerPixelY, + &BytePerPixelC, + &BytePerPixelInDETY, + &BytePerPixelInDETC, + &BlockHeight256BytesY, + &BlockHeight256BytesC, + &BlockWidth256BytesY, + &BlockWidth256BytesC, + &MacroTileHeightY, + &MacroTileHeightC, + &MacroTileWidthY, + &MacroTileWidthC, + &surf_linear_128_l, + &surf_linear_128_c); + + unsigned int BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY; + unsigned int BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY; + unsigned int BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY; + unsigned int MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY; + unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; + unsigned int PTEBufferSizeInRequests = is_plane1 ? 
mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1); + DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); + DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); + DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); + DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); + DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); + DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); + DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); +#endif + unsigned int dummy_integer[21]; + + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1; + mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes; + mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes; + mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat; + mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling; + mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel; + mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection; + mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1; + mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4; + mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests; + mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight; + mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0; + + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = 
&dummy_integer[5]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14]; + + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &dummy_integer[20]; + + // just supply with enough parameters to calculate dpte + CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); +#endif +} + +static bool is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = false; + + if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + ret_val = true; + + return ret_val; +} + +static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) +{ + unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + return plane_idx; +} + +static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs) +{ + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + + wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); + wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); + wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); + wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); + wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); + wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_freq_in_mhz); + wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_freq_in_mhz); + wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); + wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); + wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); +} + +static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (math_log2_approx(a) - subtrahend); +} + +void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p) +{ + int dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) - + (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio); + cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? 
dst_x_offset : 0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); + DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); +#endif + + cursor_dlg_regs->chunk_hdl_adjust = 3; + cursor_dlg_regs->dst_y_offset = 0; + + cursor_dlg_regs->qos_level_fixed = 8; + cursor_dlg_regs->qos_ramp_disable = 0; +} + +static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + unsigned int pipe_idx) +{ + unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format; + enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling; + bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format)); + + unsigned int pixel_chunk_bytes = 0; + unsigned int min_pixel_chunk_bytes = 0; + unsigned int dpte_group_bytes = 0; + unsigned int mpte_group_bytes = 0; + + unsigned int p1_pixel_chunk_bytes = 0; + unsigned int p1_min_pixel_chunk_bytes = 0; + unsigned int p1_dpte_group_bytes = 0; + unsigned int p1_mpte_group_bytes = 0; + + unsigned int detile_buf_plane1_addr = 0; + unsigned int detile_buf_size_in_bytes; + double stored_swath_l_bytes; + double stored_swath_c_bytes; + bool is_phantom_pipe; + + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); + + pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024); + min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes); + + if (pixel_chunk_bytes == 64 * 1024) + min_pixel_chunk_bytes = 0; + + dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); + mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); + + p1_pixel_chunk_bytes = pixel_chunk_bytes; + p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; + p1_dpte_group_bytes = dpte_group_bytes; + p1_mpte_group_bytes = mpte_group_bytes; + + if (source_format == dml2_rgbe_alpha) + p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024); + + rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib); + rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10); + rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10); + + if (min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_l.min_chunk_size = 0; + else + rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1); + + if (p1_min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_c.min_chunk_size = 0; + else + rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1); + + rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6); + rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6); + rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6); + rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6); + + 
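For reference (editor's sketch, not part of the patch): the chunk and group size fields programmed above store log2 of the byte size minus a field-specific offset, with zero passed straight through, which is what log_and_substract_if_non_zero does via math_log2_approx. The standalone snippet below reproduces that encoding with a plain integer log2 and two hypothetical sizes.

#include <stdio.h>

/* stand-in for log_and_substract_if_non_zero()/math_log2_approx() */
static unsigned int log2_minus_if_non_zero(unsigned int a, unsigned int subtrahend)
{
	unsigned int log2_a = 0;

	if (a == 0)
		return 0;
	while (a >>= 1) /* integer floor(log2) */
		log2_a++;
	return log2_a - subtrahend;
}

int main(void)
{
	/* 8 KiB pixel chunk  -> log2(8192) - 10 = 13 - 10 = 3 */
	printf("chunk_size      = %u\n", log2_minus_if_non_zero(8 * 1024, 10));
	/* 2 KiB dpte group   -> log2(2048) - 6  = 11 - 6  = 5 */
	printf("dpte_group_size = %u\n", log2_minus_if_non_zero(2 * 1024, 6));
	return 0;
}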
detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024); + + if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) { + unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); +#endif + DML_ASSERT(p0_pte_row_height_linear >= 8); + + rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3; + if (dual_plane) { + unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); +#endif + if (sw_mode == dml2_sw_linear) { + DML_ASSERT(p1_pte_row_height_linear >= 8); + } + rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3; + } + } else { + rq_regs->rq_regs_l.pte_row_height_linear = 0; + rq_regs->rq_regs_c.pte_row_height_linear = 0; + } + + rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0); + rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0); + + // FIXME_DCN4, programming guide has dGPU condition + if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + + stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx); + stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx); + is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx); + + // Note: detile_buf_plane1_addr is in unit of 1KB + if (dual_plane) { + if (is_phantom_pipe) { + detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma + } else { + if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { + detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } else { + detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); +#endif + //DML_LOG_VERBOSE_rq_regs_st(rq_regs); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for 
pipe[%d] done\n", __func__, pipe_idx); +} + +static void rq_dlg_get_dlg_reg( + struct dml2_core_internal_scratch *s, + struct dml2_display_dlg_regs *disp_dlg_regs, + struct dml2_display_ttu_regs *disp_ttu_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + const unsigned int pipe_idx) +{ + struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals; + + memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals)); + + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); + + l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + DML_ASSERT(l->plane_idx < DML2_MAX_PLANES); + + l->source_format = dml2_444_8; + l->odm_mode = dml2_odm_mode_bypass; + l->dual_plane = false; + l->htotal = 0; + l->hactive = 0; + l->hblank_end = 0; + l->vblank_end = 0; + l->interlaced = false; + l->pclk_freq_in_mhz = 0.0; + l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + l->ref_freq_to_pix_freq = 0.0; + + if (l->plane_idx < DML2_MAX_PLANES) { + + l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing; + l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format; + l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx]; + + l->dual_plane = is_dual_plane(l->source_format); + + l->htotal = l->timing->h_total; + l->hactive = l->timing->h_active; + l->hblank_end = l->timing->h_blank_end; + l->vblank_end = l->timing->v_blank_end; + l->interlaced = l->timing->interlaced; + l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000; + l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); + DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); + DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); + + DML_ASSERT(l->refclk_freq_in_mhz != 0); + DML_ASSERT(l->pclk_freq_in_mhz != 0); + DML_ASSERT(l->ref_freq_to_pix_freq < 4.0); + + // Need to figure out which side of odm combine we're in + // Assume the pipe instance under the same plane is in order + + if (l->odm_mode == dml2_odm_mode_bypass) { + disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq); + } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) { + // find out how many pipe are in this plane + l->num_active_pipes = mode_lib->mp.num_active_pipes; + l->first_pipe_idx_in_plane = DML2_MAX_PLANES; + l->pipe_idx_in_combine = 0; // pipe index within the plane + l->odm_combine_factor = 2; + + if (l->odm_mode == dml2_odm_mode_combine_3to1) + l->odm_combine_factor = 3; + else if (l->odm_mode == dml2_odm_mode_combine_4to1) + l->odm_combine_factor = 4; + + for (unsigned int i = 0; i < l->num_active_pipes; i++) { 
+ if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) { + if (i < l->first_pipe_idx_in_plane) { + l->first_pipe_idx_in_plane = i; + } + } + } + l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) + + disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq); + DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); + DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); + DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); + } + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); + + DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); + + disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19)); + disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8)); + disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits + + l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]]; + l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]); + + DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); + DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + + l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]); + disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0; + + DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); + + l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); + l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); + + DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); + + l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]]; + + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, 
l->dst_y_per_row_vblank); + + if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) { + DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); + } + + l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]]; + l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]]; + + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); + + // Active + l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); + + l->refcyc_per_line_delivery_pre_c = 0.0; + l->refcyc_per_line_delivery_c = 0.0; + + if (l->dual_plane) { + l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); + } + + disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + + l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); + + l->refcyc_per_req_delivery_pre_c = 0.0; + l->refcyc_per_req_delivery_c = 0.0; + if (l->dual_plane) { + l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); + } + + // TTU - Cursor + DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); + + // Assign to register structures + disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2)); + DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); + + disp_dlg_regs->dst_y_after_scaler = 
l->dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk + disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19)); + + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + + disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); + disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); + + l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; + l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2)); + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l); + disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c); + 
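+	/*
+	 * Descriptive note on the fixed-point packing used above: the dst_y_*
+	 * fields carry 2 fractional bits (scaled by 2^2), ref_freq_to_pix_freq
+	 * and vratio_prefetch carry 19, and the per-VM-request and
+	 * per-request-delivery times carry 10; the other refcyc_per_* group
+	 * values are programmed as whole refclk cycle counts.
+	 */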
disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l); + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c); + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l); + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c); + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1); + + l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; + l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2)); + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l); + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c); + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l); + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c); + + disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group); + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10)); + disp_ttu_regs->qos_level_low_wm = 0; + + disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq); + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_ramp_disable_l = 
0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz); + + // CHECK for HW registers' range, DML_ASSERT or clamp + DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); + if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1); + + + DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); + DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); + + if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) { + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); + l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1; + } + if (l->dual_plane) { + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) { + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); + l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1; + } + } + + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1); + if (l->dual_plane) { + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1); + } + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); + if (l->dual_plane) { + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); + } + + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); + + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + + } +} + +static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param) +{ + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + + arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs; + arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max + arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4; + arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit; + arb_param->sat_level_us = 60; + arb_param->hvm_max_qos_commit_threshold = 0xf; + arb_param->hvm_min_req_outstand_commit_threshold = 0xa; + arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024; + arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes; + arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib); + arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); + arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); + DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); + DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); + DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); + DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); +#endif + +} + +void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out) +{ + rq_dlg_get_wm_regs(display_cfg, mode_lib, out); +} + +void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out) +{ + rq_dlg_get_arb_params(display_cfg, mode_lib, out); +} + +void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg, + struct dml2_core_internal_display_mode_lib *mode_lib, + struct dml2_dchub_per_pipe_register_set *out, int pipe_index) +{ + rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index); + rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index); + out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes; +} + +void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index) +{ + out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); + out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); + out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); + out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); + out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); +} + +void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct 
dml2_per_stream_programming *out, int pipe_index) +{ + dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index); +} + +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, + const struct display_configuation_with_meta *display_cfg, + struct dmub_cmd_fams2_global_config *fams2_global_config) +{ + fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required; + + if (fams2_global_config->features.bits.enable) { + fams2_global_config->features.bits.enable_stall_recovery = true; + fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START; + + fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; + fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; + fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; + fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + + fams2_global_config->num_streams = display_cfg->display_config.num_streams; + } +} + +void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, + const struct display_configuation_with_meta *display_cfg, + union dmub_cmd_fams2_config *fams2_base_programming, + union dmub_cmd_fams2_config *fams2_sub_programming, + enum dml2_pstate_method pstate_method, + int plane_index) +{ + const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index]; + const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index]; + const struct dml2_pstate_meta *stream_pstate_meta = &display_cfg->stage3.stream_pstate_meta[plane_descriptor->stream_index]; + + struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base; + union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state; + + unsigned int i; + + if (display_cfg->display_config.overrides.all_streams_blanked) { + /* stream is blanked, so do nothing */ + return; + } + + /* from display configuration */ + base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; + base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; + base_programming->vblank_start = (uint16_t)(stream_pstate_meta->nom_vtotal - + stream_descriptor->timing.v_front_porch); + base_programming->vblank_end = (uint16_t)(stream_pstate_meta->nom_vtotal - + stream_descriptor->timing.v_front_porch - + stream_descriptor->timing.v_active); + base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; + + /* from meta */ + base_programming->otg_vline_time_ns = + (unsigned int)(stream_pstate_meta->otg_vline_time_us * 1000.0); + base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_pstate_meta->scheduling_delay_otg_vlines; + base_programming->contention_delay_otg_vlines = (uint8_t)stream_pstate_meta->contention_delay_otg_vlines; + base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines; + base_programming->drr_keepout_otg_vline = (uint16_t)(stream_pstate_meta->nom_vtotal - + stream_descriptor->timing.v_front_porch - + stream_pstate_meta->method_drr.programming_delay_otg_vlines); + base_programming->allow_to_target_delay_otg_vlines = 
(uint8_t)stream_pstate_meta->allow_to_target_delay_otg_vlines; + base_programming->max_vtotal = (uint16_t)stream_pstate_meta->max_vtotal; + + /* from core */ + base_programming->config.bits.min_ttu_vblank_usable = true; + for (i = 0; i < display_cfg->display_config.num_planes; i++) { + /* check if all planes support p-state in blank */ + if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index && + mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) { + base_programming->config.bits.min_ttu_vblank_usable = false; + break; + } + } + + switch (pstate_method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + /* legacy vactive */ + base_programming->type = FAMS2_STREAM_TYPE_VACTIVE; + sub_programming->legacy.vactive_det_fill_delay_otg_vlines = + (uint8_t)stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + base_programming->allow_start_otg_vline = + (uint16_t)stream_pstate_meta->method_vactive.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_pstate_meta->method_vactive.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + /* legacy vblank */ + base_programming->type = FAMS2_STREAM_TYPE_VBLANK; + base_programming->allow_start_otg_vline = + (uint16_t)stream_pstate_meta->method_vblank.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_pstate_meta->method_vblank.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; + break; + case dml2_pstate_method_fw_drr: + /* drr */ + base_programming->type = FAMS2_STREAM_TYPE_DRR; + sub_programming->drr.programming_delay_otg_vlines = + (uint8_t)stream_pstate_meta->method_drr.programming_delay_otg_vlines; + sub_programming->drr.nom_stretched_vtotal = + (uint16_t)stream_pstate_meta->method_drr.stretched_vtotal; + base_programming->allow_start_otg_vline = + (uint16_t)stream_pstate_meta->method_drr.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_pstate_meta->method_drr.common.allow_end_otg_vline; + /* drr only clamps to vtotal min for single display */ + base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; + sub_programming->drr.only_stretch_if_required = true; + break; + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: + /* subvp */ + base_programming->type = FAMS2_STREAM_TYPE_SUBVP; + sub_programming->subvp.vratio_numerator = + (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); + sub_programming->subvp.vratio_denominator = 1000; + sub_programming->subvp.programming_delay_otg_vlines = + (uint8_t)stream_pstate_meta->method_subvp.programming_delay_otg_vlines; + sub_programming->subvp.prefetch_to_mall_otg_vlines = + (uint8_t)stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; + sub_programming->subvp.phantom_vtotal = + (uint16_t)stream_pstate_meta->method_subvp.phantom_vtotal; + sub_programming->subvp.phantom_vactive = + (uint16_t)stream_pstate_meta->method_subvp.phantom_vactive; + sub_programming->subvp.config.bits.is_multi_planar = + plane_descriptor->surface.plane1.height > 0; + sub_programming->subvp.config.bits.is_yuv420 = + plane_descriptor->pixel_format == dml2_420_8 || + plane_descriptor->pixel_format == dml2_420_10 || + plane_descriptor->pixel_format == 
dml2_420_12; + + base_programming->allow_start_otg_vline = + (uint16_t)stream_pstate_meta->method_subvp.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_pstate_meta->method_subvp.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; + break; + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_na: + case dml2_pstate_method_count: + default: + /* this should never happen */ + break; + } +} + +void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) +{ + unsigned int n; + + out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx); + out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx); + out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx); + out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx); + + for (n = 0; n < out->num_mcaches_plane0; n++) + out->mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n); + + for (n = 0; n < out->num_mcaches_plane1; n++) + out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n); + + out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx); + out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx); + out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx); + out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx); + out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx); + + out->valid = true; +} + +void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index) +{ + *out = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index); +} + +void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx) +{ + out->mall_svp_size_requirement_ways = 0; + + out->nominal_vblank_pstate_latency_hiding_us = + (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]); + + out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx]; + + out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx]; + + out->vactive_det_fill_delay_us[dml2_pstate_type_uclk] = + (unsigned int)math_ceil(mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk][plane_idx]); +} + +void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index) +{ + double phantom_processing_delay_pix; + unsigned int phantom_processing_delay_lines; + unsigned int phantom_min_v_active_lines; + unsigned int phantom_v_active_lines; + unsigned int 
phantom_v_startup_lines; + unsigned int phantom_v_blank_lines; + unsigned int main_v_blank_lines; + unsigned int rem; + + phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); + phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); + dml2_core_div_rem(phantom_processing_delay_pix, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, + &rem); + if (rem) + phantom_processing_delay_lines++; + + phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); + phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) / + display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio); + phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines; + + // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank) + phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1; + main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active; + if (phantom_v_blank_lines > main_v_blank_lines) + phantom_v_blank_lines = main_v_blank_lines; + + out->phantom_v_active = phantom_v_active_lines; + // phantom_vtotal = vactive + vblank + out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines; + + out->phantom_min_v_active = phantom_min_v_active_lines; + out->phantom_v_startup = phantom_v_startup_lines; + + out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); + DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); + DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); + DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us); +#endif +} + +void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out) +{ + unsigned int k, n; + + out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport; + out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport; + out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport; + out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; + out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; + out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; + out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false; + 
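+	/*
+	 * The remainder of this function mirrors the mode-support flags,
+	 * watermarks, QoS/bandwidth numbers and per-plane programming results
+	 * from the internal mode_lib state into the informative output block,
+	 * deriving only a few summary fields (e.g. PrefetchMode,
+	 * ROBUrgencyAvoidance) along the way.
+	 */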
out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported; + out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; + out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; + out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated; + out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP; + out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink; + out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO; + out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport; + out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport; + out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support; + out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport; + out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport; + out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport; + out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface; + out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false; + out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; + out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen; + out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState; + out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize; + out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits; + out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.global_temp_read_or_ppt_supported; + out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support; + + out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots; + out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits; + out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices; + out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport; + out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported; + out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported; + out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport; + + out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport; + out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport; + 
out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance; + out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded; + out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded; + + out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport; + out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport; + out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport; + + out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported; + out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported; + out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported; + out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support; + out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport; + out->informative.mode_support_info.NumberOfTDLUT33cubeSupport = mode_lib->ms.support.NumberOfTDLUT33cubeSupport; + out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport; + out->informative.mode_support_info.qos_bandwidth_support = mode_lib->ms.support.qos_bandwidth_support; + out->informative.mode_support_info.dcfclk_support = mode_lib->ms.support.dcfclk_support; + + for (k = 0; k < out->display_config.num_planes; k++) { + + out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k]; + out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k]; + out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k]; + out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k]; + out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k]; + out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k]; + out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k]; + out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k]; + + if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl; + + if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown) + 
out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_16x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_16x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_20x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_20x4; + + out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k]; + out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k]; + } + + out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib); + out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib); + out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib); + out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib); + + out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib); + out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib); + out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib); + out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib); + 
out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib); + out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib); + out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib); + out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib); + + out->informative.mall.total_surface_size_in_mall_bytes = 0; + out->informative.dpp.total_num_dpps_required = 0; + for (k = 0; k < out->display_config.num_planes; ++k) { + out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k]; + out->informative.dpp.total_num_dpps_required += mode_lib->mp.NoOfDPP[k]; + } + + out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk; + out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib); + + out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib); + out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib); + out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib); + + out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib); + out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib); + out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib); + out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib); + out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib); + + out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib); + out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib); + out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib); + out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib); + + out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib); + out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib); + out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib); + out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib); + + out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib); + out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib); + out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib); + + out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib); + out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib); + out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib); + + out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib); + out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = 
dml_get_sys_active_urg_bw_required_dram(mode_lib); + out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib); + out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib); + + out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib); + out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib); + out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib); + out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib); + + out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib); + out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib); + out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib); + out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib); + + out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib); + out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib); + out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib); + out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib); + + out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib); + out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib); + + out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib); + out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib); + out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); + + out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib); + out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib); + out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib); + + out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_z8(mode_lib); + out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib); + out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib); + out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib); + + out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib); + out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib); + out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib); + + out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib); + + out->min_clocks.dcn4x.dpprefclk_khz = (int 
unsigned)dml_get_global_dppclk_khz(mode_lib); + + out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); + + out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000; + + for (k = 0; k < out->display_config.num_planes; k++) { + + if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) + out->informative.misc.PrefetchMode[k] = 0; + else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) + out->informative.misc.PrefetchMode[k] = 1; + else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us) + out->informative.misc.PrefetchMode[k] = 2; + else + out->informative.misc.PrefetchMode[k] = 3; + + out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k]; + out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k]; + out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k]; + out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k]; + out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k]; + out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k]; + out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k]; + out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k]; + out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k]; + out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k]; + out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k]; + out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k]; + out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k]; + out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k]; + out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k]; + out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k]; + out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k]; + out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k]; + out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k]; + out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k]; + out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]; + out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k]; + 
out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k]; + out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k]; + out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k]; + out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k]; + out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k]; + out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k]; + out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k]; + out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k]; + out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k]; + out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k]; + out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k]; + out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k]; + out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k]; + out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k]; + out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k]; + out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k]; + out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k]; + out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k]; + + out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k]; + out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k]; + out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k]; + out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k]; + out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k]; + out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k]; + out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k]; + out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k]; + out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k]; + out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k]; + out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k]; + out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k]; + out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k]; + out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k]; + out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k]; + 
out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k]; + + out->informative.misc.WritebackRequiredBandwidth = mode_lib->mp.TotalWRBandwidth / 1000.0; + out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k]; + out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k]; + out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k]; + out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k]; + out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k]; + out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k]; + out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k]; + + if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin) + out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k]; + } + + // For this DV informative layer, all pipes in the same planes will just use the same id + // will have the optimization and helper layer later on + // only work when we can have high "mcache" that fit everything without thrashing the cache + for (k = 0; k < out->display_config.num_planes; k++) { + out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k); + out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k); + + for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) { + out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n); + out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k; + } + + out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k); + out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k); + + for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) { + out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n); + out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k; + } + } + out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib); + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { + if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 + / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { + out->informative.misc.ROBUrgencyAvoidance = true; + } else { + out->informative.misc.ROBUrgencyAvoidance = false; + } + } else { + out->informative.misc.ROBUrgencyAvoidance = true; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h 
b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h new file mode 100644 index 000000000000..27ef0e096b25 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_CORE_DCN4_CALCS_H__ +#define __DML2_CORE_DCN4_CALCS_H__ + +#include "dml2_core_shared_types.h" + +struct dml2_dchub_watermark_regs; +struct dml2_display_arb_regs; +struct dml2_per_stream_programming; +struct dml2_dchub_per_pipe_register_set; +struct core_plane_support_info; +struct core_stream_support_info; +struct dml2_cursor_dlg_regs; +struct display_configuation_with_meta; + +unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params); +bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params); +void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out); +void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out); +void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *dml2_display_cfg, struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_per_pipe_register_set *out, int pipe_index); +void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index); +void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index); +void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_index); +void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_index); +void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out); +void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); +void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); +void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, union dmub_cmd_fams2_config *fams2_base_programming, union dmub_cmd_fams2_config *fams2_sub_programming, enum dml2_pstate_method pstate_method, int plane_index); +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config); + +void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int 
GPUVMMinPageSizeKBytes); +void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p); +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c new file mode 100644 index 000000000000..cc4f0663c6d6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_core_factory.h" +#include "dml2_core_dcn4.h" +#include "dml2_external_lib_deps.h" + +bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out) +{ + bool result = false; + + if (!out) + return false; + + memset(out, 0, sizeof(struct dml2_core_instance)); + + out->project_id = project_id; + + switch (project_id) { + case dml2_project_dcn4x_stage1: + result = false; + break; + case dml2_project_dcn4x_stage2: + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->initialize = &core_dcn4_initialize; + out->mode_support = &core_dcn4_mode_support; + out->mode_programming = &core_dcn4_mode_programming; + out->populate_informative = &core_dcn4_populate_informative; + out->calculate_mcache_allocation = &core_dcn4_calculate_mcache_allocation; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h new file mode 100644 index 000000000000..411c514fe65c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_CORE_FACTORY_H__ +#define __DML2_CORE_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h new file mode 100644 index 000000000000..1087a8c926ff --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h @@ -0,0 +1,2341 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
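dml2_core_create() above acts as a small factory: it picks a core implementation from the project id and exposes it purely through function pointers on struct dml2_core_instance, so callers never name core_dcn4_* directly. A hedged sketch of the calling pattern, written as a fragment inside a hypothetical bool-returning setup helper (the per-call in/out parameter structs are assumed and elided):

	struct dml2_core_instance core;

	if (!dml2_core_create(dml2_project_dcn4x_stage2, &core))
		return false;	/* no core implementation for this project id */

	/*
	 * From here on the caller goes through the published pointers only:
	 * core.initialize(), core.mode_support(), core.mode_programming(),
	 * core.populate_informative(), each with its own in/out parameter
	 * struct defined elsewhere.
	 */

Keeping the dispatch in this one switch means adding a project only touches the factory and the new core's own files.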
+ +#ifndef __DML2_CORE_SHARED_TYPES_H__ +#define __DML2_CORE_SHARED_TYPES_H__ + +#include "dml2_external_lib_deps.h" +#include "dml_top_display_cfg_types.h" +#include "dml_top_types.h" + +#define __DML_VBA_DEBUG__ +#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 //<brief max vratio for one-to-one prefetch bw scheduling +#define __DML2_CALCS_MAX_VRATIO_PRE_EQU__ 6.0 //<brief max vratio for equalized prefetch bw scheduling +#define __DML2_CALCS_MAX_VRATIO_PRE__ 8.0 //<brief max prefetch vratio register limit + +#define __DML2_CALCS_DPP_INVALID__ 0 +#define __DML2_CALCS_DCFCLK_FACTOR__ 1.15 //<brief fudge factor for min dcfclk calclation +#define __DML2_CALCS_PIPE_NO_PLANE__ 99 + +struct dml2_core_ip_params { + unsigned int vblank_nom_default_us; + unsigned int remote_iommu_outstanding_translations; + unsigned int rob_buffer_size_kbytes; + unsigned int config_return_buffer_size_in_kbytes; + unsigned int config_return_buffer_segment_size_in_kbytes; + unsigned int compressed_buffer_segment_size_in_kbytes; + unsigned int meta_fifo_size_in_kentries; + unsigned int dpte_buffer_size_in_pte_reqs_luma; + unsigned int dpte_buffer_size_in_pte_reqs_chroma; + unsigned int pixel_chunk_size_kbytes; + unsigned int alpha_pixel_chunk_size_kbytes; + unsigned int min_pixel_chunk_size_bytes; + unsigned int writeback_chunk_size_kbytes; + unsigned int line_buffer_size_bits; + unsigned int max_line_buffer_lines; + unsigned int writeback_interface_buffer_size_kbytes; + unsigned int max_num_dpp; + unsigned int max_num_opp; + unsigned int max_num_otg; + unsigned int TDLUT_33cube_count; + unsigned int max_num_wb; + unsigned int max_dchub_pscl_bw_pix_per_clk; + unsigned int max_pscl_lb_bw_pix_per_clk; + unsigned int max_lb_vscl_bw_pix_per_clk; + unsigned int max_vscl_hscl_bw_pix_per_clk; + double max_hscl_ratio; + double max_vscl_ratio; + unsigned int max_hscl_taps; + unsigned int max_vscl_taps; + unsigned int odm_combine_support_mask; + unsigned int num_dsc; + unsigned int maximum_dsc_bits_per_component; + unsigned int maximum_pixels_per_line_per_dsc_unit; + bool dsc422_native_support; + bool cursor_64bpp_support; + double dispclk_ramp_margin_percent; + unsigned int dppclk_delay_subtotal; + unsigned int dppclk_delay_scl; + unsigned int dppclk_delay_scl_lb_only; + unsigned int dppclk_delay_cnvc_formatter; + unsigned int dppclk_delay_cnvc_cursor; + unsigned int cursor_buffer_size; + unsigned int cursor_chunk_size; + unsigned int dispclk_delay_subtotal; + bool dynamic_metadata_vm_enabled; + unsigned int max_inter_dcn_tile_repeaters; + unsigned int max_num_hdmi_frl_outputs; + unsigned int max_num_dp2p0_outputs; + unsigned int max_num_dp2p0_streams; + bool dcc_supported; + bool ptoi_supported; + double writeback_max_hscl_ratio; + double writeback_max_vscl_ratio; + double writeback_min_hscl_ratio; + double writeback_min_vscl_ratio; + unsigned int writeback_max_hscl_taps; + unsigned int writeback_max_vscl_taps; + unsigned int writeback_line_buffer_buffer_size; + + unsigned int words_per_channel; + bool imall_supported; + unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; + unsigned int subvp_swath_height_margin_lines; + unsigned int subvp_fw_processing_delay_us; + unsigned int subvp_pstate_allow_width_us; + // MRQ + bool dcn_mrq_present; + unsigned int zero_size_buffer_entries; + unsigned int compbuf_reserved_space_zs; + unsigned int dcc_meta_buffer_size_bytes; + unsigned int meta_chunk_size_kbytes; + unsigned int min_meta_chunk_size_bytes; + + unsigned int dchub_arb_to_ret_delay; // num of dcfclk + unsigned int 
hostvm_mode; +}; + +struct dml2_core_internal_DmlPipe { + double Dppclk; + double Dispclk; + double PixelClock; + double DCFClkDeepSleep; + unsigned int DPPPerSurface; + bool ScalerEnabled; + bool UPSPEnabled; + unsigned int UPSPVTaps; + enum dml2_sample_positioning UPSPSamplePositioning; + enum dml2_rotation_angle RotationAngle; + bool mirrored; + unsigned int ViewportHeight; + unsigned int ViewportHeightC; + unsigned int BlockWidth256BytesY; + unsigned int BlockHeight256BytesY; + unsigned int BlockWidth256BytesC; + unsigned int BlockHeight256BytesC; + unsigned int BlockWidthY; + unsigned int BlockHeightY; + unsigned int BlockWidthC; + unsigned int BlockHeightC; + unsigned int InterlaceEnable; + unsigned int NumberOfCursors; + unsigned int VBlank; + unsigned int HTotal; + unsigned int HActive; + bool DCCEnable; + enum dml2_odm_mode ODMMode; + enum dml2_source_format_class SourcePixelFormat; + enum dml2_swizzle_mode SurfaceTiling; + unsigned int BytePerPixelY; + unsigned int BytePerPixelC; + bool ProgressiveToInterlaceUnitInOPP; + double VRatio; + double VRatioChroma; + unsigned int VTaps; + unsigned int VTapsChroma; + unsigned int PitchY; + unsigned int PitchC; + bool ViewportStationary; + unsigned int ViewportXStart; + unsigned int ViewportYStart; + unsigned int ViewportXStartC; + unsigned int ViewportYStartC; + bool FORCE_ONE_ROW_FOR_FRAME; + unsigned int SwathHeightY; + unsigned int SwathHeightC; + + unsigned int DCCMetaPitchY; + unsigned int DCCMetaPitchC; +}; + +enum dml2_core_internal_request_type { + dml2_core_internal_request_type_256_bytes = 0, + dml2_core_internal_request_type_128_bytes_non_contiguous = 1, + dml2_core_internal_request_type_128_bytes_contiguous = 2, + dml2_core_internal_request_type_na = 3 +}; +enum dml2_core_internal_bw_type { + dml2_core_internal_bw_sdp = 0, + dml2_core_internal_bw_dram = 1, + dml2_core_internal_bw_max +}; + +enum dml2_core_internal_soc_state_type { + dml2_core_internal_soc_state_sys_active = 0, + dml2_core_internal_soc_state_svp_prefetch = 1, + dml2_core_internal_soc_state_sys_idle = 2, + dml2_core_internal_soc_state_max +}; + +enum dml2_core_internal_output_type { + dml2_core_internal_output_type_unknown = 0, + dml2_core_internal_output_type_dp = 1, + dml2_core_internal_output_type_edp = 2, + dml2_core_internal_output_type_dp2p0 = 3, + dml2_core_internal_output_type_hdmi = 4, + dml2_core_internal_output_type_hdmifrl = 5 +}; + +enum dml2_core_internal_output_type_rate { + dml2_core_internal_output_rate_unknown = 0, + dml2_core_internal_output_rate_dp_rate_hbr = 1, + dml2_core_internal_output_rate_dp_rate_hbr2 = 2, + dml2_core_internal_output_rate_dp_rate_hbr3 = 3, + dml2_core_internal_output_rate_dp_rate_uhbr10 = 4, + dml2_core_internal_output_rate_dp_rate_uhbr13p5 = 5, + dml2_core_internal_output_rate_dp_rate_uhbr20 = 6, + dml2_core_internal_output_rate_hdmi_rate_3x3 = 7, + dml2_core_internal_output_rate_hdmi_rate_6x3 = 8, + dml2_core_internal_output_rate_hdmi_rate_6x4 = 9, + dml2_core_internal_output_rate_hdmi_rate_8x4 = 10, + dml2_core_internal_output_rate_hdmi_rate_10x4 = 11, + dml2_core_internal_output_rate_hdmi_rate_12x4 = 12, + dml2_core_internal_output_rate_hdmi_rate_16x4 = 13, + dml2_core_internal_output_rate_hdmi_rate_20x4 = 14 +}; + +struct dml2_core_internal_watermarks { + double UrgentWatermark; + double WritebackUrgentWatermark; + double DRAMClockChangeWatermark; + double FCLKChangeWatermark; + double WritebackDRAMClockChangeWatermark; + double WritebackFCLKChangeWatermark; + double StutterExitWatermark; + double 
StutterEnterPlusExitWatermark; + double LowPowerStutterExitWatermark; + double LowPowerStutterEnterPlusExitWatermark; + double Z8StutterExitWatermark; + double Z8StutterEnterPlusExitWatermark; + double USRRetrainingWatermark; + double temp_read_or_ppt_watermark_us; +}; + +struct dml2_core_internal_mode_support_info { + //----------------- + // Mode Support Information + //----------------- + bool ImmediateFlipSupport; //<brief Means mode support immediate flip at the max combine setting; determine in mode support and used in mode programming + + // Mode Support Reason/ + bool WritebackLatencySupport; + bool ScaleRatioAndTapsSupport; + bool SourceFormatPixelAndScanSupport; + bool P2IWith420; + bool DSCSlicesODMModeSupported; + bool DSCOnlyIfNecessaryWithBPP; + bool DSC422NativeNotSupported; + bool LinkRateDoesNotMatchDPVersion; + bool LinkRateForMultistreamNotIndicated; + bool BPPForMultistreamNotIndicated; + bool MultistreamWithHDMIOreDP; + bool MSOOrODMSplitWithNonDPLink; + bool NotEnoughLanesForMSO; + bool NumberOfOTGSupport; + bool NumberOfTDLUT33cubeSupport; + bool NumberOfHDMIFRLSupport; + bool NumberOfDP2p0Support; + bool WritebackScaleRatioAndTapsSupport; + bool CursorSupport; + bool PitchSupport; + bool ViewportExceedsSurface; + //bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified; + bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; + bool InvalidCombinationOfMALLUseForPStateAndStaticScreen; + bool InvalidCombinationOfMALLUseForPState; + bool ExceededMALLSize; + bool EnoughWritebackUnits; + + bool ExceededMultistreamSlots; + bool NotEnoughDSCUnits; + bool NotEnoughDSCSlices; + bool PixelsPerLinePerDSCUnitSupport; + bool DSCCLKRequiredMoreThanSupported; + bool DTBCLKRequiredMoreThanSupported; + bool LinkCapacitySupport; + + bool ROBSupport; + bool OutstandingRequestsSupport; + bool OutstandingRequestsUrgencyAvoidance; + + bool PTEBufferSizeNotExceeded; + bool DCCMetaBufferSizeNotExceeded; + enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support temp_read_or_ppt_support[DML2_MAX_PLANES]; + bool global_dram_clock_change_support_required; + bool global_dram_clock_change_supported; + bool global_fclk_change_supported; + bool global_temp_read_or_ppt_supported; + bool USRRetrainingSupport; + bool AvgBandwidthSupport; + bool UrgVactiveBandwidthSupport; + bool EnoughUrgentLatencyHidingSupport; + bool PrefetchScheduleSupported; + bool PrefetchSupported; + bool PrefetchBandwidthSupported; + bool DynamicMetadataSupported; + bool VRatioInPrefetchSupported; + bool DISPCLK_DPPCLK_Support; + bool TotalAvailablePipesSupport; + bool ODMSupport; + bool ModeSupport; + bool ViewportSizeSupport; + + bool MPCCombineEnable[DML2_MAX_PLANES]; /// <brief Indicate if the MPC Combine enable in the given state and optimize mpc combine setting + enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; /// <brief ODM mode that is chosen in the mode check stage and will be used in mode programming stage + unsigned int DPPPerSurface[DML2_MAX_PLANES]; /// <brief How many DPPs are needed drive the surface to output. If MPCC or ODMC could be 2 or 4. 
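The struct above, dml2_core_internal_mode_support_info, is effectively a scoreboard: each flag records one pass/fail reason filled in by the mode-support pass, and the overall ModeSupport verdict only holds when every prerequisite does. A conceptual reduction over a small subset of the support-style flags (the real calcs code folds in far more fields, including the per-plane arrays):

/*
 * Conceptual sketch: reduce a handful of the scoreboard flags to a single
 * verdict. Subset only -- the calcs code checks many more conditions.
 */
static bool mode_support_verdict(const struct dml2_core_internal_mode_support_info *s)
{
	return s->ROBSupport &&
	       s->LinkCapacitySupport &&
	       s->AvgBandwidthSupport &&
	       s->PrefetchSupported &&
	       s->TotalAvailablePipesSupport &&
	       s->ViewportSizeSupport;
}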
+ bool DSCEnabled[DML2_MAX_PLANES]; /// <brief Indicate if the DSC is actually required; used in mode_programming + bool FECEnabled[DML2_MAX_PLANES]; /// <brief Indicate if the FEC is actually required + unsigned int NumberOfDSCSlices[DML2_MAX_PLANES]; /// <brief Indicate how many slices needed to support the given mode + + double OutputBpp[DML2_MAX_PLANES]; + enum dml2_core_internal_output_type OutputType[DML2_MAX_PLANES]; + enum dml2_core_internal_output_type_rate OutputRate[DML2_MAX_PLANES]; + + unsigned int AlignedYPitch[DML2_MAX_PLANES]; + unsigned int AlignedCPitch[DML2_MAX_PLANES]; + + unsigned int AlignedDCCMetaPitchY[DML2_MAX_PLANES]; + unsigned int AlignedDCCMetaPitchC[DML2_MAX_PLANES]; + + unsigned int request_size_bytes_luma[DML2_MAX_PLANES]; + unsigned int request_size_bytes_chroma[DML2_MAX_PLANES]; + enum dml2_core_internal_request_type RequestLuma[DML2_MAX_PLANES]; + enum dml2_core_internal_request_type RequestChroma[DML2_MAX_PLANES]; + + unsigned int DCCYMaxUncompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCYMaxCompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCYIndependentBlock[DML2_MAX_PLANES]; + unsigned int DCCCMaxUncompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCCMaxCompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCCIndependentBlock[DML2_MAX_PLANES]; + + double avg_bandwidth_available_min[dml2_core_internal_soc_state_max]; + double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_max]; // min between SDP and DRAM, for latency evaluation + double urg_bandwidth_available_min[dml2_core_internal_soc_state_max]; // min between SDP and DRAM + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_vm_only bw, sdp has no different derate for vm/non-vm etc. + double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_pixel_and_vm bw, sdp has no different derate for vm/non-vm etc. 
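Per the comments above, the *_available_min entries collapse the per-channel figures into a single worst case ("min between SDP and DRAM"); the *_required counterparts that immediately follow are what the support checks compare against them. A minimal sketch of that reduction, assuming only the two enumerated bandwidth types contribute:

/*
 * Illustrative sketch: worst-case available bandwidth for one SoC state is
 * the minimum across the dml2_core_internal_bw_* channel types.
 */
static double bw_available_min(const double available[dml2_core_internal_bw_max])
{
	double min_bw = available[dml2_core_internal_bw_sdp];

	if (available[dml2_core_internal_bw_dram] < min_bw)
		min_bw = available[dml2_core_internal_bw_dram];
	return min_bw;
}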
+ + double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // active bandwidth, scaled by urg burst factor + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor + double urg_bandwidth_required_qual[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor, use qual_row_bw + double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth + flip + + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // same as urg_bandwidth, except not scaled by urg burst factor + double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + bool avg_bandwidth_support_ok[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double max_urgent_latency_us; + double max_non_urgent_latency_us; + double avg_non_urgent_latency_us; + double avg_urgent_latency_us; + double df_response_time_us; + + bool incorrect_imall_usage; + + bool g6_temp_read_support; + + struct dml2_core_internal_watermarks watermarks; + bool dcfclk_support; + bool qos_bandwidth_support; +}; + +struct dml2_core_internal_mode_support { + // Physical info; only using for programming + unsigned int state_idx; // <brief min clk state table index for mode support call + unsigned int qos_param_index; // to access the uclk dependent qos_parameters table + unsigned int active_min_uclk_dpm_index; // to access the min_clk table + unsigned int num_active_planes; // <brief As determined by either e2e_pipe_param or display_cfg + + // Calculated Clocks + double RequiredDISPCLK; /// <brief Required DISPCLK; depends on pixel rate; odm mode etc. 
+ double RequiredDPPCLK[DML2_MAX_PLANES]; + double RequiredDISPCLKPerSurface[DML2_MAX_PLANES]; + double RequiredDTBCLK[DML2_MAX_PLANES]; + + double required_dscclk_freq_mhz[DML2_MAX_PLANES]; + + double FabricClock; /// <brief Basically just the clock freq at the min (or given) state + double SOCCLK; /// <brief Basically just the clock freq at the min (or given) state + double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting + double GlobalDPPCLK; /// <brief the Max DPPCLK freq out of all pipes + double GlobalDTBCLK; /// <brief the Max DTBCLK freq out of all pipes + double uclk_freq_mhz; + double dram_bw_mbps; + double max_dram_bw_mbps; + double min_available_urgent_bandwidth_MBps; /// <brief Minimum guaranteed available urgent return bandwidth in MBps + + double MaxFabricClock; /// <brief Basically just the clock freq at the min (or given) state + double MaxDCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting + double max_dispclk_freq_mhz; + double max_dppclk_freq_mhz; + double max_dscclk_freq_mhz; + + bool NoTimeForPrefetch[DML2_MAX_PLANES]; + bool NoTimeForDynamicMetadata[DML2_MAX_PLANES]; + + // ---------------------------------- + // Mode Support Info and fail reason + // ---------------------------------- + struct dml2_core_internal_mode_support_info support; + + // These are calculated before the ModeSupport and ModeProgram step + // They represent the bound for the return buffer sizing + unsigned int MaxTotalDETInKByte; + unsigned int NomDETInKByte; + unsigned int MinCompressedBufferSizeInKByte; + + // Info obtained at the end of mode support calculations + // The reported info is at the "optimal" state and combine setting + unsigned int DETBufferSizeInKByte[DML2_MAX_PLANES]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value. 
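The DETBufferSizeInKByte comment above spells out the contract: the DET size is decided once per plane, and every pipe backing that plane programs the same value. A hedged sketch of honouring that contract on the consumer side (pipe_to_plane[] and program_det_kb() are hypothetical stand-ins for the driver's own mapping and register write):

	/*
	 * Illustrative sketch: fan the per-plane DET recommendation out to
	 * every pipe driving that plane. Helper names are placeholders.
	 */
	for (pipe = 0; pipe < num_active_pipes; pipe++) {
		unsigned int plane = pipe_to_plane[pipe];

		program_det_kb(pipe, mode_support->DETBufferSizeInKByte[plane]);
	}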
+ unsigned int DETBufferSizeY[DML2_MAX_PLANES]; + unsigned int DETBufferSizeC[DML2_MAX_PLANES]; + unsigned int SwathHeightY[DML2_MAX_PLANES]; + unsigned int SwathHeightC[DML2_MAX_PLANES]; + unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe + unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe + + // ---------------------------------- + // Intermediates/Informational + // ---------------------------------- + unsigned int TotImmediateFlipBytes; + bool DCCEnabledInAnySurface; + double WritebackRequiredDISPCLK; + double TimeCalc; + double TWait[DML2_MAX_PLANES]; + + bool UnboundedRequestEnabled; + unsigned int compbuf_reserved_space_64b; + bool hw_debug5; + unsigned int CompressedBufferSizeInkByte; + double VRatioPreY[DML2_MAX_PLANES]; + double VRatioPreC[DML2_MAX_PLANES]; + unsigned int req_per_swath_ub_l[DML2_MAX_PLANES]; + unsigned int req_per_swath_ub_c[DML2_MAX_PLANES]; + unsigned int swath_width_luma_ub[DML2_MAX_PLANES]; + unsigned int swath_width_chroma_ub[DML2_MAX_PLANES]; + unsigned int RequiredSlots[DML2_MAX_PLANES]; + unsigned int vm_bytes[DML2_MAX_PLANES]; + unsigned int DPTEBytesPerRow[DML2_MAX_PLANES]; + unsigned int PrefetchLinesY[DML2_MAX_PLANES]; + unsigned int PrefetchLinesC[DML2_MAX_PLANES]; + unsigned int MaxNumSwathY[DML2_MAX_PLANES]; /// <brief Max number of swath for prefetch + unsigned int MaxNumSwathC[DML2_MAX_PLANES]; /// <brief Max number of swath for prefetch + unsigned int PrefillY[DML2_MAX_PLANES]; + unsigned int PrefillC[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_c[DML2_MAX_PLANES]; + + bool use_one_row_for_frame[DML2_MAX_PLANES]; + bool use_one_row_for_frame_flip[DML2_MAX_PLANES]; + + double dst_y_prefetch[DML2_MAX_PLANES]; + double LinesForVM[DML2_MAX_PLANES]; + double LinesForDPTERow[DML2_MAX_PLANES]; + unsigned int SwathWidthYSingleDPP[DML2_MAX_PLANES]; + unsigned int SwathWidthCSingleDPP[DML2_MAX_PLANES]; + unsigned int BytePerPixelY[DML2_MAX_PLANES]; + unsigned int BytePerPixelC[DML2_MAX_PLANES]; + double BytePerPixelInDETY[DML2_MAX_PLANES]; + double BytePerPixelInDETC[DML2_MAX_PLANES]; + + unsigned int Read256BlockHeightY[DML2_MAX_PLANES]; + unsigned int Read256BlockWidthY[DML2_MAX_PLANES]; + unsigned int Read256BlockHeightC[DML2_MAX_PLANES]; + unsigned int Read256BlockWidthC[DML2_MAX_PLANES]; + unsigned int MacroTileHeightY[DML2_MAX_PLANES]; + unsigned int MacroTileHeightC[DML2_MAX_PLANES]; + unsigned int MacroTileWidthY[DML2_MAX_PLANES]; + unsigned int MacroTileWidthC[DML2_MAX_PLANES]; + + bool surf_linear128_l[DML2_MAX_PLANES]; + bool surf_linear128_c[DML2_MAX_PLANES]; + + double PSCL_FACTOR[DML2_MAX_PLANES]; + double PSCL_FACTOR_CHROMA[DML2_MAX_PLANES]; + double MaximumSwathWidthLuma[DML2_MAX_PLANES]; + double MaximumSwathWidthChroma[DML2_MAX_PLANES]; + double Tno_bw[DML2_MAX_PLANES]; + double Tno_bw_flip[DML2_MAX_PLANES]; + double dst_y_per_vm_flip[DML2_MAX_PLANES]; + double dst_y_per_row_flip[DML2_MAX_PLANES]; + double WritebackDelayTime[DML2_MAX_PLANES]; + unsigned int dpte_group_bytes[DML2_MAX_PLANES]; + unsigned int dpte_row_height[DML2_MAX_PLANES]; + unsigned int dpte_row_height_chroma[DML2_MAX_PLANES]; + double UrgLatency; + double TripToMemory; + double UrgentBurstFactorCursor[DML2_MAX_PLANES]; + double UrgentBurstFactorCursorPre[DML2_MAX_PLANES]; + double UrgentBurstFactorLuma[DML2_MAX_PLANES]; + double UrgentBurstFactorLumaPre[DML2_MAX_PLANES]; + double UrgentBurstFactorChroma[DML2_MAX_PLANES]; + double UrgentBurstFactorChromaPre[DML2_MAX_PLANES]; + double 
MaximumSwathWidthInLineBufferLuma; + double MaximumSwathWidthInLineBufferChroma; + double ExtraLatency; + double ExtraLatency_sr; + double ExtraLatencyPrefetch; + + double dcc_dram_bw_nom_overhead_factor_p0[DML2_MAX_PLANES]; // overhead to request meta + double dcc_dram_bw_nom_overhead_factor_p1[DML2_MAX_PLANES]; + double dcc_dram_bw_pref_overhead_factor_p0[DML2_MAX_PLANES]; // overhead to request meta + double dcc_dram_bw_pref_overhead_factor_p1[DML2_MAX_PLANES]; + double mall_prefetch_sdp_overhead_factor[DML2_MAX_PLANES]; // overhead to the imall or phantom pipe + double mall_prefetch_dram_overhead_factor[DML2_MAX_PLANES]; + + bool is_using_mall_for_ss[DML2_MAX_PLANES]; + unsigned int meta_row_width_chroma[DML2_MAX_PLANES]; + unsigned int PixelPTEReqHeightC[DML2_MAX_PLANES]; + bool PTE_BUFFER_MODE[DML2_MAX_PLANES]; + unsigned int meta_req_height_chroma[DML2_MAX_PLANES]; + unsigned int meta_pte_bytes_per_frame_ub_c[DML2_MAX_PLANES]; + unsigned int dpde0_bytes_per_frame_ub_c[DML2_MAX_PLANES]; + unsigned int dpte_row_width_luma_ub[DML2_MAX_PLANES]; + unsigned int meta_req_width[DML2_MAX_PLANES]; + unsigned int meta_row_width[DML2_MAX_PLANES]; + unsigned int PixelPTEReqWidthY[DML2_MAX_PLANES]; + unsigned int dpte_row_height_linear[DML2_MAX_PLANES]; + unsigned int PTERequestSizeY[DML2_MAX_PLANES]; + unsigned int dpte_row_width_chroma_ub[DML2_MAX_PLANES]; + unsigned int PixelPTEReqWidthC[DML2_MAX_PLANES]; + unsigned int meta_pte_bytes_per_frame_ub_l[DML2_MAX_PLANES]; + unsigned int dpte_row_height_linear_chroma[DML2_MAX_PLANES]; + unsigned int PTERequestSizeC[DML2_MAX_PLANES]; + unsigned int meta_req_height[DML2_MAX_PLANES]; + unsigned int dpde0_bytes_per_frame_ub_l[DML2_MAX_PLANES]; + unsigned int meta_req_width_chroma[DML2_MAX_PLANES]; + unsigned int PixelPTEReqHeightY[DML2_MAX_PLANES]; + unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES]; + unsigned int vm_group_bytes[DML2_MAX_PLANES]; + unsigned int VReadyOffsetPix[DML2_MAX_PLANES]; + unsigned int VUpdateOffsetPix[DML2_MAX_PLANES]; + unsigned int VUpdateWidthPix[DML2_MAX_PLANES]; + double TSetup[DML2_MAX_PLANES]; + double Tdmdl_vm_raw[DML2_MAX_PLANES]; + double Tdmdl_raw[DML2_MAX_PLANES]; + unsigned int VStartupMin[DML2_MAX_PLANES]; /// <brief Minimum vstartup to meet the prefetch schedule (i.e. the prefetch solution can be found at this vstartup time); not the actual global sync vstartup pos. 
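VStartupMin above is a bound rather than the programmed value: it is the earliest vstartup at which a prefetch schedule still closes, while the value written to the OTG (VStartup in the mode-programming struct further down) is taken as large as possible and then capped by the available vblank. A conceptual sketch of that relationship, paraphrased from the comments rather than taken from the calcs code:

	/*
	 * Conceptual sketch only: the programmed vstartup is the largest
	 * schedulable value, capped by the usable vblank, and must not drop
	 * below the minimum required by the prefetch schedule.
	 */
	unsigned int vstartup_prog = min(max_vstartup_lines,
					 min(vblank_nom_lines, vblank_actual_lines));

	ASSERT(vstartup_prog >= vstartup_min);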
+ double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; + double MaxActiveFCLKChangeLatencySupported; + + // Backend + bool RequiresDSC[DML2_MAX_PLANES]; + bool RequiresFEC[DML2_MAX_PLANES]; + double OutputBpp[DML2_MAX_PLANES]; + double DesiredOutputBpp[DML2_MAX_PLANES]; + double PixelClockBackEnd[DML2_MAX_PLANES]; + unsigned int DSCDelay[DML2_MAX_PLANES]; + enum dml2_core_internal_output_type OutputType[DML2_MAX_PLANES]; + enum dml2_core_internal_output_type_rate OutputRate[DML2_MAX_PLANES]; + bool TotalAvailablePipesSupportNoDSC; + bool TotalAvailablePipesSupportDSC; + unsigned int NumberOfDPPNoDSC; + unsigned int NumberOfDPPDSC; + enum dml2_odm_mode ODMModeNoDSC; + enum dml2_odm_mode ODMModeDSC; + double RequiredDISPCLKPerSurfaceNoDSC; + double RequiredDISPCLKPerSurfaceDSC; + unsigned int EstimatedNumberOfDSCSlices[DML2_MAX_PLANES]; + + // Bandwidth Related Info + double BandwidthAvailableForImmediateFlip; + double vactive_sw_bw_l[DML2_MAX_PLANES]; // no dcc overhead, for the plane + double vactive_sw_bw_c[DML2_MAX_PLANES]; + double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK]; + double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES]; + double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES]; + /* Max bandwidth calculated from prefetch schedule should be considered in addition to the pixel data bw to avoid ms/mp mismatches. + * 1. oto bw should also be considered when calculating peak urgent bw to avoid situations oto/equ mismatches between ms and mp + * + * 2. equ bandwidth needs to be considered for calculating peak urgent bw when equ schedule is used in mode support. + * Some slight difference in variables may cause the pixel data bandwidth to be higher + * even though overall equ prefetch bandwidths can be lower going from ms to mp + */ + double RequiredPrefetchBWMax[DML2_MAX_PLANES]; + double cursor_bw[DML2_MAX_PLANES]; + double prefetch_cursor_bw[DML2_MAX_PLANES]; + double prefetch_vmrow_bw[DML2_MAX_PLANES]; + double final_flip_bw[DML2_MAX_PLANES]; + double meta_row_bw[DML2_MAX_PLANES]; + unsigned int meta_row_bytes[DML2_MAX_PLANES]; + double dpte_row_bw[DML2_MAX_PLANES]; + double excess_vactive_fill_bw_l[DML2_MAX_PLANES]; + double excess_vactive_fill_bw_c[DML2_MAX_PLANES]; + double surface_avg_vactive_required_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES]; + double surface_peak_required_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES]; + + // Something that should be feedback to caller + enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; + unsigned int SurfaceSizeInMALL[DML2_MAX_PLANES]; + unsigned int NoOfDPP[DML2_MAX_PLANES]; + unsigned int NoOfOPP[DML2_MAX_PLANES]; + bool MPCCombine[DML2_MAX_PLANES]; + double dcfclk_deepsleep; + double MinDPPCLKUsingSingleDPP[DML2_MAX_PLANES]; + bool SingleDPPViewportSizeSupportPerSurface[DML2_MAX_PLANES]; + bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES]; + bool NotEnoughUrgentLatencyHiding[DML2_MAX_PLANES]; + bool NotEnoughUrgentLatencyHidingPre[DML2_MAX_PLANES]; + bool PTEBufferSizeNotExceeded[DML2_MAX_PLANES]; + bool DCCMetaBufferSizeNotExceeded[DML2_MAX_PLANES]; + unsigned int TotalNumberOfActiveDPP; + unsigned int TotalNumberOfActiveOPP; + unsigned int TotalNumberOfSingleDPPSurfaces; + unsigned int TotalNumberOfDCCActiveDPP; + unsigned int Total3dlutActive; + + unsigned int SubViewportLinesNeededInMALL[DML2_MAX_PLANES]; + double VActiveLatencyHidingMargin[DML2_MAX_PLANES]; + double VActiveLatencyHidingUs[DML2_MAX_PLANES]; + unsigned int 
MaxVStartupLines[DML2_MAX_PLANES]; + double pstate_vactive_det_fill_delay_us[dml2_pstate_type_count][DML2_MAX_PLANES]; + + unsigned int num_mcaches_l[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_l[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_per_channel_l[DML2_MAX_PLANES]; + unsigned int mcache_offsets_l[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1]; + unsigned int mcache_shift_granularity_l[DML2_MAX_PLANES]; + + unsigned int num_mcaches_c[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_c[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_per_channel_c[DML2_MAX_PLANES]; + unsigned int mcache_offsets_c[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1]; + unsigned int mcache_shift_granularity_c[DML2_MAX_PLANES]; + + bool mall_comb_mcache_l[DML2_MAX_PLANES]; + bool mall_comb_mcache_c[DML2_MAX_PLANES]; + bool lc_comb_mcache[DML2_MAX_PLANES]; + + unsigned int vmpg_width_y[DML2_MAX_PLANES]; + unsigned int vmpg_height_y[DML2_MAX_PLANES]; + unsigned int vmpg_width_c[DML2_MAX_PLANES]; + unsigned int vmpg_height_c[DML2_MAX_PLANES]; + + unsigned int meta_row_height_luma[DML2_MAX_PLANES]; + unsigned int meta_row_height_chroma[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; + + unsigned int pstate_bytes_required_l[dml2_pstate_type_count][DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_c[dml2_pstate_type_count][DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_line[DML2_MAX_PLANES]; + + unsigned int MaximumVStartup[DML2_MAX_PLANES]; + + double HostVMInefficiencyFactor; + double HostVMInefficiencyFactorPrefetch; + + unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; + double tdlut_opt_time[DML2_MAX_PLANES]; + double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; + + double Tvm_trips_flip[DML2_MAX_PLANES]; + double Tr0_trips_flip[DML2_MAX_PLANES]; + double Tvm_trips_flip_rounded[DML2_MAX_PLANES]; + double Tr0_trips_flip_rounded[DML2_MAX_PLANES]; + + unsigned int DSTYAfterScaler[DML2_MAX_PLANES]; + unsigned int DSTXAfterScaler[DML2_MAX_PLANES]; + + enum dml2_pstate_method uclk_pstate_switch_modes[DML2_MAX_PLANES]; +}; + +/// @brief A mega structure that houses various info for model programming step. 
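The RequiredPrefetchBWMax comment in the struct above explains why the peak urgent-bandwidth accounting cannot use the prefetch pixel-data bandwidth alone: the one-to-one (oto) and equalized (equ) schedule bandwidths have to be folded in so that mode support and mode programming arrive at the same peak. Read purely as a statement of intent (the variable names below are placeholders, not the calcs expression):

	/*
	 * Conceptual sketch of the intent: keep the worst of the candidate
	 * prefetch bandwidths so ms and mp agree on the peak. The individual
	 * terms are computed elsewhere in the calcs code.
	 */
	double bw_max = prefetch_pixel_data_bw_luma + prefetch_pixel_data_bw_chroma;

	if (prefetch_bw_oto > bw_max)
		bw_max = prefetch_bw_oto;
	if (prefetch_bw_equ > bw_max)
		bw_max = prefetch_bw_equ;
	required_prefetch_bw_max = bw_max;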
+struct dml2_core_internal_mode_program { + unsigned int qos_param_index; // to access the uclk dependent dpm table + unsigned int active_min_uclk_dpm_index; // to access the min_clk table + double FabricClock; /// <brief Basically just the clock freq at the min (or given) state + //double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting + double dram_bw_mbps; + double min_available_urgent_bandwidth_MBps; /// <brief Minimum guaranteed available urgent return bandwidth in MBps + double uclk_freq_mhz; + unsigned int NoOfDPP[DML2_MAX_PLANES]; + enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; + + //------------- + // Intermediate/Informational + //------------- + double UrgentLatency; + double TripToMemory; + double MetaTripToMemory; + unsigned int VInitPreFillY[DML2_MAX_PLANES]; + unsigned int VInitPreFillC[DML2_MAX_PLANES]; + unsigned int MaxNumSwathY[DML2_MAX_PLANES]; + unsigned int MaxNumSwathC[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_c[DML2_MAX_PLANES]; + + double BytePerPixelInDETY[DML2_MAX_PLANES]; + double BytePerPixelInDETC[DML2_MAX_PLANES]; + unsigned int BytePerPixelY[DML2_MAX_PLANES]; + unsigned int BytePerPixelC[DML2_MAX_PLANES]; + unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe + unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe + unsigned int req_per_swath_ub_l[DML2_MAX_PLANES]; + unsigned int req_per_swath_ub_c[DML2_MAX_PLANES]; + unsigned int SwathWidthSingleDPPY[DML2_MAX_PLANES]; + unsigned int SwathWidthSingleDPPC[DML2_MAX_PLANES]; + double vactive_sw_bw_l[DML2_MAX_PLANES]; + double vactive_sw_bw_c[DML2_MAX_PLANES]; + double excess_vactive_fill_bw_l[DML2_MAX_PLANES]; + double excess_vactive_fill_bw_c[DML2_MAX_PLANES]; + + unsigned int PixelPTEBytesPerRow[DML2_MAX_PLANES]; + unsigned int vm_bytes[DML2_MAX_PLANES]; + unsigned int PrefetchSourceLinesY[DML2_MAX_PLANES]; + double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES]; + double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES]; + unsigned int PrefetchSourceLinesC[DML2_MAX_PLANES]; + double PSCL_THROUGHPUT[DML2_MAX_PLANES]; + double PSCL_THROUGHPUT_CHROMA[DML2_MAX_PLANES]; + unsigned int DSCDelay[DML2_MAX_PLANES]; + double DPPCLKUsingSingleDPP[DML2_MAX_PLANES]; + + unsigned int Read256BlockHeightY[DML2_MAX_PLANES]; + unsigned int Read256BlockWidthY[DML2_MAX_PLANES]; + unsigned int Read256BlockHeightC[DML2_MAX_PLANES]; + unsigned int Read256BlockWidthC[DML2_MAX_PLANES]; + unsigned int MacroTileHeightY[DML2_MAX_PLANES]; + unsigned int MacroTileHeightC[DML2_MAX_PLANES]; + unsigned int MacroTileWidthY[DML2_MAX_PLANES]; + unsigned int MacroTileWidthC[DML2_MAX_PLANES]; + double MaximumSwathWidthLuma[DML2_MAX_PLANES]; + double MaximumSwathWidthChroma[DML2_MAX_PLANES]; + + bool surf_linear128_l[DML2_MAX_PLANES]; + bool surf_linear128_c[DML2_MAX_PLANES]; + + unsigned int SurfaceSizeInTheMALL[DML2_MAX_PLANES]; + double VRatioPrefetchY[DML2_MAX_PLANES]; + double VRatioPrefetchC[DML2_MAX_PLANES]; + double Tno_bw[DML2_MAX_PLANES]; + double Tno_bw_flip[DML2_MAX_PLANES]; + double final_flip_bw[DML2_MAX_PLANES]; + double prefetch_vmrow_bw[DML2_MAX_PLANES]; + double cursor_bw[DML2_MAX_PLANES]; + double prefetch_cursor_bw[DML2_MAX_PLANES]; + double WritebackDelay[DML2_MAX_PLANES]; + unsigned int dpte_row_height[DML2_MAX_PLANES]; + unsigned int dpte_row_height_linear[DML2_MAX_PLANES]; + unsigned int dpte_row_width_luma_ub[DML2_MAX_PLANES]; + unsigned int dpte_row_width_chroma_ub[DML2_MAX_PLANES]; + unsigned int 
dpte_row_height_chroma[DML2_MAX_PLANES]; + unsigned int dpte_row_height_linear_chroma[DML2_MAX_PLANES]; + unsigned int vm_group_bytes[DML2_MAX_PLANES]; + unsigned int dpte_group_bytes[DML2_MAX_PLANES]; + + double dpte_row_bw[DML2_MAX_PLANES]; + double time_per_tdlut_group[DML2_MAX_PLANES]; + double UrgentBurstFactorCursor[DML2_MAX_PLANES]; + double UrgentBurstFactorCursorPre[DML2_MAX_PLANES]; + double UrgentBurstFactorLuma[DML2_MAX_PLANES]; + double UrgentBurstFactorLumaPre[DML2_MAX_PLANES]; + double UrgentBurstFactorChroma[DML2_MAX_PLANES]; + double UrgentBurstFactorChromaPre[DML2_MAX_PLANES]; + + double MaximumSwathWidthInLineBufferLuma; + double MaximumSwathWidthInLineBufferChroma; + + unsigned int vmpg_width_y[DML2_MAX_PLANES]; + unsigned int vmpg_height_y[DML2_MAX_PLANES]; + unsigned int vmpg_width_c[DML2_MAX_PLANES]; + unsigned int vmpg_height_c[DML2_MAX_PLANES]; + + double meta_row_bw[DML2_MAX_PLANES]; + unsigned int meta_row_bytes[DML2_MAX_PLANES]; + unsigned int meta_req_width[DML2_MAX_PLANES]; + unsigned int meta_req_height[DML2_MAX_PLANES]; + unsigned int meta_row_width[DML2_MAX_PLANES]; + unsigned int meta_row_height[DML2_MAX_PLANES]; + unsigned int meta_req_width_chroma[DML2_MAX_PLANES]; + unsigned int meta_row_height_chroma[DML2_MAX_PLANES]; + unsigned int meta_row_width_chroma[DML2_MAX_PLANES]; + unsigned int meta_req_height_chroma[DML2_MAX_PLANES]; + + unsigned int swath_width_luma_ub[DML2_MAX_PLANES]; + unsigned int swath_width_chroma_ub[DML2_MAX_PLANES]; + unsigned int PixelPTEReqWidthY[DML2_MAX_PLANES]; + unsigned int PixelPTEReqHeightY[DML2_MAX_PLANES]; + unsigned int PTERequestSizeY[DML2_MAX_PLANES]; + unsigned int PixelPTEReqWidthC[DML2_MAX_PLANES]; + unsigned int PixelPTEReqHeightC[DML2_MAX_PLANES]; + unsigned int PTERequestSizeC[DML2_MAX_PLANES]; + + double TWait[DML2_MAX_PLANES]; + double Tdmdl_vm_raw[DML2_MAX_PLANES]; + double Tdmdl_vm[DML2_MAX_PLANES]; + double Tdmdl_raw[DML2_MAX_PLANES]; + double Tdmdl[DML2_MAX_PLANES]; + double TSetup[DML2_MAX_PLANES]; + unsigned int dpde0_bytes_per_frame_ub_l[DML2_MAX_PLANES]; + unsigned int dpde0_bytes_per_frame_ub_c[DML2_MAX_PLANES]; + + unsigned int meta_pte_bytes_per_frame_ub_l[DML2_MAX_PLANES]; + unsigned int meta_pte_bytes_per_frame_ub_c[DML2_MAX_PLANES]; + + bool UnboundedRequestEnabled; + unsigned int CompressedBufferSizeInkByte; + unsigned int compbuf_reserved_space_64b; + bool hw_debug5; + unsigned int dcfclk_deep_sleep_hysteresis; + unsigned int min_return_latency_in_dcfclk; + + bool NotEnoughUrgentLatencyHiding[DML2_MAX_PLANES]; + bool NotEnoughUrgentLatencyHidingPre[DML2_MAX_PLANES]; + double ExtraLatency; + double ExtraLatency_sr; + double ExtraLatencyPrefetch; + bool PrefetchAndImmediateFlipSupported; + double TotalDataReadBandwidth; + double BandwidthAvailableForImmediateFlip; + bool NotEnoughTimeForDynamicMetadata[DML2_MAX_PLANES]; + + bool use_one_row_for_frame[DML2_MAX_PLANES]; + bool use_one_row_for_frame_flip[DML2_MAX_PLANES]; + + double TCalc; + unsigned int TotImmediateFlipBytes; + + unsigned int MaxTotalDETInKByte; + unsigned int NomDETInKByte; + unsigned int MinCompressedBufferSizeInKByte; + double PixelClockBackEnd[DML2_MAX_PLANES]; + double OutputBpp[DML2_MAX_PLANES]; + bool dsc_enable[DML2_MAX_PLANES]; + unsigned int num_dsc_slices[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; + unsigned int 
cursor_bytes_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_line[DML2_MAX_PLANES]; + unsigned int MaxVStartupLines[DML2_MAX_PLANES]; /// <brief more like vblank for the plane's OTG + double HostVMInefficiencyFactor; + double HostVMInefficiencyFactorPrefetch; + unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; + double tdlut_opt_time[DML2_MAX_PLANES]; + double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; + double Tvm_trips_flip[DML2_MAX_PLANES]; + double Tr0_trips_flip[DML2_MAX_PLANES]; + double Tvm_trips_flip_rounded[DML2_MAX_PLANES]; + double Tr0_trips_flip_rounded[DML2_MAX_PLANES]; + bool immediate_flip_required; // any pipes need immediate flip + double SOCCLK; /// <brief Basically just the clock freq at the min (or given) state + double TotalWRBandwidth; + double max_urgent_latency_us; + double df_response_time_us; + + enum dml2_pstate_method uclk_pstate_switch_modes[DML2_MAX_PLANES]; + // ------------------- + // Output + // ------------------- + unsigned int pipe_plane[DML2_MAX_PLANES]; // <brief used mainly by dv to map the pipe inst to plane index within DML core; the plane idx of a pipe + unsigned int num_active_pipes; + + bool NoTimeToPrefetch[DML2_MAX_PLANES]; // <brief Prefetch schedule calculation result + + // Support + bool UrgVactiveBandwidthSupport; + bool PrefetchScheduleSupported; + bool UrgentBandwidthSupport; + bool PrefetchModeSupported; // <brief Is the prefetch mode (bandwidth and latency) supported + bool ImmediateFlipSupported; + bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES]; + bool dcfclk_support; + + // Clock + double Dcfclk; + double Dispclk; // <brief dispclk being used in mode programming + double Dppclk[DML2_MAX_PLANES]; // <brief dppclk being used in mode programming + double GlobalDPPCLK; + + double DSCCLK[DML2_MAX_PLANES]; //< brief Required DSCCLK freq. 
Backend; not used in any subsequent calculations for now + double DCFCLKDeepSleep; + + // ARB reg + bool DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + struct dml2_core_internal_watermarks Watermark; + + // DCC compression control + unsigned int request_size_bytes_luma[DML2_MAX_PLANES]; + unsigned int request_size_bytes_chroma[DML2_MAX_PLANES]; + enum dml2_core_internal_request_type RequestLuma[DML2_MAX_PLANES]; + enum dml2_core_internal_request_type RequestChroma[DML2_MAX_PLANES]; + unsigned int DCCYMaxUncompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCYMaxCompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCYIndependentBlock[DML2_MAX_PLANES]; + unsigned int DCCCMaxUncompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCCMaxCompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCCIndependentBlock[DML2_MAX_PLANES]; + + // Stutter Efficiency + double StutterEfficiency; + double StutterEfficiencyNotIncludingVBlank; + unsigned int NumberOfStutterBurstsPerFrame; + double Z8StutterEfficiency; + unsigned int Z8NumberOfStutterBurstsPerFrame; + double Z8StutterEfficiencyNotIncludingVBlank; + double LowPowerStutterEfficiency; + double LowPowerStutterEfficiencyNotIncludingVBlank; + unsigned int LowPowerNumberOfStutterBurstsPerFrame; + double StutterPeriod; + double Z8StutterEfficiencyBestCase; + unsigned int Z8NumberOfStutterBurstsPerFrameBestCase; + double Z8StutterEfficiencyNotIncludingVBlankBestCase; + double StutterPeriodBestCase; + + // DLG TTU reg + double MIN_DST_Y_NEXT_START[DML2_MAX_PLANES]; + bool VREADY_AT_OR_AFTER_VSYNC[DML2_MAX_PLANES]; + unsigned int DSTYAfterScaler[DML2_MAX_PLANES]; + unsigned int DSTXAfterScaler[DML2_MAX_PLANES]; + double dst_y_prefetch[DML2_MAX_PLANES]; + double dst_y_per_vm_vblank[DML2_MAX_PLANES]; + double dst_y_per_row_vblank[DML2_MAX_PLANES]; + double dst_y_per_vm_flip[DML2_MAX_PLANES]; + double dst_y_per_row_flip[DML2_MAX_PLANES]; + double MinTTUVBlank[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeLuma[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeChroma[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeLuma[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeChroma[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + unsigned int CursorDstXOffset[DML2_MAX_PLANES]; + unsigned int CursorDstYOffset[DML2_MAX_PLANES]; + unsigned int CursorChunkHDLAdjust[DML2_MAX_PLANES]; + + double DST_Y_PER_PTE_ROW_NOM_L[DML2_MAX_PLANES]; + double DST_Y_PER_PTE_ROW_NOM_C[DML2_MAX_PLANES]; + double time_per_pte_group_nom_luma[DML2_MAX_PLANES]; + double time_per_pte_group_nom_chroma[DML2_MAX_PLANES]; + double time_per_pte_group_vblank_luma[DML2_MAX_PLANES]; + double time_per_pte_group_vblank_chroma[DML2_MAX_PLANES]; + double time_per_pte_group_flip_luma[DML2_MAX_PLANES]; + double time_per_pte_group_flip_chroma[DML2_MAX_PLANES]; + double TimePerVMGroupVBlank[DML2_MAX_PLANES]; + double TimePerVMGroupFlip[DML2_MAX_PLANES]; + double TimePerVMRequestVBlank[DML2_MAX_PLANES]; + double TimePerVMRequestFlip[DML2_MAX_PLANES]; + + double DST_Y_PER_META_ROW_NOM_L[DML2_MAX_PLANES]; + double DST_Y_PER_META_ROW_NOM_C[DML2_MAX_PLANES]; + double TimePerMetaChunkNominal[DML2_MAX_PLANES]; + double TimePerChromaMetaChunkNominal[DML2_MAX_PLANES]; + double TimePerMetaChunkVBlank[DML2_MAX_PLANES]; + double TimePerChromaMetaChunkVBlank[DML2_MAX_PLANES]; 
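The stutter-efficiency block a few lines up reports how much of the frame the memory path can sit in self-refresh: StutterPeriod is the cadence at which the detile/compressed buffer has to be refilled, NumberOfStutterBurstsPerFrame counts those refills, and the efficiency figures give the self-refresh fraction with and without the vblank contribution. As a rough illustration of the idea only, not DML's actual formula:

	/*
	 * Rough illustration only: efficiency as the self-refresh fraction of
	 * one stutter period. burst_refill_time_us stands in for the time
	 * spent fetching per burst; neither name is a DML variable.
	 */
	double stutter_efficiency_pct =
		100.0 * (stutter_period_us - burst_refill_time_us) / stutter_period_us;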
+ double TimePerMetaChunkFlip[DML2_MAX_PLANES]; + double TimePerChromaMetaChunkFlip[DML2_MAX_PLANES]; + + double FractionOfUrgentBandwidth; + double FractionOfUrgentBandwidthImmediateFlip; + double FractionOfUrgentBandwidthMALL; + + // RQ registers + bool PTE_BUFFER_MODE[DML2_MAX_PLANES]; + unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES]; + double VActiveLatencyHidingUs[DML2_MAX_PLANES]; + unsigned int SubViewportLinesNeededInMALL[DML2_MAX_PLANES]; + bool is_using_mall_for_ss[DML2_MAX_PLANES]; + + // OTG + unsigned int VStartupMin[DML2_MAX_PLANES]; /// <brief Minimum vstartup to meet the prefetch schedule (i.e. the prefetch solution can be found at this vstartup time); not the actual global sync vstartup pos. + unsigned int VStartup[DML2_MAX_PLANES]; /// <brief The vstartup value for OTG programming (will set to max vstartup; but now bounded by min(vblank_nom. actual vblank)) + unsigned int VUpdateOffsetPix[DML2_MAX_PLANES]; + unsigned int VUpdateWidthPix[DML2_MAX_PLANES]; + unsigned int VReadyOffsetPix[DML2_MAX_PLANES]; + unsigned int pstate_keepout_dst_lines[DML2_MAX_PLANES]; + + // Latency and Support + double MaxActiveFCLKChangeLatencySupported; + bool USRRetrainingSupport; + bool g6_temp_read_support; + enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support temp_read_or_ppt_support[DML2_MAX_PLANES]; + bool global_dram_clock_change_supported; + bool global_fclk_change_supported; + bool global_temp_read_or_ppt_supported; + double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; + double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES]; + double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES]; + + // buffer sizing + unsigned int DETBufferSizeInKByte[DML2_MAX_PLANES]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value. 
+ unsigned int DETBufferSizeY[DML2_MAX_PLANES]; + unsigned int DETBufferSizeC[DML2_MAX_PLANES]; + unsigned int SwathHeightY[DML2_MAX_PLANES]; + unsigned int SwathHeightC[DML2_MAX_PLANES]; + + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // active bandwidth, scaled by urg burst factor + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor + double urg_bandwidth_required_qual[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor, use qual_row_bw + double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth + flip + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // same as urg_bandwidth, except not scaled by urg burst factor + double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + + double avg_bandwidth_available_min[dml2_core_internal_soc_state_max]; + double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_bandwidth_available_min[dml2_core_internal_soc_state_max]; // min between SDP and DRAM + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_vm_only bw, sdp has no different derate for vm/non-vm traffic etc. + double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_pixel_and_vm bw, sdp has no different derate for vm/non-vm etc. 
+ + double dcc_dram_bw_nom_overhead_factor_p0[DML2_MAX_PLANES]; + double dcc_dram_bw_nom_overhead_factor_p1[DML2_MAX_PLANES]; + double dcc_dram_bw_pref_overhead_factor_p0[DML2_MAX_PLANES]; + double dcc_dram_bw_pref_overhead_factor_p1[DML2_MAX_PLANES]; + double mall_prefetch_sdp_overhead_factor[DML2_MAX_PLANES]; + double mall_prefetch_dram_overhead_factor[DML2_MAX_PLANES]; + + unsigned int num_mcaches_l[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_l[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_per_channel_l[DML2_MAX_PLANES]; + unsigned int mcache_offsets_l[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1]; + unsigned int mcache_shift_granularity_l[DML2_MAX_PLANES]; + + unsigned int num_mcaches_c[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_c[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_per_channel_c[DML2_MAX_PLANES]; + unsigned int mcache_offsets_c[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1]; + unsigned int mcache_shift_granularity_c[DML2_MAX_PLANES]; + + bool mall_comb_mcache_l[DML2_MAX_PLANES]; + bool mall_comb_mcache_c[DML2_MAX_PLANES]; + bool lc_comb_mcache[DML2_MAX_PLANES]; + + double impacted_prefetch_margin_us[DML2_MAX_PLANES]; +}; + +struct dml2_core_internal_SOCParametersList { + double UrgentLatency; + double ExtraLatency_sr; + double ExtraLatency; + double WritebackLatency; + double DRAMClockChangeLatency; + double FCLKChangeLatency; + double SRExitTime; + double SREnterPlusExitTime; + double SRExitTimeLowPower; + double SREnterPlusExitTimeLowPower; + double SRExitZ8Time; + double SREnterPlusExitZ8Time; + double USRRetrainingLatency; + double SMNLatency; + double g6_temp_read_blackout_us; + double temp_read_or_ppt_blackout_us; + double max_urgent_latency_us; + double df_response_time_us; + enum dml2_qos_param_type qos_type; +}; + +struct dml2_core_calcs_mode_support_locals { + double PixelClockBackEnd[DML2_MAX_PLANES]; + double OutputBpp[DML2_MAX_PLANES]; + + unsigned int meta_row_height_luma[DML2_MAX_PLANES]; + unsigned int meta_row_height_chroma[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; + + bool dummy_boolean[3]; + unsigned int dummy_integer[3]; + unsigned int dummy_integer_array[36][DML2_MAX_PLANES]; + enum dml2_odm_mode dummy_odm_mode[DML2_MAX_PLANES]; + bool dummy_boolean_array[2][DML2_MAX_PLANES]; + double dummy_single[3]; + double dummy_single_array[DML2_MAX_PLANES]; + struct dml2_core_internal_watermarks dummy_watermark; + double dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double surface_dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES]; + + unsigned int MaximumVStartup[DML2_MAX_PLANES]; + unsigned int DSTYAfterScaler[DML2_MAX_PLANES]; + unsigned int DSTXAfterScaler[DML2_MAX_PLANES]; + struct dml2_core_internal_SOCParametersList mSOCParameters; + struct dml2_core_internal_DmlPipe myPipe; + struct dml2_core_internal_DmlPipe SurfParameters[DML2_MAX_PLANES]; + unsigned int TotalNumberOfActiveWriteback; + unsigned int MaximumSwathWidthSupportLuma; + unsigned int MaximumSwathWidthSupportChroma; + bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; + bool MPCCombineMethodAsPossible; + bool TotalAvailablePipesSupportNoDSC; + unsigned int NumberOfDPPNoDSC; + enum dml2_odm_mode ODMModeNoDSC; + double RequiredDISPCLKPerSurfaceNoDSC; + bool TotalAvailablePipesSupportDSC; + unsigned int 
NumberOfDPPDSC; + enum dml2_odm_mode ODMModeDSC; + double RequiredDISPCLKPerSurfaceDSC; + double BWOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int TotalNumberOfActiveOTG; + unsigned int TotalNumberOfActiveHDMIFRL; + unsigned int TotalNumberOfActiveDP2p0; + unsigned int TotalNumberOfActiveDP2p0Outputs; + unsigned int TotalSlots; + unsigned int DSCFormatFactor; + unsigned int TotalDSCUnitsRequired; + unsigned int ReorderingBytes; + bool ImmediateFlipRequired; + bool FullFrameMALLPStateMethod; + bool SubViewportMALLPStateMethod; + bool PhantomPipeMALLPStateMethod; + bool SubViewportMALLRefreshGreaterThan120Hz; + + double HostVMInefficiencyFactor; + double HostVMInefficiencyFactorPrefetch; + unsigned int MaxVStartup; + double PixelClockBackEndFactor; + unsigned int NumDSCUnitRequired; + + double Tvm_trips[DML2_MAX_PLANES]; + double Tr0_trips[DML2_MAX_PLANES]; + double Tvm_trips_flip[DML2_MAX_PLANES]; + double Tr0_trips_flip[DML2_MAX_PLANES]; + double Tvm_trips_flip_rounded[DML2_MAX_PLANES]; + double Tr0_trips_flip_rounded[DML2_MAX_PLANES]; + unsigned int per_pipe_flip_bytes[DML2_MAX_PLANES]; + + unsigned int vmpg_width_y[DML2_MAX_PLANES]; + unsigned int vmpg_height_y[DML2_MAX_PLANES]; + unsigned int vmpg_width_c[DML2_MAX_PLANES]; + unsigned int vmpg_height_c[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_c[DML2_MAX_PLANES]; + + unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_row_bytes[DML2_MAX_PLANES]; + unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; + double tdlut_opt_time[DML2_MAX_PLANES]; + double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; + + unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_line[DML2_MAX_PLANES]; + unsigned int cursor_lines_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes[DML2_MAX_PLANES]; + bool stream_visited[DML2_MAX_PLANES]; + + unsigned int pstate_bytes_required_l[dml2_pstate_type_count][DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_c[dml2_pstate_type_count][DML2_MAX_PLANES]; + + double prefetch_sw_bytes[DML2_MAX_PLANES]; + double Tpre_rounded[DML2_MAX_PLANES]; + double Tpre_oto[DML2_MAX_PLANES]; + bool recalc_prefetch_schedule; + bool recalc_prefetch_done; + double impacted_dst_y_pre[DML2_MAX_PLANES]; + double line_times[DML2_MAX_PLANES]; + enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; + unsigned int lb_source_lines_l[DML2_MAX_PLANES]; + unsigned int lb_source_lines_c[DML2_MAX_PLANES]; + double prefetch_swath_time_us[DML2_MAX_PLANES]; +}; + +struct dml2_core_calcs_mode_programming_locals { + double PixelClockBackEnd[DML2_MAX_PLANES]; + double OutputBpp[DML2_MAX_PLANES]; + unsigned int num_active_planes; // <brief As determined by either e2e_pipe_param or display_cfg + unsigned int MaxTotalDETInKByte; + unsigned int NomDETInKByte; + unsigned int MinCompressedBufferSizeInKByte; + double SOCCLK; /// <brief Basically just the clock freq at the min (or given) state + + double dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double surface_dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES]; + double surface_dummy_bw0[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES]; + unsigned int 
dummy_integer_array[4][DML2_MAX_PLANES]; + enum dml2_output_encoder_class dummy_output_encoder_array[DML2_MAX_PLANES]; + double dummy_single_array[2][DML2_MAX_PLANES]; + unsigned int dummy_long_array[8][DML2_MAX_PLANES]; + bool dummy_boolean_array[2][DML2_MAX_PLANES]; + bool dummy_boolean[2]; + double dummy_single[2]; + struct dml2_core_internal_watermarks dummy_watermark; + + unsigned int DSCFormatFactor; + struct dml2_core_internal_DmlPipe SurfaceParameters[DML2_MAX_PLANES]; + unsigned int ReorderingBytes; + double HostVMInefficiencyFactor; + double HostVMInefficiencyFactorPrefetch; + unsigned int TotalDCCActiveDPP; + unsigned int TotalActiveDPP; + unsigned int Total3dlutActive; + unsigned int MaxVStartupLines[DML2_MAX_PLANES]; /// <brief more like vblank for the plane's OTG + bool immediate_flip_required; // any pipes need immediate flip + bool DestinationLineTimesForPrefetchLessThan2; + bool VRatioPrefetchMoreThanMax; + double MaxTotalRDBandwidthNotIncludingMALLPrefetch; + struct dml2_core_internal_SOCParametersList mmSOCParameters; + double Tvstartup_margin; + double dlg_vblank_start; + double LSetup; + double blank_lines_remaining; + double WRBandwidth; + struct dml2_core_internal_DmlPipe myPipe; + double PixelClockBackEndFactor; + unsigned int vmpg_width_y[DML2_MAX_PLANES]; + unsigned int vmpg_height_y[DML2_MAX_PLANES]; + unsigned int vmpg_width_c[DML2_MAX_PLANES]; + unsigned int vmpg_height_c[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_c[DML2_MAX_PLANES]; + + unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; + + unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_row_bytes[DML2_MAX_PLANES]; + unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; + double tdlut_opt_time[DML2_MAX_PLANES]; + double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; + + unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_line[DML2_MAX_PLANES]; + unsigned int cursor_lines_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes[DML2_MAX_PLANES]; + + double Tvm_trips[DML2_MAX_PLANES]; + double Tr0_trips[DML2_MAX_PLANES]; + double Tvm_trips_flip[DML2_MAX_PLANES]; + double Tr0_trips_flip[DML2_MAX_PLANES]; + double Tvm_trips_flip_rounded[DML2_MAX_PLANES]; + double Tr0_trips_flip_rounded[DML2_MAX_PLANES]; + unsigned int per_pipe_flip_bytes[DML2_MAX_PLANES]; + + unsigned int pstate_bytes_required_l[dml2_pstate_type_count][DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_c[dml2_pstate_type_count][DML2_MAX_PLANES]; + + double prefetch_sw_bytes[DML2_MAX_PLANES]; + double Tpre_rounded[DML2_MAX_PLANES]; + double Tpre_oto[DML2_MAX_PLANES]; + bool recalc_prefetch_schedule; + double impacted_dst_y_pre[DML2_MAX_PLANES]; + double line_times[DML2_MAX_PLANES]; + enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; + unsigned int lb_source_lines_l[DML2_MAX_PLANES]; + unsigned int lb_source_lines_c[DML2_MAX_PLANES]; + unsigned int num_dsc_slices[DML2_MAX_PLANES]; + bool dsc_enable[DML2_MAX_PLANES]; +}; + +struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals { + double ActiveDRAMClockChangeLatencyMargin[DML2_MAX_PLANES]; + double 
ActiveFCLKChangeLatencyMargin[DML2_MAX_PLANES]; + double USRRetrainingLatencyMargin[DML2_MAX_PLANES]; + double g6_temp_read_latency_margin[DML2_MAX_PLANES]; + double temp_read_or_ppt_latency_margin[DML2_MAX_PLANES]; + + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double LinesInDETY[DML2_MAX_PLANES]; + double LinesInDETC[DML2_MAX_PLANES]; + unsigned int LinesInDETYRoundedDownToSwath[DML2_MAX_PLANES]; + unsigned int LinesInDETCRoundedDownToSwath[DML2_MAX_PLANES]; + double FullDETBufferingTimeY; + double FullDETBufferingTimeC; + double WritebackDRAMClockChangeLatencyMargin; + double WritebackFCLKChangeLatencyMargin; + double WritebackLatencyHiding; + + unsigned int TotalActiveWriteback; + unsigned int LBLatencyHidingSourceLinesY[DML2_MAX_PLANES]; + unsigned int LBLatencyHidingSourceLinesC[DML2_MAX_PLANES]; + double TotalPixelBW; + double EffectiveDETBufferSizeY; + double ActiveClockChangeLatencyHidingY; + double ActiveClockChangeLatencyHidingC; + double ActiveClockChangeLatencyHiding; + unsigned int dst_y_pstate; + unsigned int src_y_pstate_l; + unsigned int src_y_pstate_c; + unsigned int src_y_ahead_l; + unsigned int src_y_ahead_c; + unsigned int sub_vp_lines_l; + unsigned int sub_vp_lines_c; + +}; + +struct dml2_core_calcs_CalculateVMRowAndSwath_locals { + unsigned int PTEBufferSizeInRequestsForLuma[DML2_MAX_PLANES]; + unsigned int PTEBufferSizeInRequestsForChroma[DML2_MAX_PLANES]; + unsigned int vm_bytes_l; + unsigned int vm_bytes_c; + unsigned int PixelPTEBytesPerRowY[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowC[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowStorageY[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowStorageC[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int dpte_row_width_luma_ub_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int dpte_row_height_luma_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int dpte_row_height_chroma_one_row_per_frame[DML2_MAX_PLANES]; + bool one_row_per_frame_fits_in_buffer[DML2_MAX_PLANES]; + unsigned int HostVMDynamicLevels; + unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES]; +}; + +struct dml2_core_calcs_CalculateVMRowAndSwath_params { + const struct dml2_display_cfg *display_cfg; + unsigned int NumberOfActiveSurfaces; + struct dml2_core_internal_DmlPipe *myPipe; + unsigned int *SurfaceSizeInMALL; + unsigned int PTEBufferSizeInRequestsLuma; + unsigned int PTEBufferSizeInRequestsChroma; + unsigned int MALLAllocatedForDCN; + unsigned int *SwathWidthY; + unsigned int *SwathWidthC; + unsigned int HostVMMinPageSize; + unsigned int DCCMetaBufferSizeBytes; + bool mrq_present; + enum dml2_pstate_method *uclk_pstate_switch_modes; + + // Output + bool *PTEBufferSizeNotExceeded; + bool *DCCMetaBufferSizeNotExceeded; + + unsigned int *dpte_row_width_luma_ub; + unsigned int *dpte_row_width_chroma_ub; + unsigned int *dpte_row_height_luma; + unsigned int *dpte_row_height_chroma; + unsigned int *dpte_row_height_linear_luma; // VBA_DELTA + unsigned int *dpte_row_height_linear_chroma; // VBA_DELTA + + unsigned int *vm_group_bytes; + unsigned int *dpte_group_bytes; + unsigned int *PixelPTEReqWidthY; + unsigned int *PixelPTEReqHeightY; + unsigned int *PTERequestSizeY; + unsigned int *vmpg_width_y; + unsigned int *vmpg_height_y; + + unsigned int 
*PixelPTEReqWidthC; + unsigned int *PixelPTEReqHeightC; + unsigned int *PTERequestSizeC; + unsigned int *vmpg_width_c; + unsigned int *vmpg_height_c; + + unsigned int *dpde0_bytes_per_frame_ub_l; + unsigned int *dpde0_bytes_per_frame_ub_c; + + unsigned int *PrefetchSourceLinesY; + unsigned int *PrefetchSourceLinesC; + unsigned int *VInitPreFillY; + unsigned int *VInitPreFillC; + unsigned int *MaxNumSwathY; + unsigned int *MaxNumSwathC; + double *dpte_row_bw; + unsigned int *PixelPTEBytesPerRow; + unsigned int *dpte_row_bytes_per_row_l; + unsigned int *dpte_row_bytes_per_row_c; + unsigned int *vm_bytes; + bool *use_one_row_for_frame; + bool *use_one_row_for_frame_flip; + bool *is_using_mall_for_ss; + bool *PTE_BUFFER_MODE; + unsigned int *BIGK_FRAGMENT_SIZE; + + // MRQ + unsigned int *meta_req_width_luma; + unsigned int *meta_req_height_luma; + unsigned int *meta_row_width_luma; + unsigned int *meta_row_height_luma; + unsigned int *meta_pte_bytes_per_frame_ub_l; + + unsigned int *meta_req_width_chroma; + unsigned int *meta_req_height_chroma; + unsigned int *meta_row_width_chroma; + unsigned int *meta_row_height_chroma; + unsigned int *meta_pte_bytes_per_frame_ub_c; + double *meta_row_bw; + unsigned int *meta_row_bytes; + unsigned int *meta_row_bytes_per_row_ub_l; + unsigned int *meta_row_bytes_per_row_ub_c; +}; + +struct dml2_core_calcs_CalculatePrefetchSchedule_locals { + bool NoTimeToPrefetch; + unsigned int DPPCycles; + unsigned int DISPCLKCycles; + double DSTTotalPixelsAfterScaler; + double LineTime; + double dst_y_prefetch_equ; + double prefetch_bw_oto; + double per_pipe_vactive_sw_bw; + double Tvm_oto; + double Tr0_oto; + double Tvm_oto_lines; + double Tr0_oto_lines; + double dst_y_prefetch_oto; + double TimeForFetchingVM; + double TimeForFetchingRowInVBlank; + double LinesToRequestPrefetchPixelData; + unsigned int HostVMDynamicLevelsTrips; + double trip_to_mem; + double Tvm_trips_rounded; + double Tr0_trips_rounded; + double max_Tsw; + double Lsw_oto; + double prefetch_bw_equ; + double Tvm_equ; + double Tr0_equ; + double Tdmbf; + double Tdmec; + double Tdmsks; + double total_row_bytes; + double prefetch_bw_pr; + double bytes_pp; + double dep_bytes; + double min_Lsw_oto; + double min_Lsw_equ; + double Tsw_est1; + double Tsw_est2; + double Tsw_est3; + double prefetch_bw1; + double prefetch_bw2; + double prefetch_bw3; + double prefetch_bw4; + double dst_y_prefetch_equ_impacted; + + double TWait_p; + unsigned int cursor_prefetch_bytes; +}; + +struct dml2_core_shared_calculate_det_buffer_size_params { + const struct dml2_display_cfg *display_cfg; + bool ForceSingleDPP; + unsigned int NumberOfActiveSurfaces; + bool UnboundedRequestEnabled; + unsigned int nomDETInKByte; + unsigned int MaxTotalDETInKByte; + unsigned int ConfigReturnBufferSizeInKByte; + unsigned int MinCompressedBufferSizeInKByte; + unsigned int ConfigReturnBufferSegmentSizeInkByte; + unsigned int CompressedBufferSegmentSizeInkByte; + double *ReadBandwidthLuma; + double *ReadBandwidthChroma; + unsigned int *full_swath_bytes_l; + unsigned int *full_swath_bytes_c; + unsigned int *swath_time_value_us; + unsigned int *DPPPerSurface; + bool TryToAllocateForWriteLatency; + unsigned int bestEffortMinActiveLatencyHidingUs; + + // Output + unsigned int *DETBufferSizeInKByte; + unsigned int *CompressedBufferSizeInkByte; +}; + +struct dml2_core_shared_calculate_vm_and_row_bytes_params { + bool ViewportStationary; + bool DCCEnable; + unsigned int NumberOfDPPs; + unsigned int BlockHeight256Bytes; + unsigned int BlockWidth256Bytes; + enum 
dml2_source_format_class SourcePixelFormat; + unsigned int SurfaceTiling; + unsigned int BytePerPixel; + enum dml2_rotation_angle RotationAngle; + unsigned int SwathWidth; // per pipe + unsigned int ViewportHeight; + unsigned int ViewportXStart; + unsigned int ViewportYStart; + bool GPUVMEnable; + unsigned int GPUVMMaxPageTableLevels; + unsigned int GPUVMMinPageSizeKBytes; + unsigned int PTEBufferSizeInRequests; + unsigned int Pitch; + unsigned int MacroTileWidth; + unsigned int MacroTileHeight; + bool is_phantom; + unsigned int DCCMetaPitch; + bool mrq_present; + + // Output + unsigned int *PixelPTEBytesPerRow; // for bandwidth calculation + unsigned int *PixelPTEBytesPerRowStorage; // for PTE buffer size check + unsigned int *dpte_row_width_ub; + unsigned int *dpte_row_height; + unsigned int *dpte_row_height_linear; + unsigned int *PixelPTEBytesPerRow_one_row_per_frame; + unsigned int *dpte_row_width_ub_one_row_per_frame; + unsigned int *dpte_row_height_one_row_per_frame; + unsigned int *vmpg_width; + unsigned int *vmpg_height; + unsigned int *PixelPTEReqWidth; + unsigned int *PixelPTEReqHeight; + unsigned int *PTERequestSize; + unsigned int *dpde0_bytes_per_frame_ub; + + unsigned int *meta_row_bytes; + unsigned int *MetaRequestWidth; + unsigned int *MetaRequestHeight; + unsigned int *meta_row_width; + unsigned int *meta_row_height; + unsigned int *meta_pte_bytes_per_frame_ub; +}; + +struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals { + unsigned int MaximumSwathHeightY[DML2_MAX_PLANES]; + unsigned int MaximumSwathHeightC[DML2_MAX_PLANES]; + unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES]; + unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES]; + unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES]; + unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES]; + unsigned int SwathTimeValueUs[DML2_MAX_PLANES]; + + struct dml2_core_shared_calculate_det_buffer_size_params calculate_det_buffer_size_params; +}; + +struct dml2_core_shared_TruncToValidBPP_locals { +}; + +struct dml2_core_shared_CalculateDETBufferSize_locals { + unsigned int DETBufferSizePoolInKByte; + unsigned int NextDETBufferPieceInKByte; + unsigned int NextSurfaceToAssignDETPiece; + double TotalBandwidth; + double BandwidthOfSurfacesNotAssignedDETPiece; + unsigned int max_minDET; + unsigned int minDET; + unsigned int minDET_pipe; + unsigned int TotalBandwidthPerStream[DML2_MAX_PLANES]; + unsigned int TotalPixelRate; + unsigned int DETBudgetPerStream[DML2_MAX_PLANES]; + unsigned int RemainingDETBudgetPerStream[DML2_MAX_PLANES]; + unsigned int IdealDETBudget, DeltaDETBudget; + unsigned int ResidualDETAfterRounding; +}; + +struct dml2_core_shared_get_urgent_bandwidth_required_locals { + double required_bandwidth_mbps; + double required_bandwidth_mbps_this_surface; + double adj_factor_p0; + double adj_factor_p1; + double adj_factor_cur; + double adj_factor_p0_pre; + double adj_factor_p1_pre; + double adj_factor_cur_pre; + double per_plane_flip_bw[DML2_MAX_PLANES]; + double mall_svp_prefetch_factor; + double tmp_nom_adj_factor_p0; + double tmp_nom_adj_factor_p1; + double tmp_pref_adj_factor_p0; + double tmp_pref_adj_factor_p1; + double vm_row_bw; + double flip_and_active_bw; + double flip_and_prefetch_bw; + double flip_and_prefetch_bw_max; + double active_and_excess_bw; +}; + +struct dml2_core_shared_calculate_peak_bandwidth_required_locals { + double unity_array[DML2_MAX_PLANES]; + double zero_array[DML2_MAX_PLANES]; + double surface_dummy_bw[DML2_MAX_PLANES]; +}; + +struct 
dml2_core_shared_CalculateFlipSchedule_locals { + double min_row_time; + double Tvm_flip; + double Tr0_flip; + double ImmediateFlipBW; + double dpte_row_bytes; + double min_row_height; + double min_row_height_chroma; + double max_flip_time; + double lb_flip_bw; + double hvm_scaled_vm_bytes; + double num_rows; + double hvm_scaled_row_bytes; + double hvm_scaled_vm_row_bytes; + bool dual_plane; +}; + +struct dml2_core_shared_rq_dlg_get_dlg_reg_locals { + unsigned int plane_idx; + unsigned int stream_idx; + enum dml2_source_format_class source_format; + const struct dml2_timing_cfg *timing; + bool dual_plane; + enum dml2_odm_mode odm_mode; + + unsigned int htotal; + unsigned int hactive; + unsigned int hblank_end; + unsigned int vblank_end; + bool interlaced; + double pclk_freq_in_mhz; + double refclk_freq_in_mhz; + double ref_freq_to_pix_freq; + + unsigned int num_active_pipes; + unsigned int first_pipe_idx_in_plane; + unsigned int pipe_idx_in_combine; + unsigned int odm_combine_factor; + + double min_ttu_vblank; + unsigned int min_dst_y_next_start; + + unsigned int vready_after_vcount0; + + unsigned int dst_x_after_scaler; + unsigned int dst_y_after_scaler; + + double dst_y_prefetch; + double dst_y_per_vm_vblank; + double dst_y_per_row_vblank; + double dst_y_per_vm_flip; + double dst_y_per_row_flip; + + double max_dst_y_per_vm_vblank; + double max_dst_y_per_row_vblank; + + double vratio_pre_l; + double vratio_pre_c; + + double refcyc_per_line_delivery_pre_l; + double refcyc_per_line_delivery_l; + + double refcyc_per_line_delivery_pre_c; + double refcyc_per_line_delivery_c; + + double refcyc_per_req_delivery_pre_l; + double refcyc_per_req_delivery_l; + + double refcyc_per_req_delivery_pre_c; + double refcyc_per_req_delivery_c; + + double dst_y_per_pte_row_nom_l; + double dst_y_per_pte_row_nom_c; + double refcyc_per_pte_group_nom_l; + double refcyc_per_pte_group_nom_c; + double refcyc_per_pte_group_vblank_l; + double refcyc_per_pte_group_vblank_c; + double refcyc_per_pte_group_flip_l; + double refcyc_per_pte_group_flip_c; + double refcyc_per_tdlut_group; + + double dst_y_per_meta_row_nom_l; + double dst_y_per_meta_row_nom_c; + double refcyc_per_meta_chunk_nom_l; + double refcyc_per_meta_chunk_nom_c; + double refcyc_per_meta_chunk_vblank_l; + double refcyc_per_meta_chunk_vblank_c; + double refcyc_per_meta_chunk_flip_l; + double refcyc_per_meta_chunk_flip_c; +}; + +struct dml2_core_shared_CalculateMetaAndPTETimes_params { + struct dml2_core_internal_scratch *scratch; + const struct dml2_display_cfg *display_cfg; + unsigned int NumberOfActiveSurfaces; + bool *use_one_row_for_frame; + double *dst_y_per_row_vblank; + double *dst_y_per_row_flip; + unsigned int *BytePerPixelY; + unsigned int *BytePerPixelC; + unsigned int *dpte_row_height; + unsigned int *dpte_row_height_chroma; + unsigned int *dpte_group_bytes; + unsigned int *PTERequestSizeY; + unsigned int *PTERequestSizeC; + unsigned int *PixelPTEReqWidthY; + unsigned int *PixelPTEReqHeightY; + unsigned int *PixelPTEReqWidthC; + unsigned int *PixelPTEReqHeightC; + unsigned int *dpte_row_width_luma_ub; + unsigned int *dpte_row_width_chroma_ub; + unsigned int *tdlut_groups_per_2row_ub; + bool mrq_present; + unsigned int MetaChunkSize; + unsigned int MinMetaChunkSizeBytes; + unsigned int *meta_row_width; + unsigned int *meta_row_width_chroma; + unsigned int *meta_row_height; + unsigned int *meta_row_height_chroma; + unsigned int *meta_req_width; + unsigned int *meta_req_width_chroma; + unsigned int *meta_req_height; + unsigned int 
*meta_req_height_chroma; + + // Output + double *time_per_tdlut_group; + double *DST_Y_PER_PTE_ROW_NOM_L; + double *DST_Y_PER_PTE_ROW_NOM_C; + double *time_per_pte_group_nom_luma; + double *time_per_pte_group_vblank_luma; + double *time_per_pte_group_flip_luma; + double *time_per_pte_group_nom_chroma; + double *time_per_pte_group_vblank_chroma; + double *time_per_pte_group_flip_chroma; + + double *DST_Y_PER_META_ROW_NOM_L; + double *DST_Y_PER_META_ROW_NOM_C; + + double *TimePerMetaChunkNominal; + double *TimePerChromaMetaChunkNominal; + double *TimePerMetaChunkVBlank; + double *TimePerChromaMetaChunkVBlank; + double *TimePerMetaChunkFlip; + double *TimePerChromaMetaChunkFlip; +}; + +struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params { + const struct dml2_display_cfg *display_cfg; + bool USRRetrainingRequired; + unsigned int NumberOfActiveSurfaces; + unsigned int MaxLineBufferLines; + unsigned int LineBufferSize; + unsigned int WritebackInterfaceBufferSize; + double DCFCLK; + double ReturnBW; + bool SynchronizeTimings; + bool SynchronizeDRRDisplaysForUCLKPStateChange; + unsigned int *dpte_group_bytes; + struct dml2_core_internal_SOCParametersList mmSOCParameters; + unsigned int WritebackChunkSize; + double SOCCLK; + double DCFClkDeepSleep; + unsigned int *DETBufferSizeY; + unsigned int *DETBufferSizeC; + unsigned int *SwathHeightY; + unsigned int *SwathHeightC; + unsigned int *SwathWidthY; + unsigned int *SwathWidthC; + unsigned int *DPPPerSurface; + double *BytePerPixelDETY; + double *BytePerPixelDETC; + unsigned int *DSTXAfterScaler; + unsigned int *DSTYAfterScaler; + bool UnboundedRequestEnabled; + unsigned int CompressedBufferSizeInkByte; + bool max_outstanding_when_urgent_expected; + unsigned int max_outstanding_requests; + unsigned int max_request_size_bytes; + unsigned int *meta_row_height_l; + unsigned int *meta_row_height_c; + enum dml2_pstate_method *uclk_pstate_switch_modes; + + // Output + struct dml2_core_internal_watermarks *Watermark; + enum dml2_pstate_change_support *DRAMClockChangeSupport; + bool *global_dram_clock_change_support_required; + bool *global_dram_clock_change_supported; + double *MaxActiveDRAMClockChangeLatencySupported; + unsigned int *SubViewportLinesNeededInMALL; + enum dml2_pstate_change_support *FCLKChangeSupport; + bool *global_fclk_change_supported; + double *MaxActiveFCLKChangeLatencySupported; + bool *USRRetrainingSupport; + double *VActiveLatencyHidingMargin; + double *VActiveLatencyHidingUs; + bool *g6_temp_read_support; + enum dml2_pstate_change_support *temp_read_or_ppt_support; + bool *global_temp_read_or_ppt_supported; +}; + +struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params { + const struct dml2_display_cfg *display_cfg; + unsigned int ConfigReturnBufferSizeInKByte; + unsigned int MaxTotalDETInKByte; + unsigned int MinCompressedBufferSizeInKByte; + unsigned int rob_buffer_size_kbytes; + unsigned int pixel_chunk_size_kbytes; + bool ForceSingleDPP; + unsigned int NumberOfActiveSurfaces; + unsigned int nomDETInKByte; + unsigned int ConfigReturnBufferSegmentSizeInkByte; + unsigned int CompressedBufferSegmentSizeInkByte; + double *ReadBandwidthLuma; + double *ReadBandwidthChroma; + double *MaximumSwathWidthLuma; + double *MaximumSwathWidthChroma; + unsigned int *Read256BytesBlockHeightY; + unsigned int *Read256BytesBlockHeightC; + unsigned int *Read256BytesBlockWidthY; + unsigned int *Read256BytesBlockWidthC; + bool *surf_linear128_l; + bool *surf_linear128_c; + enum dml2_odm_mode *ODMMode; + unsigned int 
*BytePerPixY; + unsigned int *BytePerPixC; + double *BytePerPixDETY; + double *BytePerPixDETC; + unsigned int *DPPPerSurface; + bool mrq_present; + unsigned int dummy[2][DML2_MAX_PLANES]; + unsigned int swath_width_luma_ub_single_dpp[DML2_MAX_PLANES]; + unsigned int swath_width_chroma_ub_single_dpp[DML2_MAX_PLANES]; + + // output + unsigned int *req_per_swath_ub_l; + unsigned int *req_per_swath_ub_c; + unsigned int *swath_width_luma_ub; + unsigned int *swath_width_chroma_ub; + unsigned int *SwathWidth; + unsigned int *SwathWidthChroma; + unsigned int *SwathHeightY; + unsigned int *SwathHeightC; + unsigned int *request_size_bytes_luma; + unsigned int *request_size_bytes_chroma; + unsigned int *DETBufferSizeInKByte; + unsigned int *DETBufferSizeY; + unsigned int *DETBufferSizeC; + unsigned int *full_swath_bytes_l; + unsigned int *full_swath_bytes_c; + unsigned int *full_swath_bytes_single_dpp_l; + unsigned int *full_swath_bytes_single_dpp_c; + bool *UnboundedRequestEnabled; + unsigned int *compbuf_reserved_space_64b; + unsigned int *CompressedBufferSizeInkByte; + bool *ViewportSizeSupportPerSurface; + bool *ViewportSizeSupport; + bool *hw_debug5; + + struct dml2_core_shared_calculation_funcs *funcs; +}; + +struct dml2_core_calcs_CalculateStutterEfficiency_locals { + double DETBufferingTimeY; + double SwathWidthYCriticalSurface; + double SwathHeightYCriticalSurface; + double VActiveTimeCriticalSurface; + double FrameTimeCriticalSurface; + unsigned int BytePerPixelYCriticalSurface; + unsigned int DETBufferSizeYCriticalSurface; + double MinTTUVBlankCriticalSurface; + unsigned int BlockWidth256BytesYCriticalSurface; + bool SinglePlaneCriticalSurface; + bool SinglePipeCriticalSurface; + double TotalCompressedReadBandwidth; + double TotalRowReadBandwidth; + double AverageDCCCompressionRate; + double EffectiveCompressedBufferSize; + double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; + double StutterBurstTime; + unsigned int TotalActiveWriteback; + double LinesInDETY; + double LinesInDETYRoundedDownToSwath; + double MaximumEffectiveCompressionLuma; + double MaximumEffectiveCompressionChroma; + double TotalZeroSizeRequestReadBandwidth; + double TotalZeroSizeCompressedReadBandwidth; + double AverageDCCZeroSizeFraction; + double AverageZeroSizeCompressionRate; + bool stream_visited[DML2_MAX_PLANES]; +}; + +struct dml2_core_calcs_CalculateStutterEfficiency_params { + const struct dml2_display_cfg *display_cfg; + unsigned int CompressedBufferSizeInkByte; + bool UnboundedRequestEnabled; + unsigned int MetaFIFOSizeInKEntries; + unsigned int ZeroSizeBufferEntries; + unsigned int PixelChunkSizeInKByte; + unsigned int NumberOfActiveSurfaces; + unsigned int ROBBufferSizeInKByte; + double TotalDataReadBandwidth; + double DCFCLK; + double ReturnBW; + unsigned int CompbufReservedSpace64B; + unsigned int CompbufReservedSpaceZs; + bool hw_debug5; + double SRExitTime; + double SRExitTimeLowPower; + double SRExitZ8Time; + bool SynchronizeTimings; + double StutterEnterPlusExitWatermark; + double LowPowerStutterEnterPlusExitWatermark; + double Z8StutterEnterPlusExitWatermark; + bool ProgressiveToInterlaceUnitInOPP; + double *MinTTUVBlank; + unsigned int *DPPPerSurface; + unsigned int *DETBufferSizeY; + unsigned int *BytePerPixelY; + double *BytePerPixelDETY; + unsigned int *SwathWidthY; + unsigned int *SwathHeightY; + unsigned int *SwathHeightC; + unsigned int *BlockHeight256BytesY; + unsigned int *BlockWidth256BytesY; + unsigned int *BlockHeight256BytesC; + unsigned int *BlockWidth256BytesC; + 
unsigned int *DCCYMaxUncompressedBlock; + unsigned int *DCCCMaxUncompressedBlock; + double *ReadBandwidthSurfaceLuma; + double *ReadBandwidthSurfaceChroma; + double *meta_row_bw; + double *dpte_row_bw; + bool rob_alloc_compressed; + + // output + double *StutterEfficiencyNotIncludingVBlank; + double *StutterEfficiency; + double *LowPowerStutterEfficiencyNotIncludingVBlank; + double *LowPowerStutterEfficiency; + unsigned int *NumberOfStutterBurstsPerFrame; + unsigned int *LowPowerNumberOfStutterBurstsPerFrame; + double *Z8StutterEfficiencyNotIncludingVBlank; + double *Z8StutterEfficiency; + unsigned int *Z8NumberOfStutterBurstsPerFrame; + double *StutterPeriod; + bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; +}; + +struct dml2_core_calcs_CalculatePrefetchSchedule_params { + const struct dml2_display_cfg *display_cfg; + double HostVMInefficiencyFactor; + struct dml2_core_internal_DmlPipe *myPipe; + unsigned int DSCDelay; + double DPPCLKDelaySubtotalPlusCNVCFormater; + double DPPCLKDelaySCL; + double DPPCLKDelaySCLLBOnly; + double DPPCLKDelayCNVCCursor; + double DISPCLKDelaySubtotal; + unsigned int DPP_RECOUT_WIDTH; + enum dml2_output_format_class OutputFormat; + unsigned int MaxInterDCNTileRepeaters; + unsigned int VStartup; + unsigned int HostVMMinPageSize; + bool DynamicMetadataEnable; + bool DynamicMetadataVMEnabled; + unsigned int DynamicMetadataLinesBeforeActiveRequired; + unsigned int DynamicMetadataTransmittedBytes; + double UrgentLatency; + double ExtraLatencyPrefetch; + double TCalc; + unsigned int vm_bytes; + unsigned int PixelPTEBytesPerRow; + double PrefetchSourceLinesY; + unsigned int VInitPreFillY; + unsigned int MaxNumSwathY; + double PrefetchSourceLinesC; + unsigned int VInitPreFillC; + unsigned int MaxNumSwathC; + unsigned int swath_width_luma_ub; // per-pipe + unsigned int swath_width_chroma_ub; // per-pipe + unsigned int SwathHeightY; + unsigned int SwathHeightC; + double TWait; + double Ttrip; + double Turg; + bool setup_for_tdlut; + unsigned int tdlut_pte_bytes_per_frame; + unsigned int tdlut_bytes_per_frame; + double tdlut_opt_time; + double tdlut_drain_time; + + unsigned int num_cursors; + unsigned int cursor_bytes_per_chunk; + unsigned int cursor_bytes_per_line; + + // MRQ + bool dcc_enable; + bool mrq_present; + unsigned int meta_row_bytes; + double mall_prefetch_sdp_overhead_factor; + + double impacted_dst_y_pre; + double vactive_sw_bw_l; // per surface bw + double vactive_sw_bw_c; // per surface bw + + // output + unsigned int *DSTXAfterScaler; + unsigned int *DSTYAfterScaler; + double *dst_y_prefetch; + double *dst_y_per_vm_vblank; + double *dst_y_per_row_vblank; + double *VRatioPrefetchY; + double *VRatioPrefetchC; + double *RequiredPrefetchPixelDataBWLuma; + double *RequiredPrefetchPixelDataBWChroma; + double *RequiredPrefetchBWMax; + bool *NotEnoughTimeForDynamicMetadata; + double *Tno_bw; + double *Tno_bw_flip; + double *prefetch_vmrow_bw; + double *Tdmdl_vm; + double *Tdmdl; + double *TSetup; + double *Tpre_rounded; + double *Tpre_oto; + double *Tvm_trips; + double *Tr0_trips; + double *Tvm_trips_flip; + double *Tr0_trips_flip; + double *Tvm_trips_flip_rounded; + double *Tr0_trips_flip_rounded; + unsigned int *VUpdateOffsetPix; + unsigned int *VUpdateWidthPix; + unsigned int *VReadyOffsetPix; + double *prefetch_cursor_bw; + double *prefetch_sw_bytes; + double *prefetch_swath_time_us; +}; + +struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params { + unsigned int num_active_planes; + enum dml2_source_format_class *pixel_format; + unsigned int 
rob_buffer_size_kbytes; + unsigned int compressed_buffer_size_kbytes; + unsigned int chunk_bytes_l; // same for all planes + unsigned int chunk_bytes_c; + unsigned int *detile_buffer_size_bytes_l; + unsigned int *detile_buffer_size_bytes_c; + unsigned int *full_swath_bytes_l; + unsigned int *full_swath_bytes_c; + unsigned int *lb_source_lines_l; + unsigned int *lb_source_lines_c; + unsigned int *swath_height_l; + unsigned int *swath_height_c; + double *prefetch_sw_bytes; + double *Tpre_rounded; + double *Tpre_oto; + double estimated_dcfclk_mhz; + double estimated_urg_bandwidth_required_mbps; + double *line_time; + double *dst_y_prefetch; + + // output + bool *recalc_prefetch_schedule; + double *impacted_dst_y_pre; +}; + +struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals { + unsigned int max_Trpd_dcfclk_cycles; + unsigned int burst_bytes_to_fill_det; + double time_to_fill_det_us; + unsigned int accumulated_return_path_dcfclk_cycles[DML2_MAX_PLANES]; + bool prefetch_global_check_passed; + unsigned int src_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int src_swath_bytes_c[DML2_MAX_PLANES]; + unsigned int src_detile_buf_size_bytes_l[DML2_MAX_PLANES]; + unsigned int src_detile_buf_size_bytes_c[DML2_MAX_PLANES]; +}; + +struct dml2_core_calcs_calculate_mcache_row_bytes_params { + unsigned int num_chans; + unsigned int mem_word_bytes; + unsigned int mcache_size_bytes; + unsigned int mcache_line_size_bytes; + unsigned int gpuvm_enable; + unsigned int gpuvm_page_size_kbytes; + + //enum dml_rotation_angle rotation_angle; + bool surf_vert; + unsigned int vp_stationary; + unsigned int tiling_mode; + bool imall_enable; + + unsigned int vp_start_x; + unsigned int vp_start_y; + unsigned int full_vp_width; + unsigned int full_vp_height; + unsigned int blk_width; + unsigned int blk_height; + unsigned int vmpg_width; + unsigned int vmpg_height; + unsigned int full_swath_bytes; + unsigned int bytes_per_pixel; + + // output + unsigned int *num_mcaches; + unsigned int *mcache_row_bytes; + unsigned int *mcache_row_bytes_per_channel; + unsigned int *meta_row_width_ub; + double *dcc_dram_bw_nom_overhead_factor; + double *dcc_dram_bw_pref_overhead_factor; + unsigned int *mvmpg_width; + unsigned int *mvmpg_height; + unsigned int *full_vp_access_width_mvmpg_aligned; + unsigned int *mvmpg_per_mcache_lb; +}; + +struct dml2_core_shared_calculate_mcache_setting_locals { + struct dml2_core_calcs_calculate_mcache_row_bytes_params l_p; + struct dml2_core_calcs_calculate_mcache_row_bytes_params c_p; + + bool is_dual_plane; + unsigned int mvmpg_width_l; + unsigned int mvmpg_height_l; + unsigned int full_vp_access_width_mvmpg_aligned_l; + unsigned int mvmpg_per_mcache_lb_l; + unsigned int meta_row_width_l; + + unsigned int mvmpg_width_c; + unsigned int mvmpg_height_c; + unsigned int full_vp_access_width_mvmpg_aligned_c; + unsigned int mvmpg_per_mcache_lb_c; + unsigned int meta_row_width_c; + + unsigned int lc_comb_last_mcache_size; + double luma_time_factor; + double mcache_remainder_l; + double mcache_remainder_c; + unsigned int mvmpg_access_width_l; + unsigned int mvmpg_access_width_c; + unsigned int avg_mcache_element_size_l; + unsigned int avg_mcache_element_size_c; + + unsigned int full_vp_access_width_l; + unsigned int full_vp_access_width_c; +}; + +struct dml2_core_calcs_calculate_mcache_setting_params { + bool dcc_enable; + unsigned int num_chans; + unsigned int mem_word_bytes; + unsigned int mcache_size_bytes; + unsigned int mcache_line_size_bytes; + unsigned int gpuvm_enable; + unsigned int 
gpuvm_page_size_kbytes; + + enum dml2_source_format_class source_format; + bool surf_vert; + unsigned int vp_stationary; + unsigned int tiling_mode; + bool imall_enable; + + unsigned int vp_start_x_l; + unsigned int vp_start_y_l; + unsigned int full_vp_width_l; + unsigned int full_vp_height_l; + unsigned int blk_width_l; + unsigned int blk_height_l; + unsigned int vmpg_width_l; + unsigned int vmpg_height_l; + unsigned int full_swath_bytes_l; + unsigned int bytes_per_pixel_l; + + unsigned int vp_start_x_c; + unsigned int vp_start_y_c; + unsigned int full_vp_width_c; + unsigned int full_vp_height_c; + unsigned int blk_width_c; + unsigned int blk_height_c; + unsigned int vmpg_width_c; + unsigned int vmpg_height_c; + unsigned int full_swath_bytes_c; + unsigned int bytes_per_pixel_c; + + // output + unsigned int *num_mcaches_l; + unsigned int *mcache_row_bytes_l; + unsigned int *mcache_row_bytes_per_channel_l; + unsigned int *mcache_offsets_l; + unsigned int *mcache_shift_granularity_l; + double *dcc_dram_bw_nom_overhead_factor_l; + double *dcc_dram_bw_pref_overhead_factor_l; + + unsigned int *num_mcaches_c; + unsigned int *mcache_row_bytes_c; + unsigned int *mcache_row_bytes_per_channel_c; + unsigned int *mcache_offsets_c; + unsigned int *mcache_shift_granularity_c; + double *dcc_dram_bw_nom_overhead_factor_c; + double *dcc_dram_bw_pref_overhead_factor_c; + + bool *mall_comb_mcache_l; + bool *mall_comb_mcache_c; + bool *lc_comb_mcache; +}; + +struct dml2_core_calcs_calculate_tdlut_setting_params { + // input params + double dispclk_mhz; + bool setup_for_tdlut; + enum dml2_tdlut_width_mode tdlut_width_mode; + enum dml2_tdlut_addressing_mode tdlut_addressing_mode; + unsigned int cursor_buffer_size; + bool gpuvm_enable; + unsigned int gpuvm_page_size_kbytes; + bool is_gfx11; + bool tdlut_mpc_width_flag; + + // output param + unsigned int *tdlut_pte_bytes_per_frame; + unsigned int *tdlut_bytes_per_frame; + unsigned int *tdlut_groups_per_2row_ub; + double *tdlut_opt_time; + double *tdlut_drain_time; + unsigned int *tdlut_bytes_to_deliver; + unsigned int *tdlut_bytes_per_group; +}; + +struct dml2_core_calcs_calculate_peak_bandwidth_required_params { + // output + double (*urg_vactive_bandwidth_required)[dml2_core_internal_bw_max]; + double (*urg_bandwidth_required)[dml2_core_internal_bw_max]; + double (*urg_bandwidth_required_qual)[dml2_core_internal_bw_max]; + double (*non_urg_bandwidth_required)[dml2_core_internal_bw_max]; + double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES]; + double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES]; + + // input + const struct dml2_display_cfg *display_cfg; + bool inc_flip_bw; + unsigned int num_active_planes; + unsigned int *num_of_dpp; + double *dcc_dram_bw_nom_overhead_factor_p0; + double *dcc_dram_bw_nom_overhead_factor_p1; + double *dcc_dram_bw_pref_overhead_factor_p0; + double *dcc_dram_bw_pref_overhead_factor_p1; + double *mall_prefetch_sdp_overhead_factor; + double *mall_prefetch_dram_overhead_factor; + double *surface_read_bandwidth_l; + double *surface_read_bandwidth_c; + double *prefetch_bandwidth_l; + double *prefetch_bandwidth_c; + double *prefetch_bandwidth_max; + double *excess_vactive_fill_bw_l; + double *excess_vactive_fill_bw_c; + double *cursor_bw; + double *dpte_row_bw; + double *meta_row_bw; + double *prefetch_cursor_bw; + double *prefetch_vmrow_bw; + double *flip_bw; + double *urgent_burst_factor_l; + double *urgent_burst_factor_c; + double *urgent_burst_factor_cursor; + double 
*urgent_burst_factor_prefetch_l; + double *urgent_burst_factor_prefetch_c; + double *urgent_burst_factor_prefetch_cursor; +}; + +struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params { + /* inputs */ + const struct dml2_display_cfg *display_cfg; + bool mrq_present; + unsigned int num_active_planes; + unsigned int *num_of_dpp; + unsigned int *meta_row_height_l; + unsigned int *meta_row_height_c; + unsigned int *meta_row_bytes_per_row_ub_l; + unsigned int *meta_row_bytes_per_row_ub_c; + unsigned int *dpte_row_height_l; + unsigned int *dpte_row_height_c; + unsigned int *dpte_bytes_per_row_l; + unsigned int *dpte_bytes_per_row_c; + unsigned int *byte_per_pix_l; + unsigned int *byte_per_pix_c; + unsigned int *swath_width_l; + unsigned int *swath_width_c; + unsigned int *swath_height_l; + unsigned int *swath_height_c; + double latency_to_hide_us[DML2_MAX_PLANES]; + + /* outputs */ + unsigned int *bytes_required_l; + unsigned int *bytes_required_c; +}; + +// A list of overridable function pointers in the core +// shared calculation library. +struct dml2_core_shared_calculation_funcs { + void (*calculate_det_buffer_size)(struct dml2_core_shared_calculate_det_buffer_size_params *p); +}; + +struct dml2_core_internal_scratch { + // Scratch space for function locals + struct dml2_core_calcs_mode_support_locals dml_core_mode_support_locals; + struct dml2_core_calcs_mode_programming_locals dml_core_mode_programming_locals; + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; + struct dml2_core_calcs_CalculateVMRowAndSwath_locals CalculateVMRowAndSwath_locals; + struct dml2_core_calcs_CalculatePrefetchSchedule_locals CalculatePrefetchSchedule_locals; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals CheckGlobalPrefetchAdmissibility_locals; + struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals CalculateSwathAndDETConfiguration_locals; + struct dml2_core_shared_TruncToValidBPP_locals TruncToValidBPP_locals; + struct dml2_core_shared_CalculateDETBufferSize_locals CalculateDETBufferSize_locals; + struct dml2_core_shared_get_urgent_bandwidth_required_locals get_urgent_bandwidth_required_locals; + struct dml2_core_shared_calculate_peak_bandwidth_required_locals calculate_peak_bandwidth_required_locals; + struct dml2_core_shared_CalculateFlipSchedule_locals CalculateFlipSchedule_locals; + struct dml2_core_shared_rq_dlg_get_dlg_reg_locals rq_dlg_get_dlg_reg_locals; + struct dml2_core_calcs_CalculateStutterEfficiency_locals CalculateStutterEfficiency_locals; + + // Scratch space for function params + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct dml2_core_calcs_CalculateVMRowAndSwath_params CalculateVMRowAndSwath_params; + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params CalculateSwathAndDETConfiguration_params; + struct dml2_core_calcs_CalculateStutterEfficiency_params CalculateStutterEfficiency_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params CalculatePrefetchSchedule_params; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params CheckGlobalPrefetchAdmissibility_params; + struct dml2_core_calcs_calculate_mcache_setting_params calculate_mcache_setting_params; + struct dml2_core_calcs_calculate_tdlut_setting_params calculate_tdlut_setting_params; + struct dml2_core_shared_calculate_vm_and_row_bytes_params 
calculate_vm_and_row_bytes_params; + struct dml2_core_shared_calculate_mcache_setting_locals calculate_mcache_setting_locals; + struct dml2_core_shared_CalculateMetaAndPTETimes_params CalculateMetaAndPTETimes_params; + struct dml2_core_calcs_calculate_peak_bandwidth_required_params calculate_peak_bandwidth_params; + struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params calculate_bytes_to_fetch_required_to_hide_latency_params; +}; + +//struct dml2_svp_mode_override; +struct dml2_core_internal_display_mode_lib { + struct dml2_core_ip_params ip; + struct dml2_soc_bb soc; + struct dml2_ip_capabilities ip_caps; + + //@brief Mode Support and Mode programming struct + // Used to hold input, intermediate and output of the calculations + struct dml2_core_internal_mode_support ms; // struct for mode support + struct dml2_core_internal_mode_program mp; // struct for mode programming + // Available overridable calculators for core_shared. + // If null, core_shared will use default calculators. + struct dml2_core_shared_calculation_funcs funcs; + + struct dml2_core_internal_scratch scratch; +}; + +struct dml2_core_calcs_mode_support_ex { + struct dml2_core_internal_display_mode_lib *mode_lib; + const struct dml2_display_cfg *in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table; + int min_clk_index; + enum dml2_project_id project_id; + //unsigned int in_state_index; + struct dml2_core_internal_mode_support_info *out_evaluation_info; +}; + +struct core_display_cfg_support_info; + +struct dml2_core_calcs_mode_programming_ex { + struct dml2_core_internal_display_mode_lib *mode_lib; + const struct dml2_display_cfg *in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table; + const struct core_display_cfg_support_info *cfg_support_info; + int min_clk_index; + enum dml2_project_id project_id; + struct dml2_display_cfg_programming *programming; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c new file mode 100644 index 000000000000..b57d0f6ea6a1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c @@ -0,0 +1,788 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
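+//
+// Descriptive summary of the helpers defined in this file: pixel-format
+// classification (420 and 422 planar/packed checks), swizzle-mode, tile-size
+// and gfx-version queries, rounding and div/rem utilities, pipe/plane
+// mapping, stream output bpp lookup, and debug printing of the
+// mode-support info structure.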
+ +#include "dml2_core_utils.h" + +double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; + +} + +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + default: + DML_ASSERT(0); + break; + } + return val; +} + +bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 0; + break; + case dml2_420_10: + val = 0; + break; + case dml2_420_12: + val = 0; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 1; + break; + case dml2_422_planar_10: + val = 1; + break; + case dml2_422_planar_12: + val = 1; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + default: + DML_ASSERT(0); + break; + } + return val; +} + +bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 0; + break; + case dml2_420_10: + val = 0; + break; + case dml2_420_12: + val = 0; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 1; + break; + case dml2_422_packed_10: + val = 1; + break; + case dml2_422_packed_12: + val = 1; + break; + default: + DML_ASSERT(0); + break; + } + return val; +} + +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + DML_LOG_VERBOSE("DML: 
===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->P2IWith420 == 1) + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->NotEnoughDSCUnits == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", 
support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ROBSupport == 0) + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->OutstandingRequestsSupport == 0) + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) + DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", 
support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || (support->global_dram_clock_change_supported == 0 && support->global_dram_clock_change_support_required)) + DML_LOG_VERBOSE("DML: support: dram_clock_change_support = %d\n", support->global_dram_clock_change_supported); + if (!fail_only || support->ImmediateFlipSupport == 0) + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + + if (!fail_only || support->ModeSupport == 0) + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== \n"); +} + +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + + +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); +#endif + } +} + +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int 
num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) +{ + + if (sw_mode == dml2_sw_linear) + return 256; + else if (sw_mode == dml2_sw_256b_2d) + return 256; + else if (sw_mode == dml2_sw_4kb_2d) + return 4096; + else if (sw_mode == dml2_sw_64kb_2d) + return 65536; + else if (sw_mode == dml2_sw_256kb_2d) + return 262144; + else if (sw_mode == dml2_gfx11_sw_linear) + return 256; + else if (sw_mode == dml2_gfx11_sw_64kb_d) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_t) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_r_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_256kb_d_x) + return 262144; + else if (sw_mode == dml2_gfx11_sw_256kb_r_x) + return 262144; + else { + DML_ASSERT(0); + return 256; + }; +} + +bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) +{ + return (byte_per_pixel != 2); +} + +bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode) +{ + return sw_mode == dml2_sw_linear; +}; + + +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) + version = 12; + else if (sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) + version = 11; + else { + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! 
val=%u\n", sw_mode); + DML_ASSERT(0); + } + + return version; +} + +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) +{ + unsigned int i; + unsigned int index = 0; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); +#endif + return index; +} + +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = false; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = true; + break; + } + } + + if (!clk_entry_found) + DML_ASSERT(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = false; + + if (dml2_core_utils_is_420(source_format) || dml2_core_utils_is_422_planar(source_format) || (source_format == dml2_rgbe_alpha)) + ret_val = true; + + return ret_val; +} + +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (math_log2_approx(a) - subtrahend); +} + +static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; + phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) 
math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = 
svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + return true; + case dml2_none: + default: + return false; + } +} +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmifrl: + return true; + case dml2_hdmi: + case dml2_none: + default: + return false; + } +} + + +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_edp: + return true; + case dml2_dp2p0: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp2p0: + return true; + case dml2_dp: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + return dml2_core_utils_is_dio_dp_encoder(stream_descriptor) + || dml2_core_utils_is_hpo_dp_encoder(stream_descriptor); +} + + +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + return true; + case dml2_dp_rate_na: + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + return true; + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + case dml2_dp_rate_na: + default: + return false; + } +} + +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode) +{ + switch (odm_mode) { + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + return true; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_combine_2to1: + case dml2_odm_mode_combine_3to1: + case dml2_odm_mode_combine_4to1: + default: + return false; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h new file mode 100644 index 000000000000..95f0d017add4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
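As a rough, standalone sketch of the arithmetic used by dml2_core_utils_round_to_multiple() and the phantom-viewport sizing in create_phantom_plane_from_main_plane() above: the local helpers below only mirror the patch code, and every input value is made up for illustration.

#include <stdio.h>
#include <math.h>

/* Mirrors dml2_core_utils_round_to_multiple(): round num to a multiple, up or down. */
static unsigned int round_to_multiple(unsigned int num, unsigned int multiple, int up)
{
	unsigned int remainder;

	if (multiple == 0)
		return num;

	remainder = num % multiple;
	if (remainder == 0)
		return num;

	return up ? num + multiple - remainder : num - remainder;
}

/* Phantom viewport height as computed in create_phantom_plane_from_main_plane():
 * scale the phantom stream's v_active by the plane's vertical scaling ratio,
 * round up to a multiple of 16 lines, and never exceed the main viewport height.
 * All numbers below are hypothetical.
 */
int main(void)
{
	double v_ratio = 1.5;                 /* hypothetical plane0 v_ratio */
	unsigned int phantom_v_active = 120;  /* hypothetical phantom stream v_active */
	unsigned int main_viewport_h = 2160;  /* hypothetical main plane viewport height */

	double scaled = ceil(v_ratio * (double)phantom_v_active / 16.0) * 16.0;
	unsigned int phantom_viewport_h = (unsigned int)fmin(scaled, (double)main_viewport_h);

	printf("round_to_multiple(1000, 256, up)   = %u\n", round_to_multiple(1000, 256, 1));
	printf("round_to_multiple(1000, 256, down) = %u\n", round_to_multiple(1000, 256, 0));
	printf("phantom viewport height            = %u\n", phantom_viewport_h);
	return 0;
}

With these made-up inputs the phantom viewport ends up at 192 lines (180 rounded up to the next multiple of 16), well under the main viewport, which is the intent of the clamp in the patch.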
+ +#ifndef __DML2_CORE_UTILS_H__ +#define __DML2_CORE_UTILS_H__ +#include "dml2_internal_shared_types.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); +bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format); +bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format); +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up); +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); +bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); +bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode); +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format); +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend); +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch); +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode); + +#endif /* __DML2_CORE_UTILS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c new file mode 100644 index 000000000000..22969a533a7b --- /dev/null +++ 
b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -0,0 +1,785 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_dpmm_dcn4.h" +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" +#include "lib_float_math.h" + +static double dram_bw_kbps_to_uclk_khz(unsigned long long bandwidth_kbps, const struct dml2_dram_params *dram_config) +{ + double uclk_khz = 0; + unsigned long uclk_mbytes_per_tick = 0; + + uclk_mbytes_per_tick = dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock; + + uclk_khz = (double)bandwidth_kbps / uclk_mbytes_per_tick; + + return uclk_khz; +} + +static void get_minimum_clocks_for_latency(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, + double *uclk, + double *fclk, + double *dcfclk) +{ + int min_clock_index_for_latency; + + if (in_out->display_cfg->stage3.success) + min_clock_index_for_latency = in_out->display_cfg->stage3.min_clk_index_for_latency; + else + min_clock_index_for_latency = in_out->display_cfg->stage1.min_clk_index_for_latency; + + *dcfclk = in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].min_dcfclk_khz; + *fclk = in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].min_fclk_khz; + *uclk = dram_bw_kbps_to_uclk_khz(in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].pre_derate_dram_bw_kbps, + &in_out->soc_bb->clk_table.dram_config); +} + +static unsigned long dml_round_up(double a) +{ + if (a - (unsigned long)a > 0) { + return ((unsigned long)a) + 1; + } + return (unsigned long)a; +} + +static void calculate_system_active_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + double min_uclk_avg, min_uclk_urgent, min_uclk_bw; + double min_fclk_avg, min_fclk_urgent, min_fclk_bw; + double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw; + double min_uclk_latency, min_fclk_latency, min_dcfclk_latency; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + if (in_out->display_cfg->display_config.hostvm_enable) + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100); + else + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? 
min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); +} + +static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + double min_uclk_avg, min_uclk_urgent, min_uclk_bw; + double min_fclk_avg, min_fclk_urgent, min_fclk_bw; + double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw; + double min_fclk_latency, min_dcfclk_latency; + double min_uclk_latency; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + /* assumes DF throttling is enabled */ + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? 
min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + + /* assumes DF throttling is disabled */ + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? 
min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); +} + +static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + double min_uclk_avg; + double min_fclk_avg; + double min_dcfclk_avg; + double min_uclk_latency, min_fclk_latency, min_dcfclk_latency; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dram_derate_percent_pixel / 100); + + min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.fclk_derate_percent / 100); + + min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dcfclk_derate_percent / 100); + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? 
min_dcfclk_avg : min_dcfclk_latency); +} + +static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double margin, unsigned long vco_freq_khz, unsigned long *rounded_khz, uint32_t *divider_id) +{ + enum dentist_divider_range { + DFS_DIVIDER_RANGE_1_START = 8, /* 2.00 */ + DFS_DIVIDER_RANGE_1_STEP = 1, /* 0.25 */ + DFS_DIVIDER_RANGE_2_START = 64, /* 16.00 */ + DFS_DIVIDER_RANGE_2_STEP = 2, /* 0.50 */ + DFS_DIVIDER_RANGE_3_START = 128, /* 32.00 */ + DFS_DIVIDER_RANGE_3_STEP = 4, /* 1.00 */ + DFS_DIVIDER_RANGE_4_START = 248, /* 62.00 */ + DFS_DIVIDER_RANGE_4_STEP = 264, /* 66.00 */ + DFS_DIVIDER_RANGE_SCALE_FACTOR = 4 + }; + + enum DFS_base_divider_id { + DFS_BASE_DID_1 = 0x08, + DFS_BASE_DID_2 = 0x40, + DFS_BASE_DID_3 = 0x60, + DFS_BASE_DID_4 = 0x7e, + DFS_MAX_DID = 0x7f + }; + + unsigned int divider; + + if (clock_khz < 1 || vco_freq_khz < 1 || clock_khz > vco_freq_khz) + return false; + + clock_khz *= 1.0 + margin; + + divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); + + /* we want to floor here to get higher clock than required rather than lower */ + if (divider < DFS_DIVIDER_RANGE_2_START) { + if (divider < DFS_DIVIDER_RANGE_1_START) + *divider_id = DFS_BASE_DID_1; + else + *divider_id = DFS_BASE_DID_1 + ((divider - DFS_DIVIDER_RANGE_1_START) / DFS_DIVIDER_RANGE_1_STEP); + } else if (divider < DFS_DIVIDER_RANGE_3_START) { + *divider_id = DFS_BASE_DID_2 + ((divider - DFS_DIVIDER_RANGE_2_START) / DFS_DIVIDER_RANGE_2_STEP); + } else if (divider < DFS_DIVIDER_RANGE_4_START) { + *divider_id = DFS_BASE_DID_3 + ((divider - DFS_DIVIDER_RANGE_3_START) / DFS_DIVIDER_RANGE_3_STEP); + } else { + *divider_id = DFS_BASE_DID_4 + ((divider - DFS_DIVIDER_RANGE_4_START) / DFS_DIVIDER_RANGE_4_STEP); + if (*divider_id > DFS_MAX_DID) + *divider_id = DFS_MAX_DID; + } + + *rounded_khz = vco_freq_khz * DFS_DIVIDER_RANGE_SCALE_FACTOR / divider; + + return true; +} + +static bool round_to_non_dfs_granularity(unsigned long dispclk_khz, unsigned long dpprefclk_khz, unsigned long dtbrefclk_khz, + unsigned long *rounded_dispclk_khz, unsigned long *rounded_dpprefclk_khz, unsigned long *rounded_dtbrefclk_khz) +{ + unsigned long pll_frequency_khz; + + pll_frequency_khz = (unsigned long) math_max2(600000, math_ceil2(math_max3(dispclk_khz, dpprefclk_khz, dtbrefclk_khz), 1000)); + + *rounded_dispclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dispclk_khz, 32); + + *rounded_dpprefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dpprefclk_khz, 32); + + if (dtbrefclk_khz > 0) { + *rounded_dtbrefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dtbrefclk_khz, 32); + } else { + *rounded_dtbrefclk_khz = 0; + } + + return true; +} + +static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table) +{ + bool result = false; + int index = 0; + + if (clock_table->num_clk_values > 2) { + while (index < clock_table->num_clk_values && clock_table->clk_values_khz[index] < min_value) + index++; + + if (index < clock_table->num_clk_values) { + *rounded_value = clock_table->clk_values_khz[index]; + result = true; + } + } else if (clock_table->clk_values_khz[clock_table->num_clk_values - 1] >= min_value) { + *rounded_value = min_value; + result = true; + } + return result; +} + +static bool round_up_to_next_dpm(unsigned long *clock_value, const struct dml2_clk_table *clock_table) +{ + return 
round_up_and_copy_to_next_dpm(*clock_value, clock_value, clock_table); +} + +static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table) +{ + bool result; + + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.dcfclk_khz, &state_table->dcfclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.fclk_khz, &state_table->fclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.uclk_khz, &state_table->uclk); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz, &state_table->dcfclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz, &state_table->fclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz, &state_table->uclk); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.dcfclk_khz, &state_table->dcfclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.fclk_khz, &state_table->fclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk); + + /* these clocks are optional, so they can fail to map, in which case map all to 0 */ + if (result) { + if (!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) || + !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) || + !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) { + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0; + } + } + + return result; +} + +static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table) +{ + bool result; + int index; + + result = false; + for (index = 0; index < state_table->uclk.num_clk_values; index++) { + if (display_cfg->min_clocks.dcn4x.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.uclk_khz = state_table->uclk.clk_values_khz[index]; + result = true; + break; + } + } + + if (result) { + result = false; + for (index = 0; index < state_table->uclk.num_clk_values; index++) { + if (display_cfg->min_clocks.dcn4x.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; + result = true; + 
break; + } + } + } + + // SVP is not supported on any coarse grained SoCs + display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz = 0; + + return result; +} + +static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mode_support_result, struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table) +{ + bool result = false; + bool dcfclk_fine_grained = false, fclk_fine_grained = false, clock_state_count_identical = false; + unsigned int i; + + if (!state_table || !display_cfg) + return false; + + if (state_table->dcfclk.num_clk_values == 2) { + dcfclk_fine_grained = true; + } + + if (state_table->fclk.num_clk_values == 2) { + fclk_fine_grained = true; + } + + if (state_table->fclk.num_clk_values == state_table->dcfclk.num_clk_values && + state_table->fclk.num_clk_values == state_table->uclk.num_clk_values) { + clock_state_count_identical = true; + } + + if (dcfclk_fine_grained || fclk_fine_grained || !clock_state_count_identical) + result = map_soc_min_clocks_to_dpm_fine_grained(display_cfg, state_table); + else + result = map_soc_min_clocks_to_dpm_coarse_grained(display_cfg, state_table); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dispclk_khz, &state_table->dispclk); + + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + if (result) + result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4x.dppclk_khz, &state_table->dppclk); + } + + for (i = 0; i < display_cfg->display_config.num_streams; i++) { + if (result) + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dscclk_khz, &state_table->dscclk); + if (result) + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dtbclk_khz, &state_table->dtbclk); + if (result) + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.phyclk_khz, &state_table->phyclk); + } + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dpprefclk_khz, &state_table->dppclk); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dtbrefclk_khz, &state_table->dtbclk); + + return result; +} + +static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *display_config, int mask) +{ + unsigned int i; + bool identical = true; + bool contains_drr = false; + unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; + + // Create a remap array to enable simple iteration through only masked stream indicies + for (i = 0; i < display_config->num_streams; i++) { + if (mask & (0x1 << i)) { + remap_array[remap_array_size++] = i; + } + } + + // 0 or 1 display is always trivially synchronizable + if (remap_array_size <= 1) + return true; + + // Check that all displays timings are the same + for (i = 1; i < remap_array_size; i++) { + if (memcmp(&display_config->stream_descriptors[remap_array[i - 1]].timing, &display_config->stream_descriptors[remap_array[i]].timing, sizeof(struct dml2_timing_cfg))) { + identical = false; + break; + } + } + + // Check if any displays are drr + for (i = 0; i < remap_array_size; i++) { + if (display_config->stream_descriptors[remap_array[i]].timing.drr_config.enabled) { + contains_drr = true; 
+ break; + } + } + + // Trivial sync is possible if all displays are identical and none are DRR + return !contains_drr && identical; +} + +static int find_smallest_idle_time_in_vblank_us(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int mask) +{ + unsigned int i; + int min_idle_us = 0; + unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + // Create a remap array to enable simple iteration through only masked stream indicies + for (i = 0; i < in_out->programming->display_config.num_streams; i++) { + if (mask & (0x1 << i)) { + remap_array[remap_array_size++] = i; + } + } + + if (remap_array_size == 0) + return 0; + + min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[0]].vblank_reserved_time_us; + + for (i = 1; i < remap_array_size; i++) { + if (min_idle_us > mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us) + min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us; + } + + return min_idle_us; +} + +static bool determine_power_management_features_with_vblank_only(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + int min_idle_us; + + if (are_timings_trivially_synchronizable(&in_out->programming->display_config, 0xF)) { + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xF); + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->uclk_pstate_supported = true; + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) + in_out->programming->fclk_pstate_supported = true; + } + + return true; +} + +static int get_displays_without_vactive_margin_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us) +{ + unsigned int i; + int displays_without_vactive_margin_mask = 0x0; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + for (i = 0; i < in_out->programming->display_config.num_planes; i++) { + if (mode_support_result->cfg_support_info.plane_support_info[i].active_latency_hiding_us + < latency_hiding_requirement_us) + displays_without_vactive_margin_mask |= (0x1 << i); + } + + return displays_without_vactive_margin_mask; +} + +static int get_displays_with_fams_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us) +{ + unsigned int i; + int displays_with_fams_mask = 0x0; + + for (i = 0; i < in_out->programming->display_config.num_planes; i++) { + if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config != dml2_svp_mode_override_auto) + displays_with_fams_mask |= (0x1 << i); + } + + return displays_with_fams_mask; +} + +static bool determine_power_management_features_with_vactive_and_vblank(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + int displays_without_vactive_margin_mask = 0x0; + int min_idle_us = 0; + + if (in_out->programming->uclk_pstate_supported == false) { + displays_without_vactive_margin_mask = + get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)); + + if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { + min_idle_us = 
find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->uclk_pstate_supported = true; + } + } + + if (in_out->programming->fclk_pstate_supported == false) { + displays_without_vactive_margin_mask = + get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)); + + if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) + in_out->programming->fclk_pstate_supported = true; + } + } + + return true; +} + +static bool determine_power_management_features_with_fams(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + int displays_without_vactive_margin_mask = 0x0; + int displays_without_fams_mask = 0x0; + + displays_without_vactive_margin_mask = + get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)); + + displays_without_fams_mask = + get_displays_with_fams_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)); + + if ((displays_without_vactive_margin_mask & ~displays_without_fams_mask) == 0) + in_out->programming->uclk_pstate_supported = true; + + return true; +} + +static void clamp_uclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; +} + +static void clamp_fclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; +} + +static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + int i; + bool result; + double dispclk_khz; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + calculate_system_active_minimums(in_out); + calculate_svp_prefetch_minimums(in_out); + calculate_idle_minimums(in_out); + + // In NV4, there's no support for FCLK or DCFCLK DPM change before SVP prefetch starts, therefore + // active minimums must be boosted to prefetch minimums + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4x.active.uclk_khz) + in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz; + + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4x.active.fclk_khz) + in_out->programming->min_clocks.dcn4x.active.fclk_khz = 
in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz; + + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4x.active.dcfclk_khz) + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz; + + // need some massaging for the dispclk ramping cases: + dispclk_khz = mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0) * (1.0 + in_out->ip->dispclk_ramp_margin_percent / 100.0); + // ramping margin should not make dispclk exceed the maximum dispclk speed: + dispclk_khz = math_min2(dispclk_khz, in_out->min_clk_table->max_clocks_khz.dispclk); + // but still the required dispclk can be more than the maximum dispclk speed: + dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); + + // DPP Ref is always set to max of all DPP clocks + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; + } + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = (unsigned long) (in_out->programming->min_clocks.dcn4x.dpprefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); + + // DTB Ref is always set to max of all DTB clocks + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; + } + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); + + if (in_out->soc_bb->no_dfs) { + round_to_non_dfs_granularity((unsigned long)dispclk_khz, in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, + &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz); + } else { + add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); + } + + + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); + } + + 
in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; + in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; + + result = map_min_clocks_to_dpm(mode_support_result, in_out->programming, &in_out->soc_bb->clk_table); + + // By default, all power management features are not enabled + in_out->programming->fclk_pstate_supported = false; + in_out->programming->uclk_pstate_supported = false; + + return result; +} + +bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + bool result; + + result = map_mode_to_soc_dpm(in_out); + + // Check if any can be enabled by nominal vblank idle time + determine_power_management_features_with_vblank_only(in_out); + + // Check if any can be enabled in vactive/vblank + determine_power_management_features_with_vactive_and_vblank(in_out); + + // Check if any can be enabled via fams + determine_power_management_features_with_fams(in_out); + + if (in_out->programming->uclk_pstate_supported == false) + clamp_uclk_to_max(in_out); + + if (in_out->programming->fclk_pstate_supported == false) + clamp_fclk_to_max(in_out); + + return result; +} + +bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + bool result; + int displays_without_vactive_margin_mask = 0x0; + int min_idle_us = 0; + + result = map_mode_to_soc_dpm(in_out); + + if (in_out->display_cfg->stage3.success) + in_out->programming->uclk_pstate_supported = true; + + displays_without_vactive_margin_mask = + get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)); + + if (displays_without_vactive_margin_mask == 0) { + in_out->programming->fclk_pstate_supported = true; + } else { + if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) + in_out->programming->fclk_pstate_supported = true; + } + } + + if (in_out->programming->uclk_pstate_supported == false) + clamp_uclk_to_max(in_out); + + if (in_out->programming->fclk_pstate_supported == false) + clamp_fclk_to_max(in_out); + + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xFF); + if (in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && + min_idle_us >= in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) + in_out->programming->stutter.supported_in_blank = true; + else + in_out->programming->stutter.supported_in_blank = false; + + // TODO: Fix me Sam + if (in_out->soc_bb->power_management_parameters.z8_min_idle_time > 0 && + in_out->programming->informative.power_management.z8.stutter_period >= in_out->soc_bb->power_management_parameters.z8_min_idle_time) + in_out->programming->z8_stutter.meets_eco = true; + else + in_out->programming->z8_stutter.meets_eco = false; + + if (in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && + min_idle_us >= in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) + in_out->programming->z8_stutter.supported_in_blank = true; + else + in_out->programming->z8_stutter.supported_in_blank = false; + + return result; +} + +bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out) +{ + const struct 
dml2_display_cfg *display_cfg = &in_out->display_cfg->display_config; + const struct dml2_core_internal_display_mode_lib *mode_lib = &in_out->core->clean_me_up.mode_lib; + struct dml2_dchub_global_register_set *dchubbub_regs = &in_out->programming->global_regs; + + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + + /* set A */ + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); + + /* set B */ + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (int 
unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); + + dchubbub_regs->num_watermark_sets = 2; + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h new file mode 100644 index 000000000000..e7b58f2efda4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_DPMM_DCN4_H__ +#define __DML2_DPMM_DCN4_H__ + +#include "dml2_internal_shared_types.h" + +bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); +bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); +bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c new file mode 100644 index 000000000000..dfd01440737d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
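The watermark programming above (sets A and B) converts each DML watermark from microseconds into DCHUBBUB register units by multiplying by the DLG reference clock in MHz, since microseconds times MHz yields refclk cycles. A small sketch of that scaling follows; the refclk and watermark values are assumptions for illustration, not taken from a real SoC.

#include <stdio.h>

/* us * MHz = refclk cycles, the same scaling applied to every wm_regs[]
 * field in the watermark mapping above. */
static unsigned int wm_us_to_refclk_cycles(double watermark_us, double refclk_mhz)
{
	return (unsigned int)(watermark_us * refclk_mhz);
}

int main(void)
{
	double refclk_mhz = 50.0;	/* assumed dlg_ref_clk_mhz */
	double urgent_wm_us = 6.4;	/* assumed urgent watermark */

	printf("URGENT = %u refclk cycles\n",
	       wm_us_to_refclk_cycles(urgent_wm_us, refclk_mhz));
	return 0;
}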
+ +#include "dml2_dpmm_factory.h" +#include "dml2_dpmm_dcn4.h" +#include "dml2_external_lib_deps.h" + +static bool dummy_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + return true; +} + +static bool dummy_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out) +{ + return true; +} + +bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out) +{ + bool result = false; + + if (!out) + return false; + + memset(out, 0, sizeof(struct dml2_dpmm_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + out->map_mode_to_soc_dpm = &dummy_map_mode_to_soc_dpm; + out->map_watermarks = &dummy_map_watermarks; + result = true; + break; + case dml2_project_dcn4x_stage2: + out->map_mode_to_soc_dpm = &dpmm_dcn3_map_mode_to_soc_dpm; + out->map_watermarks = &dummy_map_watermarks; + result = true; + break; + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->map_mode_to_soc_dpm = &dpmm_dcn4_map_mode_to_soc_dpm; + out->map_watermarks = &dpmm_dcn4_map_watermarks; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h new file mode 100644 index 000000000000..20ba2e446f1d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_DPMM_FACTORY_H__ +#define __DML2_DPMM_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c new file mode 100644 index 000000000000..a265f254152c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#include "dml2_mcg_dcn4.h" +#include "dml_top_soc_parameter_types.h" + +static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table); + +bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out) +{ + return build_min_clock_table(in_out->soc_bb, in_out->min_clk_table); +} + +static unsigned long long uclk_to_dram_bw_kbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config) +{ + unsigned long long bw_kbps = 0; + + bw_kbps = (unsigned long long) uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock; + + return bw_kbps; +} + +static unsigned long round_up_to_quantized_values(unsigned long value, const unsigned long *quantized_values, int num_quantized_values) +{ + int i; + + if (!quantized_values) + return 0; + + for (i = 0; i < num_quantized_values; i++) { + if (quantized_values[i] > value) + return quantized_values[i]; + } + + return 0; +} + +static bool build_min_clk_table_fine_grained(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) +{ + bool dcfclk_fine_grained = false, fclk_fine_grained = false; + + int i; + unsigned int j; + + unsigned long min_dcfclk_khz = 0; + unsigned long min_fclk_khz = 0; + unsigned long prev_100, cur_50; + + if (soc_bb->clk_table.dcfclk.num_clk_values == 2) { + dcfclk_fine_grained = true; + } + + if (soc_bb->clk_table.fclk.num_clk_values == 2) { + fclk_fine_grained = true; + } + + min_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[0]; + min_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[0]; + + // First calculate the table for "balanced" bandwidths across UCLK/FCLK + for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { + min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps = uclk_to_dram_bw_kbps(soc_bb->clk_table.uclk.clk_values_khz[i], &soc_bb->clk_table.dram_config); + + min_table->dram_bw_table.entries[i].min_fclk_khz = (unsigned long)((((double)min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps * soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100) / ((double)soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100)) / soc_bb->fabric_datapath_to_dcn_data_return_bytes); + } + min_table->dram_bw_table.num_entries = soc_bb->clk_table.uclk.num_clk_values; + + // To create the minium table, effectively shift "up" all the dcfclk/fclk entries by 1, and then replace the lowest entry with min fclk/dcfclk + for (i = min_table->dram_bw_table.num_entries - 1; i > 0; i--) { + prev_100 = min_table->dram_bw_table.entries[i - 1].min_fclk_khz; + cur_50 = min_table->dram_bw_table.entries[i].min_fclk_khz / 2; + min_table->dram_bw_table.entries[i].min_fclk_khz = prev_100 > cur_50 ? 
prev_100 : cur_50; + + if (!fclk_fine_grained) { + min_table->dram_bw_table.entries[i].min_fclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_fclk_khz, soc_bb->clk_table.fclk.clk_values_khz, soc_bb->clk_table.fclk.num_clk_values); + } + } + min_table->dram_bw_table.entries[0].min_fclk_khz /= 2; + + // Clamp to minimums and maximums + for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) { + if (min_table->dram_bw_table.entries[i].min_dcfclk_khz < min_dcfclk_khz) + min_table->dram_bw_table.entries[i].min_dcfclk_khz = min_dcfclk_khz; + + if (min_table->dram_bw_table.entries[i].min_fclk_khz < min_fclk_khz) + min_table->dram_bw_table.entries[i].min_fclk_khz = min_fclk_khz; + + if (soc_bb->max_fclk_for_uclk_dpm_khz > 0 && + min_table->dram_bw_table.entries[i].min_fclk_khz > soc_bb->max_fclk_for_uclk_dpm_khz) + min_table->dram_bw_table.entries[i].min_fclk_khz = soc_bb->max_fclk_for_uclk_dpm_khz; + + min_table->dram_bw_table.entries[i].min_dcfclk_khz = + min_table->dram_bw_table.entries[i].min_fclk_khz * + soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent; + + min_table->dram_bw_table.entries[i].min_dcfclk_khz = + min_table->dram_bw_table.entries[i].min_dcfclk_khz * soc_bb->fabric_datapath_to_dcn_data_return_bytes / soc_bb->return_bus_width_bytes; + + if (!dcfclk_fine_grained) { + min_table->dram_bw_table.entries[i].min_dcfclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_dcfclk_khz, soc_bb->clk_table.dcfclk.clk_values_khz, soc_bb->clk_table.dcfclk.num_clk_values); + } + } + + // Prune states which are invalid (some clocks exceed maximum) + for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) { + if (min_table->dram_bw_table.entries[i].min_dcfclk_khz > min_table->max_clocks_khz.dcfclk || + min_table->dram_bw_table.entries[i].min_fclk_khz > min_table->max_clocks_khz.fclk) { + min_table->dram_bw_table.num_entries = i; + break; + } + } + + // Prune duplicate states + for (i = 0; i < (int)min_table->dram_bw_table.num_entries - 1; i++) { + if (min_table->dram_bw_table.entries[i].min_dcfclk_khz == min_table->dram_bw_table.entries[i + 1].min_dcfclk_khz && + min_table->dram_bw_table.entries[i].min_fclk_khz == min_table->dram_bw_table.entries[i + 1].min_fclk_khz && + min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps == min_table->dram_bw_table.entries[i + 1].pre_derate_dram_bw_kbps) { + + // i + 1 is the same state as i, so shift everything + for (j = i + 1; j < min_table->dram_bw_table.num_entries; j++) { + min_table->dram_bw_table.entries[j].min_dcfclk_khz = min_table->dram_bw_table.entries[j + 1].min_dcfclk_khz; + min_table->dram_bw_table.entries[j].min_fclk_khz = min_table->dram_bw_table.entries[j + 1].min_fclk_khz; + min_table->dram_bw_table.entries[j].pre_derate_dram_bw_kbps = min_table->dram_bw_table.entries[j + 1].pre_derate_dram_bw_kbps; + } + min_table->dram_bw_table.num_entries--; + } + } + + return true; +} + +static bool build_min_clk_table_coarse_grained(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) +{ + int i; + + for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { + min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps = uclk_to_dram_bw_kbps(soc_bb->clk_table.uclk.clk_values_khz[i], &soc_bb->clk_table.dram_config); + min_table->dram_bw_table.entries[i].min_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[i]; + 
min_table->dram_bw_table.entries[i].min_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[i]; + } + min_table->dram_bw_table.num_entries = soc_bb->clk_table.uclk.num_clk_values; + + return true; +} + +static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) +{ + bool result; + bool dcfclk_fine_grained = false, fclk_fine_grained = false, clock_state_count_equal = false; + + if (!soc_bb || !min_table) + return false; + + if (soc_bb->clk_table.dcfclk.num_clk_values < 2 || soc_bb->clk_table.fclk.num_clk_values < 2) + return false; + + if (soc_bb->clk_table.uclk.num_clk_values > DML_MCG_MAX_CLK_TABLE_SIZE) + return false; + + if (soc_bb->clk_table.dcfclk.num_clk_values == 2) { + dcfclk_fine_grained = true; + } + + if (soc_bb->clk_table.fclk.num_clk_values == 2) { + fclk_fine_grained = true; + } + + if (soc_bb->clk_table.fclk.num_clk_values == soc_bb->clk_table.dcfclk.num_clk_values && + soc_bb->clk_table.fclk.num_clk_values == soc_bb->clk_table.uclk.num_clk_values) + clock_state_count_equal = true; + + min_table->fixed_clocks_khz.amclk = 0; + min_table->fixed_clocks_khz.dprefclk = soc_bb->dprefclk_mhz * 1000; + min_table->fixed_clocks_khz.pcierefclk = soc_bb->pcie_refclk_mhz * 1000; + min_table->fixed_clocks_khz.dchubrefclk = soc_bb->dchub_refclk_mhz * 1000; + min_table->fixed_clocks_khz.xtalclk = soc_bb->xtalclk_mhz * 1000; + + min_table->max_clocks_khz.dispclk = soc_bb->clk_table.dispclk.clk_values_khz[soc_bb->clk_table.dispclk.num_clk_values - 1]; + min_table->max_clocks_khz.dppclk = soc_bb->clk_table.dppclk.clk_values_khz[soc_bb->clk_table.dppclk.num_clk_values - 1]; + min_table->max_clocks_khz.dscclk = soc_bb->clk_table.dscclk.clk_values_khz[soc_bb->clk_table.dscclk.num_clk_values - 1]; + min_table->max_clocks_khz.dtbclk = soc_bb->clk_table.dtbclk.clk_values_khz[soc_bb->clk_table.dtbclk.num_clk_values - 1]; + min_table->max_clocks_khz.phyclk = soc_bb->clk_table.phyclk.clk_values_khz[soc_bb->clk_table.phyclk.num_clk_values - 1]; + + min_table->max_ss_clocks_khz.dispclk = (unsigned int)((double)min_table->max_clocks_khz.dispclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_ss_clocks_khz.dppclk = (unsigned int)((double)min_table->max_clocks_khz.dppclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_ss_clocks_khz.dtbclk = (unsigned int)((double)min_table->max_clocks_khz.dtbclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + + min_table->max_clocks_khz.dcfclk = soc_bb->clk_table.dcfclk.clk_values_khz[soc_bb->clk_table.dcfclk.num_clk_values - 1]; + min_table->max_clocks_khz.fclk = soc_bb->clk_table.fclk.clk_values_khz[soc_bb->clk_table.fclk.num_clk_values - 1]; + + if (dcfclk_fine_grained || fclk_fine_grained || !clock_state_count_equal) + result = build_min_clk_table_fine_grained(soc_bb, min_table); + else + result = build_min_clk_table_coarse_grained(soc_bb, min_table); + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h new file mode 100644 index 000000000000..f54fde8fba90 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#ifndef __DML2_MCG_DCN4_H__ +#define __DML2_MCG_DCN4_H__ + +#include "dml2_internal_shared_types.h" + +bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out); +bool mcg_dcn4_unit_test(void); + +#endif
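build_min_clock_table() above anchors every entry of the DRAM bandwidth table on uclk_to_dram_bw_kbps(), which multiplies the memory clock by channel count, channel width and transactions per clock. Below is a standalone restatement of that product with the dml2_dram_params fields flattened into plain parameters; the numbers in main() are illustrative only.

#include <stdio.h>

/* DRAM bandwidth = uclk * channels * channel width * transactions per clock,
 * as in uclk_to_dram_bw_kbps() in dml2_mcg_dcn4.c above. */
static unsigned long long uclk_to_dram_bw_kbps(unsigned long uclk_khz,
					       unsigned int channel_count,
					       unsigned int channel_width_bytes,
					       unsigned int transactions_per_clock)
{
	return (unsigned long long)uclk_khz * channel_count *
	       channel_width_bytes * transactions_per_clock;
}

int main(void)
{
	/* e.g. 1000 MHz UCLK, 16 channels, 2 bytes wide, 2 transactions/clk */
	printf("%llu kbps\n", uclk_to_dram_bw_kbps(1000000, 16, 2, 2));
	return 0;
}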
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c new file mode 100644 index 000000000000..c60b8fe90819 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_mcg_factory.h" +#include "dml2_mcg_dcn4.h" +#include "dml2_external_lib_deps.h" + +static bool dummy_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out) +{ + return true; +} + +bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out) +{ + bool result = false; + + if (!out) + return false; + + memset(out, 0, sizeof(struct dml2_mcg_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + out->build_min_clock_table = &dummy_build_min_clock_table; + result = true; + break; + case dml2_project_dcn4x_stage2: + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->build_min_clock_table = &mcg_dcn4_build_min_clock_table; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h new file mode 100644 index 000000000000..ad307deca3b0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_MCG_FACTORY_H__ +#define __DML2_MCG_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c new file mode 100644 index 000000000000..1b9579a32ff2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#include "dml2_pmo_factory.h" +#include "dml2_pmo_dcn3.h" + +static void sort(double *list_a, int list_a_size) +{ + // For all elements b[i] in list_b[] + for (int i = 0; i < list_a_size - 1; i++) { + // Find the first element of list_a that's larger than b[i] + for (int j = i; j < list_a_size - 1; j++) { + if (list_a[j] > list_a[j + 1]) + swap(list_a[j], list_a[j + 1]); + } + } +} + +static double get_max_reserved_time_on_all_planes_with_stream_index(struct display_configuation_with_meta *config, unsigned int stream_index) +{ + struct dml2_plane_parameters *plane_descriptor; + long max_reserved_time_ns = 0; + + for (unsigned int i = 0; i < config->display_config.num_planes; i++) { + plane_descriptor = &config->display_config.plane_descriptors[i]; + + if (plane_descriptor->stream_index == stream_index) + if (plane_descriptor->overrides.reserved_vblank_time_ns > max_reserved_time_ns) + max_reserved_time_ns = plane_descriptor->overrides.reserved_vblank_time_ns; + } + + return (max_reserved_time_ns / 1000.0); +} + + +static void set_reserved_time_on_all_planes_with_stream_index(struct display_configuation_with_meta *config, unsigned int stream_index, double reserved_time_us) +{ + struct dml2_plane_parameters *plane_descriptor; + + for (unsigned int i = 0; i < config->display_config.num_planes; i++) { + plane_descriptor = &config->display_config.plane_descriptors[i]; + + if (plane_descriptor->stream_index == stream_index) + plane_descriptor->overrides.reserved_vblank_time_ns = (long int)(reserved_time_us * 1000); + } +} + +static void remove_duplicates(double *list_a, int *list_a_size) +{ + int j = 0; + + if (*list_a_size == 0) + return; + + for (int i = 1; i < *list_a_size; i++) { + if (list_a[j] != list_a[i]) { + j++; + list_a[j] = list_a[i]; + } + } + + *list_a_size = j + 1; +} + +static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) +{ + if (*mpc_combine_factor < limit) { + (*mpc_combine_factor)++; + return true; + } + + return false; +} + +static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, + int free_pipes) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + bool result = true; + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 + if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { + in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = + in_out->cfg_support_info->plane_support_info[i].dpps_used; + // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. + if (free_pipes > 0) { + if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, + pmo->mpc_combine_limit)) { + // We've reached max pipes allocatable to a single plane, so we fail. + result = false; + break; + } else { + // Successfully added another pipe to this failing plane. + free_pipes--; + } + } else { + // No free pipes to add. 
+ result = false; + break; + } + } else { + // If the stream of this plane needs ODM combine, no further optimization can be done. + result = false; + break; + } + } + } + + return result; +} + +static bool iterate_to_next_candidiate(struct dml2_pmo_instance *pmo, int size) +{ + int borrow_from, i; + bool success = false; + + if (pmo->scratch.pmo_dcn3.current_candidate[0] > 0) { + pmo->scratch.pmo_dcn3.current_candidate[0]--; + success = true; + } else { + for (borrow_from = 1; borrow_from < size && pmo->scratch.pmo_dcn3.current_candidate[borrow_from] == 0; borrow_from++) + ; + + if (borrow_from < size) { + pmo->scratch.pmo_dcn3.current_candidate[borrow_from]--; + for (i = 0; i < borrow_from; i++) { + pmo->scratch.pmo_dcn3.current_candidate[i] = pmo->scratch.pmo_dcn3.reserved_time_candidates_count[i] - 1; + } + + success = true; + } + } + + return success; +} + +static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) +{ + bool result = true; + + if (*odm_mode == dml2_odm_mode_auto) { + switch (odms_calculated) { + case 1: + *odm_mode = dml2_odm_mode_bypass; + break; + case 2: + *odm_mode = dml2_odm_mode_combine_2to1; + break; + case 3: + *odm_mode = dml2_odm_mode_combine_3to1; + break; + case 4: + *odm_mode = dml2_odm_mode_combine_4to1; + break; + default: + result = false; + break; + } + } + + if (result) { + if (*odm_mode == dml2_odm_mode_bypass) { + *odm_mode = dml2_odm_mode_combine_2to1; + } else if (*odm_mode == dml2_odm_mode_combine_2to1) { + *odm_mode = dml2_odm_mode_combine_3to1; + } else if (*odm_mode == dml2_odm_mode_combine_3to1) { + *odm_mode = dml2_odm_mode_combine_4to1; + } else { + result = false; + } + } + + return result; +} + +static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) +{ + unsigned int i, count; + + count = 0; + for (i = 0; i < display_cfg->num_planes; i++) { + if (display_cfg->plane_descriptors[i].stream_index == stream_index) + count++; + } + + return count; +} + +static bool are_timings_trivially_synchronizable(struct display_configuation_with_meta *display_config, int mask) +{ + unsigned int i; + bool identical = true; + bool contains_drr = false; + unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; + + // Create a remap array to enable simple iteration through only masked stream indicies + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (mask & (0x1 << i)) { + remap_array[remap_array_size++] = i; + } + } + + // 0 or 1 display is always trivially synchronizable + if (remap_array_size <= 1) + return true; + + for (i = 1; i < remap_array_size; i++) { + if (memcmp(&display_config->display_config.stream_descriptors[remap_array[i - 1]].timing, + &display_config->display_config.stream_descriptors[remap_array[i]].timing, + sizeof(struct dml2_timing_cfg))) { + identical = false; + break; + } + } + + for (i = 0; i < remap_array_size; i++) { + if (display_config->display_config.stream_descriptors[remap_array[i]].timing.drr_config.enabled) { + contains_drr = true; + break; + } + } + + return !contains_drr && identical; +} + +bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + pmo->soc_bb = in_out->soc_bb; + pmo->ip_caps = in_out->ip_caps; + pmo->mpc_combine_limit = 2; + pmo->odm_combine_limit = 4; + pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; + + pmo->options = in_out->options; + + return true; +} + +static bool 
is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) +{ + /* + * Htotal, Hblank start/end, and Hsync start/end all must be divisible + * in order for the horizontal timing params to be considered divisible + * by 2. Hsync start is always 0. + */ + unsigned long h_blank_start = timing->h_total - timing->h_front_porch; + + return (timing->h_total % denominator == 0) && + (h_blank_start % denominator == 0) && + (timing->h_blank_end % denominator == 0) && + (timing->h_sync_width % denominator == 0); +} + +static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) +{ + switch (encoder_type) { + case dml2_dp: + case dml2_edp: + case dml2_dp2p0: + case dml2_none: + return true; + case dml2_hdmi: + case dml2_hdmifrl: + default: + return false; + } +} + +bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) +{ + unsigned int i; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + + if (in_out->instance->options->disable_dyn_odm || + (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) + return false; + + for (i = 0; i < display_config->num_planes; i++) + /* + * vmin optimization is required to be seamlessly switched off + * at any time when the new configuration is no longer + * supported. However switching from ODM combine to MPC combine + * is not always seamless. When there not enough free pipes, we + * will have to use the same secondary OPP heads as secondary + * DPP pipes in MPC combine in new state. This transition is + * expected to cause glitches. To avoid the transition, we only + * allow vmin optimization if the stream's base configuration + * doesn't require MPC combine. This condition checks if MPC + * combine is enabled. If so do not optimize the stream. + */ + if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && + mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) + in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + + for (i = 0; i < display_config->num_streams; i++) { + if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && + in_out->instance->options->disable_dyn_odm_for_stream_with_svp) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + /* + * ODM Combine requires horizontal timing divisible by 2 so each + * ODM segment has the same size. + */ + else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + /* + * Our hardware support seamless ODM transitions for DP encoders + * only. 
+ */ + else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + } + + return true; +} + +bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) +{ + bool is_vmin = true; + + if (in_out->vmin_limits->dispclk_khz > 0 && + in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) + is_vmin = false; + + return is_vmin; +} + +static int find_highest_odm_load_stream_index( + const struct dml2_display_cfg *display_config, + const struct dml2_core_mode_support_result *mode_support_result) +{ + unsigned int i; + int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; + + for (i = 0; i < display_config->num_streams; i++) { + if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz + / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; + else + odm_load = 0; + + if (odm_load > highest_odm_load) { + highest_odm_load_index = i; + highest_odm_load = odm_load; + } + } + + return highest_odm_load_index; +} + +bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) +{ + int stream_index; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + unsigned int odms_used; + struct dml2_stream_parameters *stream_descriptor; + bool optimizable = false; + + /* + * highest odm load stream must be optimizable to continue as dispclk is + * bounded by it. + */ + stream_index = find_highest_odm_load_stream_index(display_config, + mode_support_result); + + if (stream_index < 0 || + in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) + return false; + + odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; + if ((int)odms_used >= in_out->instance->odm_combine_limit) + return false; + + memcpy(in_out->optimized_display_config, + in_out->base_display_config, + sizeof(struct display_configuation_with_meta)); + + stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; + while (!optimizable && increase_odm_combine_factor( + &stream_descriptor->overrides.odm_mode, + odms_used)) { + switch (stream_descriptor->overrides.odm_mode) { + case dml2_odm_mode_combine_2to1: + optimizable = true; + break; + case dml2_odm_mode_combine_3to1: + /* + * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 3. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_combine_4to1: + /* + * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. 
+ */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 4. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + default: + break; + } + } + + return optimizable; +} + +bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i, used_pipes, free_pipes, planes_on_stream; + bool result; + + if (in_out->display_config != in_out->optimized_display_cfg) { + memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); + } + + //Count number of free pipes, and check if any odm combine is in use. + used_pipes = 0; + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; + } + free_pipes = pmo->ip_caps->pipe_count - used_pipes; + + // Optimization loop + // The goal here is to add more pipes to any planes + // which are failing mcache admissibility + result = true; + + // The optimization logic depends on whether ODM combine is enabled, and the stream count. + if (in_out->optimized_display_cfg->num_streams > 1) { + // If there are multiple streams, we are limited to only be able to optimize mcache failures on planes + // which are not ODM combined. + + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } else if (in_out->optimized_display_cfg->num_streams == 1) { + // In single stream cases, we still optimize mcache failures when there's ODM combine with some + // additional logic. + + if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { + // If ODM combine is enabled, then the logic is to increase ODM combine factor. + + // Optimization for streams with > 1 ODM combine factor is only supported for single display. + planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. + if (free_pipes >= planes_on_stream) { + if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, + in_out->cfg_support_info->plane_support_info[i].dpps_used)) { + result = false; + } else { + break; + } + } else { + result = false; + break; + } + } + } + } else { + // If ODM combine is not enabled, then we can actually use the same logic as before. 
+ + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } + } else { + result = true; + } + + return result; +} + +bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; + const struct dml2_stream_parameters *stream_descriptor; + const struct dml2_plane_parameters *plane_descriptor; + unsigned int stream_index, plane_index, candidate_count; + double min_reserved_vblank_time = 0; + int fclk_twait_needed_mask = 0x0; + int uclk_twait_needed_mask = 0x0; + + state->performed = true; + state->min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency; + pmo->scratch.pmo_dcn3.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + pmo->scratch.pmo_dcn3.max_latency_index = pmo->mcg_clock_table_size - 1; + pmo->scratch.pmo_dcn3.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + + pmo->scratch.pmo_dcn3.stream_mask = 0xF; + + for (plane_index = 0; plane_index < in_out->base_display_config->display_config.num_planes; plane_index++) { + plane_descriptor = &in_out->base_display_config->display_config.plane_descriptors[plane_index]; + stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[plane_descriptor->stream_index]; + + if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us < + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us && + stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_if_needed) + uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_try) + uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us < + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us && + stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_if_needed) + fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_try) + fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (plane_descriptor->overrides.legacy_svp_config != dml2_svp_mode_override_auto) { + pmo->scratch.pmo_dcn3.stream_mask &= ~(0x1 << plane_descriptor->stream_index); + } + } + + for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { + stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[stream_index]; + + // The absolute minimum required time is the minimum of all the required budgets + /* + if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate + == dml2_twait_budgeting_setting_require) + + if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + min_reserved_vblank_time = max_double2(min_reserved_vblank_time, + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us); + } + + if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate + == dml2_twait_budgeting_setting_require) { + + if 
(are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + min_reserved_vblank_time = max_double2(min_reserved_vblank_time, + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us); + } + } + + if (stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit + == dml2_twait_budgeting_setting_require) + min_reserved_vblank_time = max_double2(min_reserved_vblank_time, + in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us); + */ + + min_reserved_vblank_time = get_max_reserved_time_on_all_planes_with_stream_index(in_out->base_display_config, stream_index); + + // Insert the absolute minimum into the array + candidate_count = 1; + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][0] = min_reserved_vblank_time; + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count; + + if (!(pmo->scratch.pmo_dcn3.stream_mask & (0x1 << stream_index))) + continue; + + // For every optional feature, we create a candidate for it only if it's larger minimum. + if ((fclk_twait_needed_mask & (0x1 << stream_index)) && + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us > min_reserved_vblank_time) { + + if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us; + } + } + + if ((uclk_twait_needed_mask & (0x1 << stream_index)) && + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us > min_reserved_vblank_time) { + + if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; + } + } + + if ((stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_try || + stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_if_needed) && + in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > min_reserved_vblank_time) { + + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = + in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us; + } + + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count; + + // Finally sort the array of candidates + sort(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index], + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]); + + remove_duplicates(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index], + &pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]); + + pmo->scratch.pmo_dcn3.current_candidate[stream_index] = + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] - 1; + } + + return true; +} + +bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i, stream_index; + + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { + stream_index = in_out->base_display_config->display_config.plane_descriptors[i].stream_index; + + if 
(in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][pmo->scratch.pmo_dcn3.current_candidate[stream_index]] * 1000) { + return false; + } + } + + return true; +} + +bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + unsigned int stream_index; + bool success = false; + bool reached_end; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (in_out->last_candidate_failed) { + if (pmo->scratch.pmo_dcn3.cur_latency_index < pmo->scratch.pmo_dcn3.max_latency_index) { + // If we haven't tried all the clock bounds to support this state, try a higher one + pmo->scratch.pmo_dcn3.cur_latency_index++; + + success = true; + } else { + // If there's nothing higher to try, then we have to have a smaller canadidate + reached_end = !iterate_to_next_candidiate(pmo, in_out->optimized_display_config->display_config.num_streams); + + if (!reached_end) { + pmo->scratch.pmo_dcn3.cur_latency_index = pmo->scratch.pmo_dcn3.min_latency_index; + success = true; + } + } + } else { + success = true; + } + + if (success) { + in_out->optimized_display_config->stage3.min_clk_index_for_latency = pmo->scratch.pmo_dcn3.cur_latency_index; + + for (stream_index = 0; stream_index < in_out->optimized_display_config->display_config.num_streams; stream_index++) { + set_reserved_time_on_all_planes_with_stream_index(in_out->optimized_display_config, stream_index, + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][pmo->scratch.pmo_dcn3.current_candidate[stream_index]]); + } + } + + return success; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h new file mode 100644 index 000000000000..f00bd9e72a86 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_PMO_DCN3_H__ +#define __DML2_PMO_DCN3_H__ + +#include "dml2_internal_shared_types.h" + +bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out); + +bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + +bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); +bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); +bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + +bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); +bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); +bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c new file mode 100644 index 000000000000..c26e100fcaf2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -0,0 +1,2390 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
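pmo_dcn3_optimize_for_pstate_support() above walks the per-stream reserved-time candidates like a multi-digit counter: stream 0 is decremented first, and once it reaches its smallest candidate the next stream with headroom is borrowed from while all lower streams reset to their largest index (iterate_to_next_candidiate()). The following is a standalone restatement of that walk, with the pmo scratch arrays replaced by plain arrays and illustrative candidate counts.

#include <stdbool.h>
#include <stdio.h>

#define MAX_STREAMS 4

/* Odometer-style walk over per-stream candidate indices, mirroring
 * iterate_to_next_candidiate() in dml2_pmo_dcn3.c. */
static bool next_candidate(int current[], const int count[], int num_streams)
{
	int borrow_from, i;

	if (current[0] > 0) {
		current[0]--;
		return true;
	}

	for (borrow_from = 1; borrow_from < num_streams && current[borrow_from] == 0; borrow_from++)
		;

	if (borrow_from >= num_streams)
		return false;

	current[borrow_from]--;
	for (i = 0; i < borrow_from; i++)
		current[i] = count[i] - 1;

	return true;
}

int main(void)
{
	int count[MAX_STREAMS] = { 3, 2 };	/* candidates per stream (illustrative) */
	int current[MAX_STREAMS] = { 2, 1 };	/* start at the largest candidates */

	do {
		printf("try candidates {%d, %d}\n", current[0], current[1]);
	} while (next_candidate(current, count, 2));

	return 0;
}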
+ +#include "dml2_pmo_factory.h" +#include "dml2_debug.h" +#include "lib_float_math.h" +#include "dml2_pmo_dcn4_fams2.h" + +static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding +static const double MIN_BLANK_STUTTER_FACTOR = 3.0; + +static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { + // VActive Preferred + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then SVP + { + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // Then DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + */ +}; + +static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = { + // VActive only is preferred + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then VActive + VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // Then VBlank only + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // Then SVP + VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // Then SVP + DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then SVP + SVP + { + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then DRR + VActive + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then DRR + DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + */ +}; + +static const int 
base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // All VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // All DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + */ +}; + +static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive }, + .allow_state_increase = true, + }, + + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank }, + .allow_state_increase = false, + }, + + // All Vblank + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, + .allow_state_increase = false, + }, + + // All DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr }, + .allow_state_increase = true, + }, + + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, + .allow_state_increase = true, + }, + */ +}; + +static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / sizeof(struct dml2_pmo_pstate_strategy); + + +static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) +{ + bool result = true; + + if (*odm_mode == dml2_odm_mode_auto) { + switch (odms_calculated) { + case 1: + *odm_mode = dml2_odm_mode_bypass; + break; + case 2: + *odm_mode = dml2_odm_mode_combine_2to1; + break; + case 3: + *odm_mode = dml2_odm_mode_combine_3to1; + break; + case 4: + *odm_mode = dml2_odm_mode_combine_4to1; + break; + default: + result = false; + break; + } + } + + if (result) { + if (*odm_mode == dml2_odm_mode_bypass) { + *odm_mode = dml2_odm_mode_combine_2to1; + } else if (*odm_mode == dml2_odm_mode_combine_2to1) { + *odm_mode = dml2_odm_mode_combine_3to1; + } else if (*odm_mode == dml2_odm_mode_combine_3to1) { + *odm_mode = dml2_odm_mode_combine_4to1; + } else { + result = false; + } + } + + return result; +} + +static bool increase_mpc_combine_factor(unsigned int 
*mpc_combine_factor, unsigned int limit) +{ + if (*mpc_combine_factor < limit) { + (*mpc_combine_factor)++; + return true; + } + + return false; +} + +static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) +{ + unsigned int i, count; + + count = 0; + for (i = 0; i < display_cfg->num_planes; i++) { + if (display_cfg->plane_descriptors[i].stream_index == stream_index) + count++; + } + + return count; +} + +static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, + int free_pipes) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + bool result = true; + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 + if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { + in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = + in_out->cfg_support_info->plane_support_info[i].dpps_used; + // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. + if (free_pipes > 0) { + if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, + pmo->mpc_combine_limit)) { + // We've reached max pipes allocatable to a single plane, so we fail. + result = false; + break; + } else { + // Successfully added another pipe to this failing plane. + free_pipes--; + } + } else { + // No free pipes to add. + result = false; + break; + } + } else { + // If the stream of this plane needs ODM combine, no further optimization can be done. + result = false; + break; + } + } + } + + return result; +} + +bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i, used_pipes, free_pipes, planes_on_stream; + bool result; + + if (in_out->display_config != in_out->optimized_display_cfg) { + memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); + } + + //Count number of free pipes, and check if any odm combine is in use. + used_pipes = 0; + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; + } + free_pipes = pmo->ip_caps->pipe_count - used_pipes; + + // Optimization loop + // The goal here is to add more pipes to any planes + // which are failing mcache admissibility + result = true; + + // The optimization logic depends on whether ODM combine is enabled, and the stream count. + if (in_out->optimized_display_cfg->num_streams > 1 || in_out->instance->options->disable_dyn_odm) { + // If there are multiple streams, we are limited to only be able to optimize mcache failures on planes + // which are not ODM combined. + + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } else if (in_out->optimized_display_cfg->num_streams == 1) { + // In single stream cases, we still optimize mcache failures when there's ODM combine with some + // additional logic. 
+ + if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { + // If ODM combine is enabled, then the logic is to increase ODM combine factor. + + // Optimization for streams with > 1 ODM combine factor is only supported for single display. + planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. + if (free_pipes >= planes_on_stream) { + if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, + in_out->cfg_support_info->plane_support_info[i].dpps_used)) { + result = false; + } else { + break; + } + } else { + result = false; + break; + } + } + } + } else { + // If ODM combine is not enabled, then we can actually use the same logic as before. + + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } + } else { + result = true; + } + + return result; +} + +static enum dml2_pstate_method convert_strategy_to_drr_variant(const enum dml2_pstate_method base_strategy) +{ + enum dml2_pstate_method variant_strategy = 0; + + switch (base_strategy) { + case dml2_pstate_method_vactive: + variant_strategy = dml2_pstate_method_fw_vactive_drr; + break; + case dml2_pstate_method_vblank: + variant_strategy = dml2_pstate_method_fw_vblank_drr; + break; + case dml2_pstate_method_fw_svp: + variant_strategy = dml2_pstate_method_fw_svp_drr; + break; + case dml2_pstate_method_fw_vactive_drr: + case dml2_pstate_method_fw_vblank_drr: + case dml2_pstate_method_fw_svp_drr: + case dml2_pstate_method_fw_drr: + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: + default: + /* no variant for this mode */ + variant_strategy = base_strategy; + } + + return variant_strategy; +} + +static struct dml2_pmo_pstate_strategy *get_expanded_strategy_list(struct dml2_pmo_init_data *init_data, int stream_count) +{ + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; + + switch (stream_count) { + case 1: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_1_display; + break; + case 2: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_2_display; + break; + case 3: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_3_display; + break; + case 4: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_4_display; + break; + default: + break; + } + + return expanded_strategy_list; +} + +static unsigned int get_num_expanded_strategies( + struct dml2_pmo_init_data *init_data, + int stream_count) +{ + return init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]; +} + +static void insert_strategy_into_expanded_list( + const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy, + const int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) +{ + if (expanded_strategy_list && num_expanded_strategies) { + memcpy(&expanded_strategy_list[*num_expanded_strategies], per_stream_pstate_strategy, 
sizeof(struct dml2_pmo_pstate_strategy)); + + (*num_expanded_strategies)++; + } +} + +static void expand_base_strategy( + const struct dml2_pmo_pstate_strategy *base_strategy, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) +{ + bool skip_to_next_stream; + bool expanded_strategy_added; + bool skip_iteration; + unsigned int i, j; + unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int stream_iteration_indices[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + struct dml2_pmo_pstate_strategy cur_strategy_list = { 0 }; + + /* determine number of displays per method */ + for (i = 0; i < stream_count; i++) { + /* increment the count of the earliest index with the same method */ + for (j = 0; j < stream_count; j++) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { + num_streams_per_method[j] = num_streams_per_method[j] + 1; + break; + } + } + } + + cur_strategy_list.allow_state_increase = base_strategy->allow_state_increase; + + i = 0; + /* uses a while loop instead of recursion to build permutations of base strategy */ + while (stream_iteration_indices[0] < stream_count) { + skip_to_next_stream = false; + expanded_strategy_added = false; + skip_iteration = false; + + /* determine what to do for this iteration */ + if (stream_iteration_indices[i] < stream_count && num_streams_per_method[stream_iteration_indices[i]] != 0) { + /* decrement count and assign method */ + cur_strategy_list.per_stream_pstate_method[i] = base_strategy->per_stream_pstate_method[stream_iteration_indices[i]]; + num_streams_per_method[stream_iteration_indices[i]] -= 1; + + if (i >= stream_count - 1) { + /* insert into strategy list */ + insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, expanded_strategy_list, num_expanded_strategies); + expanded_strategy_added = true; + } else { + /* skip to next stream */ + skip_to_next_stream = true; + } + } else { + skip_iteration = true; + } + + /* prepare for next iteration */ + if (skip_to_next_stream) { + i++; + } else { + /* restore count */ + if (!skip_iteration) { + num_streams_per_method[stream_iteration_indices[i]] += 1; + } + + /* increment iteration count */ + stream_iteration_indices[i]++; + + /* if iterations are complete, or last stream was reached */ + if ((stream_iteration_indices[i] >= stream_count || expanded_strategy_added) && i > 0) { + /* reset per stream index, decrement i */ + stream_iteration_indices[i] = 0; + i--; + + /* restore previous stream's count and increment index */ + num_streams_per_method[stream_iteration_indices[i]] += 1; + stream_iteration_indices[i]++; + } + } + } +} + + +static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy, + const struct dml2_pmo_pstate_strategy *variant_strategy, + const unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], + const unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], + const unsigned int stream_count) +{ + bool valid = true; + unsigned int i; + + /* check all restrictions are met */ + for (i = 0; i < stream_count; i++) { + /* vblank + vblank_drr variants are invalid */ + if (base_strategy->per_stream_pstate_method[i] == dml2_pstate_method_vblank && + ((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) || + num_streams_per_variant_method[i] > 1)) { + valid = false; + break; + } + } + + return valid; +} + +static void expand_variant_strategy( + const struct 
dml2_pmo_pstate_strategy *base_strategy, + const unsigned int stream_count, + const bool should_permute, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) +{ + bool variant_found; + unsigned int i, j; + unsigned int method_index; + unsigned int stream_index; + unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + enum dml2_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; + struct dml2_pmo_pstate_strategy variant_strategy = { 0 }; + + /* determine number of displays per method */ + for (i = 0; i < stream_count; i++) { + /* increment the count of the earliest index with the same method */ + for (j = 0; j < stream_count; j++) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { + num_streams_per_method[j] = num_streams_per_method[j] + 1; + break; + } + } + + per_stream_variant_method[i] = convert_strategy_to_drr_variant(base_strategy->per_stream_pstate_method[i]); + } + memcpy(num_streams_per_base_method, num_streams_per_method, sizeof(unsigned int) * PMO_DCN4_MAX_DISPLAYS); + + memcpy(&variant_strategy, base_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + + method_index = 0; + /* uses a while loop instead of recursion to build permutations of base strategy */ + while (num_streams_per_base_method[0] > 0 || method_index != 0) { + if (method_index == stream_count) { + /* construct variant strategy */ + variant_found = false; + stream_index = 0; + + for (i = 0; i < stream_count; i++) { + for (j = 0; j < num_streams_per_base_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = base_strategy->per_stream_pstate_method[i]; + } + + for (j = 0; j < num_streams_per_variant_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = per_stream_variant_method[i]; + if (base_strategy->per_stream_pstate_method[i] != per_stream_variant_method[i]) { + variant_found = true; + } + } + } + + if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) { + if (should_permute) { + /* permutations are permitted, proceed to expand */ + expand_base_strategy(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); + } else { + /* no permutations allowed, so add to list now */ + insert_strategy_into_expanded_list(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); + } + } + + /* rollback to earliest method with bases remaining */ + for (method_index = stream_count - 1; method_index > 0; method_index--) { + if (num_streams_per_base_method[method_index]) { + /* bases remaining */ + break; + } else { + /* reset counters */ + num_streams_per_base_method[method_index] = num_streams_per_method[method_index]; + num_streams_per_variant_method[method_index] = 0; + } + } + } + + if (num_streams_per_base_method[method_index]) { + num_streams_per_base_method[method_index]--; + num_streams_per_variant_method[method_index]++; + + method_index++; + } else if (method_index != 0) { + method_index++; + } + } +} + +void pmo_dcn4_fams2_expand_base_pstate_strategies( + const struct dml2_pmo_pstate_strategy *base_strategies_list, + const unsigned int num_base_strategies, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + 
unsigned int *num_expanded_strategies) +{ + unsigned int i; + + /* expand every explicit base strategy (except all DRR) */ + for (i = 0; i < num_base_strategies; i++) { + expand_base_strategy(&base_strategies_list[i], stream_count, expanded_strategy_list, num_expanded_strategies); + expand_variant_strategy(&base_strategies_list[i], stream_count, true, expanded_strategy_list, num_expanded_strategies); + } +} + +bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) +{ + int i = 0; + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int base_list_size = 0; + const struct dml2_pmo_pstate_strategy *base_list = NULL; + unsigned int *expanded_list_size = NULL; + struct dml2_pmo_pstate_strategy *expanded_list = NULL; + + pmo->soc_bb = in_out->soc_bb; + pmo->ip_caps = in_out->ip_caps; + pmo->mpc_combine_limit = 2; + pmo->odm_combine_limit = 4; + pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; + + pmo->fams_params.v2.subvp.refresh_rate_limit_max = 175; + pmo->fams_params.v2.subvp.refresh_rate_limit_min = 0; + pmo->fams_params.v2.drr.refresh_rate_limit_max = 1000; + pmo->fams_params.v2.drr.refresh_rate_limit_min = 119; + + pmo->options = in_out->options; + + /* generate permutations of p-state configs from base strategy list */ + for (i = 0; i < PMO_DCN4_MAX_DISPLAYS; i++) { + switch (i+1) { + case 1: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_1_display; + base_list_size = base_strategy_list_1_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display; + + break; + case 2: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_2_display; + base_list_size = base_strategy_list_2_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display; + + break; + case 3: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_3_display; + base_list_size = base_strategy_list_3_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display; + + break; + case 4: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_4_display; + base_list_size = base_strategy_list_4_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display; + + break; + } + + DML_ASSERT(base_list_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + + /* populate list */ + 
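+ /*
+ * Illustrative example: with two displays, a base entry of { vactive, vblank }
+ * expands into both orderings ({ vactive, vblank }, { vblank, vactive }) plus
+ * DRR-variant combinations such as { fw_vactive_drr, vblank } and
+ * { vactive, fw_vblank_drr }, each again in both orderings, which is why the
+ * expanded lists are larger than the base lists defined above.
+ */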
pmo_dcn4_fams2_expand_base_pstate_strategies(
+ base_list,
+ base_list_size,
+ i + 1,
+ expanded_list,
+ expanded_list_size);
+ }
+
+ return true;
+}
+
+static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator)
+{
+ /*
+ * Htotal, Hblank start/end, and Hsync start/end all must be divisible
+ * in order for the horizontal timing params to be considered divisible
+ * by the given denominator. Hsync start is always 0.
+ */
+ unsigned long h_blank_start = timing->h_total - timing->h_front_porch;
+
+ return (timing->h_total % denominator == 0) &&
+ (h_blank_start % denominator == 0) &&
+ (timing->h_blank_end % denominator == 0) &&
+ (timing->h_sync_width % denominator == 0);
+}
+
+static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type)
+{
+ switch (encoder_type) {
+ case dml2_dp:
+ case dml2_edp:
+ case dml2_dp2p0:
+ case dml2_none:
+ return true;
+ case dml2_hdmi:
+ case dml2_hdmifrl:
+ default:
+ return false;
+ }
+}
+
+bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out)
+{
+ unsigned int i;
+ const struct dml2_display_cfg *display_config =
+ &in_out->base_display_config->display_config;
+ const struct dml2_core_mode_support_result *mode_support_result =
+ &in_out->base_display_config->mode_support_result;
+ struct dml2_optimization_stage4_state *state =
+ &in_out->base_display_config->stage4;
+
+ if (in_out->instance->options->disable_dyn_odm ||
+ (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1))
+ return false;
+
+ for (i = 0; i < display_config->num_planes; i++)
+ /*
+ * vmin optimization is required to be seamlessly switched off
+ * at any time when the new configuration is no longer
+ * supported. However switching from ODM combine to MPC combine
+ * is not always seamless. When there are not enough free pipes, we
+ * will have to use the same secondary OPP heads as secondary
+ * DPP pipes in MPC combine in the new state. This transition is
+ * expected to cause glitches. To avoid the transition, we only
+ * allow vmin optimization if the stream's base configuration
+ * doesn't require MPC combine. This condition checks if MPC
+ * combine is enabled. If so, do not optimize the stream.
+ */
+ if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 &&
+ mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1)
+ state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true;
+
+ for (i = 0; i < display_config->num_streams; i++) {
+ if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm)
+ state->unoptimizable_streams[i] = true;
+ else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid &&
+ in_out->instance->options->disable_dyn_odm_for_stream_with_svp)
+ state->unoptimizable_streams[i] = true;
+ /*
+ * ODM Combine requires horizontal timing divisible by 2 so each
+ * ODM segment has the same size.
+ */
+ else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2))
+ state->unoptimizable_streams[i] = true;
+ /*
+ * Our hardware supports seamless ODM transitions for DP encoders
+ * only.
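+ * (HDMI and HDMI FRL streams are therefore marked unoptimizable below.)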
+ */ + else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) + state->unoptimizable_streams[i] = true; + } + + state->performed = true; + + return true; +} + +bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) +{ + bool is_vmin = true; + + if (in_out->vmin_limits->dispclk_khz > 0 && + in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) + is_vmin = false; + + return is_vmin; +} + +static int find_highest_odm_load_stream_index( + const struct dml2_display_cfg *display_config, + const struct dml2_core_mode_support_result *mode_support_result) +{ + unsigned int i; + int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; + + for (i = 0; i < display_config->num_streams; i++) { + if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz + / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; + else + odm_load = 0; + + if (odm_load > highest_odm_load) { + highest_odm_load_index = i; + highest_odm_load = odm_load; + } + } + + return highest_odm_load_index; +} + +bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) +{ + int stream_index; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + unsigned int odms_used; + struct dml2_stream_parameters *stream_descriptor; + bool optimizable = false; + + /* + * highest odm load stream must be optimizable to continue as dispclk is + * bounded by it. + */ + stream_index = find_highest_odm_load_stream_index(display_config, + mode_support_result); + + if (stream_index < 0 || + in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) + return false; + + odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; + if ((int)odms_used >= in_out->instance->odm_combine_limit) + return false; + + memcpy(in_out->optimized_display_config, + in_out->base_display_config, + sizeof(struct display_configuation_with_meta)); + + stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; + while (!optimizable && increase_odm_combine_factor( + &stream_descriptor->overrides.odm_mode, + odms_used)) { + switch (stream_descriptor->overrides.odm_mode) { + case dml2_odm_mode_combine_2to1: + optimizable = true; + break; + case dml2_odm_mode_combine_3to1: + /* + * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 3. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_combine_4to1: + /* + * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. 
+ */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 4. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + default: + break; + } + } + + return optimizable; +} + +static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset) +{ + *bit_field = *bit_field | (0x1 << bit_offset); +} + +static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset) +{ + if (bit_field & (0x1 << bit_offset)) + return true; + + return false; +} + +static void build_synchronized_timing_groups( + struct dml2_pmo_instance *pmo, + struct display_configuation_with_meta *display_config) +{ + unsigned int i, j; + struct dml2_timing_cfg *master_timing; + + unsigned int stream_mapped_mask = 0; + unsigned int num_timing_groups = 0; + unsigned int timing_group_idx = 0; + struct dml2_pmo_scratch *s = &pmo->scratch; + + /* clear all group masks */ + memset(s->pmo_dcn4.synchronized_timing_group_masks, 0, sizeof(s->pmo_dcn4.synchronized_timing_group_masks)); + memset(s->pmo_dcn4.group_is_drr_enabled, 0, sizeof(s->pmo_dcn4.group_is_drr_enabled)); + memset(s->pmo_dcn4.group_is_drr_active, 0, sizeof(s->pmo_dcn4.group_is_drr_active)); + memset(s->pmo_dcn4.group_line_time_us, 0, sizeof(s->pmo_dcn4.group_line_time_us)); + s->pmo_dcn4.num_timing_groups = 0; + + for (i = 0; i < display_config->display_config.num_streams; i++) { + master_timing = &display_config->display_config.stream_descriptors[i].timing; + + /* only need to build group of this stream is not in a group already */ + if (is_bit_set_in_bitfield(stream_mapped_mask, i)) { + continue; + } + set_bit_in_bitfield(&stream_mapped_mask, i); + timing_group_idx = num_timing_groups; + num_timing_groups++; + + /* trivially set default timing group to itself */ + set_bit_in_bitfield(&s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i); + s->pmo_dcn4.group_line_time_us[timing_group_idx] = (double)master_timing->h_total / master_timing->pixel_clock_khz * 1000.0; + + /* if drr is in use, timing is not sychnronizable */ + if (master_timing->drr_config.enabled) { + s->pmo_dcn4.group_is_drr_enabled[timing_group_idx] = true; + s->pmo_dcn4.group_is_drr_active[timing_group_idx] = !master_timing->drr_config.disallowed && + (master_timing->drr_config.drr_active_fixed || master_timing->drr_config.drr_active_variable); + continue; + } + + /* find synchronizable timing groups */ + for (j = i + 1; j < display_config->display_config.num_streams; j++) { + if (memcmp(master_timing, + &display_config->display_config.stream_descriptors[j].timing, + sizeof(struct dml2_timing_cfg)) == 0) { + set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); + set_bit_in_bitfield(&stream_mapped_mask, j); + } + } + } + + s->pmo_dcn4.num_timing_groups = num_timing_groups; +} + +static bool all_timings_support_vactive(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + unsigned int i; + bool valid = true; + + // Create a remap array to enable simple iteration through only masked stream 
indicies + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + /* check if stream has enough vactive margin */ + valid &= is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.stream_vactive_capability_mask, i); + } + } + + return valid; +} + +static bool all_timings_support_vblank(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + unsigned int i; + + bool synchronizable = true; + + /* find first vblank stream index and compare the timing group mask */ + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + if (mask != pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[i]) { + /* vblank streams are not synchronizable */ + synchronizable = false; + } + break; + } + } + + return synchronizable; +} + +static unsigned int calc_svp_microschedule(const struct dml2_pstate_meta *pstate_meta) +{ + return pstate_meta->contention_delay_otg_vlines + + pstate_meta->method_subvp.programming_delay_otg_vlines + + pstate_meta->method_subvp.phantom_vtotal + + pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + + pstate_meta->blackout_otg_vlines; +} + +static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + unsigned int i; + for (i = 0; i < DML2_MAX_PLANES; i++) { + const struct dml2_stream_parameters *stream_descriptor; + const struct dml2_pstate_meta *stream_pstate_meta; + + if (is_bit_set_in_bitfield(mask, i)) { + stream_descriptor = &display_config->display_config.stream_descriptors[i]; + stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[i]; + + if (!stream_descriptor->timing.drr_config.enabled) + return false; + + /* cannot support required vtotal */ + if (stream_pstate_meta->method_drr.stretched_vtotal > stream_pstate_meta->max_vtotal) { + return false; + } + + /* check rr is within bounds */ + if (stream_pstate_meta->nom_refresh_rate_hz < pmo->fams_params.v2.drr.refresh_rate_limit_min || + stream_pstate_meta->nom_refresh_rate_hz > pmo->fams_params.v2.drr.refresh_rate_limit_max) { + return false; + } + + /* check required stretch is allowed */ + if (stream_descriptor->timing.drr_config.max_instant_vtotal_delta > 0 && + stream_pstate_meta->method_drr.stretched_vtotal - stream_pstate_meta->nom_vtotal > (int)stream_descriptor->timing.drr_config.max_instant_vtotal_delta) { + return false; + } + } + } + + return true; +} + +static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + const struct dml2_stream_parameters *stream_descriptor; + const struct dml2_plane_parameters *plane_descriptor; + const struct dml2_pstate_meta *stream_pstate_meta; + unsigned int microschedule_vlines; + unsigned int i; + unsigned int mcaches_per_plane; + unsigned int total_mcaches_required = 0; + + unsigned int num_planes_per_stream[DML2_MAX_PLANES] = { 0 }; + + /* confirm timing it is not a centered timing */ + for (i = 0; i < display_config->display_config.num_planes; i++) { + plane_descriptor = &display_config->display_config.plane_descriptors[i]; + mcaches_per_plane = 0; + + if (plane_descriptor->surface.dcc.enable) { + mcaches_per_plane += display_config->stage2.mcache_allocations[i].num_mcaches_plane0 + + display_config->stage2.mcache_allocations[i].num_mcaches_plane1 - + 
(display_config->stage2.mcache_allocations[i].last_slice_sharing.plane0_plane1 ? 1 : 0); + } + + if (is_bit_set_in_bitfield(mask, (unsigned char)plane_descriptor->stream_index)) { + num_planes_per_stream[plane_descriptor->stream_index]++; + + /* check recout height covers entire otg vactive, and single plane */ + if (num_planes_per_stream[plane_descriptor->stream_index] > 1 || + !plane_descriptor->composition.rect_out_height_spans_vactive || + plane_descriptor->composition.rotation_angle != dml2_rotation_0) { + return false; + } + + /* phantom requires same number of mcaches as main */ + if (plane_descriptor->surface.dcc.enable) { + mcaches_per_plane *= 2; + } + } + total_mcaches_required += mcaches_per_plane; + } + + if (total_mcaches_required > pmo->soc_bb->num_dcc_mcaches) { + /* too many mcaches required */ + return false; + } + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + stream_descriptor = &display_config->display_config.stream_descriptors[i]; + stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[i]; + + if (stream_descriptor->overrides.disable_subvp) { + return false; + } + + microschedule_vlines = calc_svp_microschedule(&pmo->scratch.pmo_dcn4.stream_pstate_meta[i]); + + /* block if using an interlaced timing */ + if (stream_descriptor->timing.interlaced) { + return false; + } + + /* 1) svp main stream's vactive must be able to fit the microschedule + * 2) refresh rate must be within the allowed bounds + */ + if (microschedule_vlines >= stream_descriptor->timing.v_active || + (stream_pstate_meta->nom_refresh_rate_hz < pmo->fams_params.v2.subvp.refresh_rate_limit_min || + stream_pstate_meta->nom_refresh_rate_hz > pmo->fams_params.v2.subvp.refresh_rate_limit_max)) { + return false; + } + } + } + + return true; +} + +static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) +{ + scratch->pmo_dcn4.pstate_strategy_candidates[scratch->pmo_dcn4.num_pstate_candidates] = *pstate_strategy; + scratch->pmo_dcn4.num_pstate_candidates++; +} + +static enum dml2_pstate_method uclk_pstate_strategy_override_to_pstate_method(const enum dml2_uclk_pstate_change_strategy override_strategy) +{ + enum dml2_pstate_method method = dml2_pstate_method_na; + + switch (override_strategy) { + case dml2_uclk_pstate_change_strategy_force_vactive: + method = dml2_pstate_method_vactive; + break; + case dml2_uclk_pstate_change_strategy_force_vblank: + method = dml2_pstate_method_vblank; + break; + case dml2_uclk_pstate_change_strategy_force_drr: + method = dml2_pstate_method_fw_drr; + break; + case dml2_uclk_pstate_change_strategy_force_mall_svp: + method = dml2_pstate_method_fw_svp; + break; + case dml2_uclk_pstate_change_strategy_force_mall_full_frame: + case dml2_uclk_pstate_change_strategy_auto: + default: + method = dml2_pstate_method_na; + } + + return method; +} + +static enum dml2_uclk_pstate_change_strategy pstate_method_to_uclk_pstate_strategy_override(const enum dml2_pstate_method method) +{ + enum dml2_uclk_pstate_change_strategy override_strategy = dml2_uclk_pstate_change_strategy_auto; + + switch (method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_vactive; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_vblank; + break; + case dml2_pstate_method_fw_svp: + case 
dml2_pstate_method_fw_svp_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; + break; + case dml2_pstate_method_fw_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_drr; + break; + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: + default: + override_strategy = dml2_uclk_pstate_change_strategy_auto; + } + + return override_strategy; +} + +static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pstate_method method) +{ + unsigned int i; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && + display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != pstate_method_to_uclk_pstate_strategy_override(method)) + return false; + } + } + + return true; +} + +static void build_method_scheduling_params( + struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta, + struct dml2_pstate_meta *stream_pstate_meta) +{ + stream_method_pstate_meta->allow_time_us = + (double)((int)stream_method_pstate_meta->allow_end_otg_vline - (int)stream_method_pstate_meta->allow_start_otg_vline) * + stream_pstate_meta->otg_vline_time_us; + if (stream_method_pstate_meta->allow_time_us >= stream_method_pstate_meta->period_us) { + /* when allow wave overlaps an entire frame, it is always schedulable (DRR can do this)*/ + stream_method_pstate_meta->disallow_time_us = 0.0; + } else { + stream_method_pstate_meta->disallow_time_us = + stream_method_pstate_meta->period_us - stream_method_pstate_meta->allow_time_us; + } +} + +static struct dml2_pstate_per_method_common_meta *get_per_method_common_meta( + struct dml2_pmo_instance *pmo, + enum dml2_pstate_method stream_pstate_method, + int stream_idx) +{ + struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta = NULL; + + switch (stream_pstate_method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_vactive.common; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_vblank.common; + break; + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_subvp.common; + break; + case dml2_pstate_method_fw_drr: + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_drr.common; + break; + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: + default: + stream_method_pstate_meta = NULL; + } + + return stream_method_pstate_meta; +} + +static bool is_timing_group_schedulable( + struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const struct dml2_pmo_pstate_strategy *pstate_strategy, + const unsigned int timing_group_idx, + struct dml2_pstate_per_method_common_meta 
*group_pstate_meta) +{ + unsigned int i; + struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta; + + unsigned int base_stream_idx = 0; + struct dml2_pmo_scratch *s = &pmo->scratch; + + /* find base stream idx */ + for (base_stream_idx = 0; base_stream_idx < display_cfg->display_config.num_streams; base_stream_idx++) { + if (is_bit_set_in_bitfield(s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], base_stream_idx)) { + /* master stream found */ + break; + } + } + + /* init allow start and end lines for timing group */ + stream_method_pstate_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); + if (!stream_method_pstate_meta) + return false; + + group_pstate_meta->allow_start_otg_vline = stream_method_pstate_meta->allow_start_otg_vline; + group_pstate_meta->allow_end_otg_vline = stream_method_pstate_meta->allow_end_otg_vline; + group_pstate_meta->period_us = stream_method_pstate_meta->period_us; + for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { + if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { + stream_method_pstate_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); + if (!stream_method_pstate_meta) + continue; + + if (group_pstate_meta->allow_start_otg_vline < stream_method_pstate_meta->allow_start_otg_vline) { + /* set group allow start to larger otg vline */ + group_pstate_meta->allow_start_otg_vline = stream_method_pstate_meta->allow_start_otg_vline; + } + + if (group_pstate_meta->allow_end_otg_vline > stream_method_pstate_meta->allow_end_otg_vline) { + /* set group allow end to smaller otg vline */ + group_pstate_meta->allow_end_otg_vline = stream_method_pstate_meta->allow_end_otg_vline; + } + + /* check waveform still has positive width */ + if (group_pstate_meta->allow_start_otg_vline >= group_pstate_meta->allow_end_otg_vline) { + /* timing group is not schedulable */ + return false; + } + } + } + + /* calculate the rest of the meta */ + build_method_scheduling_params(group_pstate_meta, &pmo->scratch.pmo_dcn4.stream_pstate_meta[base_stream_idx]); + + return group_pstate_meta->allow_time_us > 0.0 && + group_pstate_meta->disallow_time_us < pmo->ip_caps->fams2.max_allow_delay_us; +} + +static bool is_config_schedulable( + struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const struct dml2_pmo_pstate_strategy *pstate_strategy) +{ + unsigned int i, j; + bool schedulable; + struct dml2_pmo_scratch *s = &pmo->scratch; + + double max_allow_delay_us = 0.0; + + memset(s->pmo_dcn4.group_common_pstate_meta, 0, sizeof(s->pmo_dcn4.group_common_pstate_meta)); + memset(s->pmo_dcn4.sorted_group_gtl_disallow_index, 0, sizeof(unsigned int) * DML2_MAX_PLANES); + + /* search for a general solution to the schedule */ + + /* STAGE 0: Early return for special cases */ + if (display_cfg->display_config.num_streams == 0) { + return true; + } + + /* STAGE 1: confirm allow waves overlap for synchronizable streams */ + schedulable = true; + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i; + s->pmo_dcn4.sorted_group_gtl_period_index[i] = i; + if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_pstate_meta[i])) { + /* synchronized timing group was not schedulable */ + schedulable = false; + break; + } + max_allow_delay_us += 
s->pmo_dcn4.group_common_pstate_meta[i].disallow_time_us; + } + + if ((schedulable && s->pmo_dcn4.num_timing_groups <= 1) || !schedulable) { + /* 1. the only timing group was schedulable, so early pass + * 2. one of the timing groups was not schedulable, so early fail */ + return schedulable; + } + + /* STAGE 2: Check allow can't be masked entirely by other disallows */ + schedulable = true; + + /* sort disallow times from greatest to least */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + bool swapped = false; + + for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { + double j_disallow_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j]].disallow_time_us; + double jp1_disallow_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]].disallow_time_us; + if (j_disallow_us < jp1_disallow_us) { + /* swap as A < B */ + swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j], + s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]); + swapped = true; + } + } + + /* sorted, exit early */ + if (!swapped) + break; + } + + /* Check worst case disallow region occurs in the middle of allow for the + * other display, or when >2 streams continue to halve the remaining allow time. + */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + if (s->pmo_dcn4.group_common_pstate_meta[i].disallow_time_us <= 0.0) { + /* this timing group always allows */ + continue; + } + + double max_allow_time_us = s->pmo_dcn4.group_common_pstate_meta[i].allow_time_us; + for (j = 0; j < s->pmo_dcn4.num_timing_groups; j++) { + unsigned int sorted_j = s->pmo_dcn4.sorted_group_gtl_disallow_index[j]; + /* stream can't overlap itself */ + if (i != sorted_j && s->pmo_dcn4.group_common_pstate_meta[sorted_j].disallow_time_us > 0.0) { + max_allow_time_us = math_min2( + s->pmo_dcn4.group_common_pstate_meta[sorted_j].allow_time_us, + (max_allow_time_us - s->pmo_dcn4.group_common_pstate_meta[sorted_j].disallow_time_us) / 2); + + if (max_allow_time_us < 0.0) { + /* failed exit early */ + break; + } + } + } + + if (max_allow_time_us <= 0.0) { + /* not enough time for microschedule in the worst case */ + schedulable = false; + break; + } + } + + if (schedulable && max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { + return true; + } + + /* STAGE 3: check larger allow can fit period of all other streams */ + schedulable = true; + + /* sort periods from greatest to least */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + bool swapped = false; + + for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { + double j_period_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j]].period_us; + double jp1_period_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]].period_us; + if (j_period_us < jp1_period_us) { + /* swap as A < B */ + swap(s->pmo_dcn4.sorted_group_gtl_period_index[j], + s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]); + swapped = true; + } + } + + /* sorted, exit early */ + if (!swapped) + break; + } + + /* check larger allow can fit period of all other streams */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups - 1; i++) { + unsigned int sorted_i = s->pmo_dcn4.sorted_group_gtl_period_index[i]; + unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1]; + + if (s->pmo_dcn4.group_common_pstate_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_pstate_meta[sorted_ip1].period_us || + (s->pmo_dcn4.group_is_drr_enabled[sorted_ip1] && 
s->pmo_dcn4.group_is_drr_active[sorted_ip1])) { + schedulable = false; + break; + } + } + + if (schedulable && max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { + return true; + } + + /* STAGE 4: When using HW exclusive modes, check disallow alignments are within allowed threshold */ + if (s->pmo_dcn4.num_timing_groups == 2 && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[0]) && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[1])) { + double period_ratio; + double max_shift_us; + double shift_per_period; + + /* default period_0 > period_1 */ + unsigned int lrg_idx = 0; + unsigned int sml_idx = 1; + if (s->pmo_dcn4.group_common_pstate_meta[0].period_us < s->pmo_dcn4.group_common_pstate_meta[1].period_us) { + /* period_0 < period_1 */ + lrg_idx = 1; + sml_idx = 0; + } + period_ratio = s->pmo_dcn4.group_common_pstate_meta[lrg_idx].period_us / s->pmo_dcn4.group_common_pstate_meta[sml_idx].period_us; + shift_per_period = s->pmo_dcn4.group_common_pstate_meta[sml_idx].period_us * (period_ratio - math_floor(period_ratio)); + max_shift_us = s->pmo_dcn4.group_common_pstate_meta[lrg_idx].disallow_time_us - s->pmo_dcn4.group_common_pstate_meta[sml_idx].allow_time_us; + max_allow_delay_us = max_shift_us / shift_per_period * s->pmo_dcn4.group_common_pstate_meta[lrg_idx].period_us; + + if (shift_per_period > 0.0 && + shift_per_period < s->pmo_dcn4.group_common_pstate_meta[lrg_idx].allow_time_us + s->pmo_dcn4.group_common_pstate_meta[sml_idx].allow_time_us && + max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { + schedulable = true; + } + } + + return schedulable; +} + +static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pstate_method stream_pstate_method, + unsigned int stream_index) +{ + const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; + bool strategy_matches_drr_requirements = true; + + /* check if strategy is compatible with stream drr capability and strategy */ + if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && + display_cfg->display_config.num_streams > 1 && + stream_descriptor->timing.drr_config.enabled && + (stream_descriptor->timing.drr_config.drr_active_fixed || stream_descriptor->timing.drr_config.drr_active_variable)) { + /* DRR is active, so config may become unschedulable */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && + is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable) { + /* DRR is variable, fw exclusive methods require DRR to be clamped */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && + pmo->options->disable_drr_var_when_var_active && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable) { + /* DRR variable is active, but policy blocks DRR for p-state when this happens */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && + (pmo->options->disable_drr_var || + !stream_descriptor->timing.drr_config.enabled || + 
stream_descriptor->timing.drr_config.disallowed)) { + /* DRR variable strategies are disallowed due to settings or policy */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && + (pmo->options->disable_drr_clamped || + (!stream_descriptor->timing.drr_config.enabled || + (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || + (pmo->options->disable_drr_clamped_when_var_active && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable))) { + /* DRR fixed strategies are disallowed due to settings or policy */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && + pmo->options->disable_fams2) { + /* FW modes require FAMS2 */ + strategy_matches_drr_requirements = false; + } + + return strategy_matches_drr_requirements; +} + +static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const struct dml2_pmo_pstate_strategy *pstate_strategy) +{ + struct dml2_pmo_scratch *s = &pmo->scratch; + + unsigned int stream_index = 0; + + unsigned int svp_count = 0; + unsigned int svp_stream_mask = 0; + unsigned int drr_count = 0; + unsigned int drr_stream_mask = 0; + unsigned int vactive_count = 0; + unsigned int vactive_stream_mask = 0; + unsigned int vblank_count = 0; + unsigned int vblank_stream_mask = 0; + + bool strategy_matches_forced_requirements = true; + bool strategy_matches_drr_requirements = true; + + // Tabulate everything + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + + if (!all_planes_match_method(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], + pstate_strategy->per_stream_pstate_method[stream_index])) { + strategy_matches_forced_requirements = false; + break; + } + + strategy_matches_drr_requirements &= + stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index); + + if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { + svp_count++; + set_bit_in_bitfield(&svp_stream_mask, stream_index); + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { + drr_count++; + set_bit_in_bitfield(&drr_stream_mask, stream_index); + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { + vactive_count++; + set_bit_in_bitfield(&vactive_stream_mask, stream_index); + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { + vblank_count++; + set_bit_in_bitfield(&vblank_stream_mask, stream_index); + } + } + + if (!strategy_matches_forced_requirements || !strategy_matches_drr_requirements) + return false; + + if (vactive_count > 0 && !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask)) + return false; + + if (vblank_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vblank(pmo, display_cfg, vblank_stream_mask))) + return false; + + if 
(drr_count > 0 && (pmo->options->disable_drr_var || !all_timings_support_drr(pmo, display_cfg, drr_stream_mask))) + return false; + + if (svp_count > 0 && (pmo->options->disable_svp || !all_timings_support_svp(pmo, display_cfg, svp_stream_mask))) + return false; + + return is_config_schedulable(pmo, display_cfg, pstate_strategy); +} + +static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) +{ + unsigned int i; + int min_vactive_margin_us = 0xFFFFFFF; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active < min_vactive_margin_us) + min_vactive_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active; + } + } + + return min_vactive_margin_us; +} + +static int get_vactive_det_fill_latency_delay_us(const struct display_configuation_with_meta *display_cfg, int plane_mask) +{ + unsigned char i; + int max_vactive_fill_us = 0; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].vactive_det_fill_delay_us[dml2_pstate_type_uclk] > max_vactive_fill_us) + max_vactive_fill_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].vactive_det_fill_delay_us[dml2_pstate_type_uclk]; + } + } + + return max_vactive_fill_us; +} + +static void build_pstate_meta_per_stream(struct dml2_pmo_instance *pmo, + struct display_configuation_with_meta *display_config, + int stream_index) +{ + const struct dml2_ip_capabilities *ip_caps = pmo->ip_caps; + const struct dml2_stream_parameters *stream_descriptor = &display_config->display_config.stream_descriptors[stream_index]; + const struct core_stream_support_info *stream_info = &display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index]; + const struct dml2_timing_cfg *timing = &stream_descriptor->timing; + struct dml2_pstate_meta *stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index]; + + /* worst case all other streams require some programming at the same time, 0 if only 1 stream */ + unsigned int contention_delay_us = (ip_caps->fams2.vertical_interrupt_ack_delay_us + + (unsigned int)math_max3(ip_caps->fams2.subvp_programming_delay_us, ip_caps->fams2.drr_programming_delay_us, ip_caps->fams2.allow_programming_delay_us)) * + (display_config->display_config.num_streams - 1); + + /* common */ + stream_pstate_meta->valid = true; + stream_pstate_meta->otg_vline_time_us = (double)timing->h_total / timing->pixel_clock_khz * 1000.0; + stream_pstate_meta->nom_vtotal = stream_descriptor->timing.vblank_nom + stream_descriptor->timing.v_active; + stream_pstate_meta->nom_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / + (stream_pstate_meta->nom_vtotal * timing->h_total); + stream_pstate_meta->nom_frame_time_us = + (double)stream_pstate_meta->nom_vtotal * stream_pstate_meta->otg_vline_time_us; + stream_pstate_meta->vblank_start = timing->v_blank_end + timing->v_active; + + if (stream_descriptor->timing.drr_config.enabled == true) { + if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) { + stream_pstate_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / + ((double)stream_descriptor->timing.drr_config.min_refresh_uhz * stream_descriptor->timing.h_total) * 1e9); 
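+ /*
+ * pixel_clock_khz and min_refresh_uhz differ by a factor of 1e9 in units,
+ * hence the scaling above. Illustrative numbers: a 594000 kHz pixel clock
+ * with h_total 4400 and a 48 Hz (48000000 uHz) minimum refresh gives
+ * floor(594000 / (48000000 * 4400) * 1e9) = 2812 lines.
+ */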
+ } else { + /* assume min of 48Hz */ + stream_pstate_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / + (48000000.0 * stream_descriptor->timing.h_total) * 1e9); + } + } else { + stream_pstate_meta->max_vtotal = stream_pstate_meta->nom_vtotal; + } + stream_pstate_meta->min_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / + (stream_pstate_meta->max_vtotal * timing->h_total); + stream_pstate_meta->max_frame_time_us = + (double)stream_pstate_meta->max_vtotal * stream_pstate_meta->otg_vline_time_us; + + stream_pstate_meta->scheduling_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.scheduling_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.vertical_interrupt_ack_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->contention_delay_otg_vlines = + (unsigned int)math_ceil(contention_delay_us / stream_pstate_meta->otg_vline_time_us); + /* worst case allow to target needs to account for all streams' allow events overlapping, and 1 line for error */ + stream_pstate_meta->allow_to_target_delay_otg_vlines = + (unsigned int)(math_ceil((ip_caps->fams2.vertical_interrupt_ack_delay_us + contention_delay_us + ip_caps->fams2.allow_programming_delay_us) / stream_pstate_meta->otg_vline_time_us)) + 1; + stream_pstate_meta->min_allow_width_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.min_allow_width_us / stream_pstate_meta->otg_vline_time_us); + /* this value should account for urgent latency */ + stream_pstate_meta->blackout_otg_vlines = + (unsigned int)math_ceil(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us / + stream_pstate_meta->otg_vline_time_us); + + /* scheduling params should be built based on the worst case for allow_time:disallow_time */ + + /* vactive */ + if (display_config->display_config.num_streams == 1) { + /* for single stream, guarantee at least an instant of allow */ + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = (unsigned int)math_floor( + math_max2(0.0, + timing->v_active - math_max2(1.0, stream_pstate_meta->min_allow_width_otg_vlines) - stream_pstate_meta->blackout_otg_vlines)); + } else { + /* for multi stream, bound to a max fill time defined by IP caps */ + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = + (unsigned int)math_floor((double)ip_caps->max_vactive_det_fill_delay_us / stream_pstate_meta->otg_vline_time_us); + } + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us = stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines * stream_pstate_meta->otg_vline_time_us; + + if (stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us > 0.0) { + stream_pstate_meta->method_vactive.common.allow_start_otg_vline = + timing->v_blank_end + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + stream_pstate_meta->method_vactive.common.allow_end_otg_vline = + stream_pstate_meta->vblank_start - + stream_pstate_meta->blackout_otg_vlines; + } else { + stream_pstate_meta->method_vactive.common.allow_start_otg_vline = 0; + stream_pstate_meta->method_vactive.common.allow_end_otg_vline = 0; + } + stream_pstate_meta->method_vactive.common.period_us = stream_pstate_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_pstate_meta->method_vactive.common, stream_pstate_meta); + + /* vblank */ + stream_pstate_meta->method_vblank.common.allow_start_otg_vline = 
stream_pstate_meta->vblank_start; + stream_pstate_meta->method_vblank.common.allow_end_otg_vline = + stream_pstate_meta->method_vblank.common.allow_start_otg_vline + 1; + stream_pstate_meta->method_vblank.common.period_us = stream_pstate_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_pstate_meta->method_vblank.common, stream_pstate_meta); + + /* subvp */ + stream_pstate_meta->method_subvp.programming_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_programming_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_df_throttle_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_prefetch_to_mall_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_subvp.phantom_vactive = + stream_pstate_meta->allow_to_target_delay_otg_vlines + + stream_pstate_meta->min_allow_width_otg_vlines + + stream_info->phantom_min_v_active; + stream_pstate_meta->method_subvp.phantom_vfp = + stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines; + /* phantom vtotal = v_bp(vstartup) + v_sync(1) + v_fp(throttle_delay) + v_active(allow_to_target + min_allow + min_vactive)*/ + stream_pstate_meta->method_subvp.phantom_vtotal = + stream_info->phantom_v_startup + + stream_pstate_meta->method_subvp.phantom_vfp + + 1 + + stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines + + stream_pstate_meta->method_subvp.phantom_vactive; + stream_pstate_meta->method_subvp.common.allow_start_otg_vline = + stream_descriptor->timing.v_blank_end + + stream_pstate_meta->contention_delay_otg_vlines + + stream_pstate_meta->method_subvp.programming_delay_otg_vlines + + stream_pstate_meta->method_subvp.phantom_vtotal + + stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + + stream_pstate_meta->allow_to_target_delay_otg_vlines; + stream_pstate_meta->method_subvp.common.allow_end_otg_vline = + stream_pstate_meta->vblank_start - + stream_pstate_meta->blackout_otg_vlines; + stream_pstate_meta->method_subvp.common.period_us = stream_pstate_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_pstate_meta->method_subvp.common, stream_pstate_meta); + + /* drr */ + stream_pstate_meta->method_drr.programming_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_drr.common.allow_start_otg_vline = + stream_pstate_meta->vblank_start + + stream_pstate_meta->allow_to_target_delay_otg_vlines; + stream_pstate_meta->method_drr.common.period_us = stream_pstate_meta->nom_frame_time_us; + if (display_config->display_config.num_streams <= 1) { + /* only need to stretch vblank for blackout time */ + stream_pstate_meta->method_drr.stretched_vtotal = + stream_pstate_meta->nom_vtotal + + stream_pstate_meta->allow_to_target_delay_otg_vlines + + stream_pstate_meta->min_allow_width_otg_vlines + + stream_pstate_meta->blackout_otg_vlines; + } else { + /* multi display needs to always be schedulable */ + stream_pstate_meta->method_drr.stretched_vtotal = + stream_pstate_meta->nom_vtotal * 2 + + stream_pstate_meta->allow_to_target_delay_otg_vlines + + stream_pstate_meta->min_allow_width_otg_vlines + + stream_pstate_meta->blackout_otg_vlines; + } + stream_pstate_meta->method_drr.common.allow_end_otg_vline = + 
stream_pstate_meta->method_drr.stretched_vtotal - + stream_pstate_meta->blackout_otg_vlines; + build_method_scheduling_params(&stream_pstate_meta->method_drr.common, stream_pstate_meta); +} + +static void build_subvp_meta_per_stream(struct dml2_pmo_instance *pmo, + struct display_configuation_with_meta *display_config, + int stream_index) +{ + struct dml2_implicit_svp_meta *stream_svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; + struct dml2_pstate_meta *stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index]; + + stream_svp_meta->valid = true; + + /* PMO FAMS2 precaulcates these values */ + stream_svp_meta->v_active = stream_pstate_meta->method_subvp.phantom_vactive; + stream_svp_meta->v_front_porch = stream_pstate_meta->method_subvp.phantom_vfp; + stream_svp_meta->v_total = stream_pstate_meta->method_subvp.phantom_vtotal; +} + +bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; + struct dml2_pmo_scratch *s = &pmo->scratch; + + struct display_configuation_with_meta *display_config; + const struct dml2_plane_parameters *plane_descriptor; + const struct dml2_pmo_pstate_strategy *strategy_list = NULL; + struct dml2_pmo_pstate_strategy override_base_strategy = { 0 }; + unsigned int strategy_list_size = 0; + unsigned int plane_index, stream_index, i; + bool build_override_strategy = true; + + state->performed = true; + in_out->base_display_config->stage3.min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency; + + display_config = in_out->base_display_config; + display_config->display_config.overrides.enable_subvp_implicit_pmo = true; + + memset(s, 0, sizeof(struct dml2_pmo_scratch)); + + if (display_config->display_config.overrides.all_streams_blanked) { + return true; + } + + pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size; + pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + + // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping) + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + plane_descriptor = &display_config->display_config.plane_descriptors[plane_index]; + + set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); + + state->pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; + + build_override_strategy &= plane_descriptor->overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto; + override_base_strategy.per_stream_pstate_method[plane_descriptor->stream_index] = + uclk_pstate_strategy_override_to_pstate_method(plane_descriptor->overrides.uclk_pstate_change_strategy); + } + + // Figure out which streams can do vactive, and also build up implicit SVP and FAMS2 meta + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= (int)(MIN_VACTIVE_MARGIN_PCT * pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us)) + set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); + + /* FAMS2 meta */ + 
build_pstate_meta_per_stream(pmo, display_config, stream_index); + + /* SVP meta */ + build_subvp_meta_per_stream(pmo, display_config, stream_index); + } + + /* get synchronized timing groups */ + build_synchronized_timing_groups(pmo, display_config); + + if (build_override_strategy) { + /* build expanded override strategy list (no permutations) */ + override_base_strategy.allow_state_increase = true; + s->pmo_dcn4.num_expanded_override_strategies = 0; + insert_strategy_into_expanded_list(&override_base_strategy, + display_config->display_config.num_streams, + s->pmo_dcn4.expanded_override_strategy_list, + &s->pmo_dcn4.num_expanded_override_strategies); + expand_variant_strategy(&override_base_strategy, + display_config->display_config.num_streams, + false, + s->pmo_dcn4.expanded_override_strategy_list, + &s->pmo_dcn4.num_expanded_override_strategies); + + /* use override strategy list */ + strategy_list = s->pmo_dcn4.expanded_override_strategy_list; + strategy_list_size = s->pmo_dcn4.num_expanded_override_strategies; + } else { + /* use predefined strategy list */ + strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); + strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); + } + + if (!strategy_list || strategy_list_size == 0) + return false; + + s->pmo_dcn4.num_pstate_candidates = 0; + + for (i = 0; i < strategy_list_size && s->pmo_dcn4.num_pstate_candidates < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { + if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, &strategy_list[i])) { + insert_into_candidate_list(&strategy_list[i], display_config->display_config.num_streams, s); + } + } + + if (s->pmo_dcn4.num_pstate_candidates > 0) { + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates-1].allow_state_increase = true; + s->pmo_dcn4.cur_pstate_candidate = -1; + return true; + } else { + return false; + } +} + +static void reset_display_configuration(struct display_configuation_with_meta *display_config) +{ + unsigned int plane_index; + unsigned int stream_index; + struct dml2_plane_parameters *plane; + + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + display_config->stage3.stream_svp_meta[stream_index].valid = false; + } + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + // Unset SubVP + plane->overrides.legacy_svp_config = dml2_svp_mode_override_auto; + + // Remove reserve time + plane->overrides.reserved_vblank_time_ns = 0; + + // Reset strategy to auto + plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_na; + } +} + +static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_drr; + } + } +} + 
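The mask-based helpers above and below all consume the per-stream plane masks that pmo_dcn4_fams2_init_for_pstate_support() builds: each stream owns one bitfield whose set bits are the indices of the planes mapped to that stream, and set_bit_in_bitfield()/is_bit_set_in_bitfield() do the bit bookkeeping. A minimal standalone sketch of that pattern follows; the helper names, array sizes and the plane-to-stream mapping are illustrative only, not driver code.

#include <stdio.h>

#define SKETCH_MAX_STREAMS 4u

/* Same idea as set_bit_in_bitfield() / is_bit_set_in_bitfield() in the PMO code. */
static void sketch_set_bit(unsigned int *bitfield, unsigned int bit)
{
	*bitfield |= 1u << bit;
}

static int sketch_test_bit(unsigned int bitfield, unsigned int bit)
{
	return (bitfield & (1u << bit)) != 0;
}

int main(void)
{
	/* Hypothetical mapping: planes 0 and 1 sit on stream 0, plane 2 on stream 1. */
	const unsigned int plane_to_stream[] = { 0, 0, 1 };
	const unsigned int num_planes = 3;
	unsigned int stream_plane_mask[SKETCH_MAX_STREAMS] = { 0 };
	unsigned int plane, stream;

	/* Build the per-stream plane masks, as init_for_pstate_support() does. */
	for (plane = 0; plane < num_planes; plane++)
		sketch_set_bit(&stream_plane_mask[plane_to_stream[plane]], plane);

	/* Apply a per-stream p-state method by walking set bits, as the *_by_mask helpers do. */
	for (stream = 0; stream < SKETCH_MAX_STREAMS; stream++)
		for (plane = 0; plane < num_planes; plane++)
			if (sketch_test_bit(stream_plane_mask[stream], plane))
				printf("stream %u owns plane %u\n", stream, plane);

	return 0;
}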
+static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + struct dml2_pmo_scratch *scratch = &pmo->scratch; + + unsigned int plane_index; + int stream_index = -1; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp; + } + } + + if (stream_index >= 0) { + memcpy(&display_config->stage3.stream_svp_meta[stream_index], + &scratch->pmo_dcn4.stream_svp_meta[stream_index], + sizeof(struct dml2_implicit_svp_meta)); + } +} + +static void setup_planes_for_svp_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + struct dml2_pmo_scratch *scratch = &pmo->scratch; + + unsigned int plane_index; + int stream_index = -1; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp_drr; + } + } + + if (stream_index >= 0) { + memcpy(&display_config->stage3.stream_svp_meta[stream_index], + &scratch->pmo_dcn4.stream_svp_meta[stream_index], + sizeof(struct dml2_implicit_svp_meta)); + } +} + +static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, + plane->overrides.reserved_vblank_time_ns); + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vblank; + } + } +} + +static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vblank_drr; + } + } +} + +static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + unsigned int stream_index; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; + + 
display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; + + if (!pmo->options->disable_vactive_det_fill_bw_pad) { + display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] = + (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); + } + } + } +} + +static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + unsigned int stream_index; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vactive_drr; + + if (!pmo->options->disable_vactive_det_fill_bw_pad) { + display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] = + (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); + } + } + } +} + +static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_instance *pmo, int strategy_index) +{ + struct dml2_pmo_scratch *scratch = &pmo->scratch; + + bool fams2_required = false; + bool success = true; + unsigned int stream_index; + + reset_display_configuration(display_config); + + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + + if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { + success = false; + break; + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive) { + setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank) { + setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp) { + fams2_required = true; + setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { + fams2_required = true; + setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { + fams2_required = true; + setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { + fams2_required = true; + setup_planes_for_svp_drr_by_mask(display_config, pmo, 
scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { + fams2_required = true; + setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } + } + + /* copy FAMS2 meta */ + if (success) { + display_config->stage3.fams2_required = fams2_required; + memcpy(&display_config->stage3.stream_pstate_meta, + &scratch->pmo_dcn4.stream_pstate_meta, + sizeof(struct dml2_pstate_meta) * DML2_MAX_PLANES); + } + + return success; +} + +static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) +{ + int min_time_us = 0xFFFFFF; + unsigned int plane_index = 0; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + if (min_time_us > (display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000)) + min_time_us = display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; + } + } + return min_time_us; +} + +bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) +{ + bool p_state_supported = true; + unsigned int stream_index; + struct dml2_pmo_scratch *s = &in_out->instance->scratch; + + int MIN_VACTIVE_MARGIN_VBLANK = 0; + int MIN_VACTIVE_MARGIN_DRR = 0; + int REQUIRED_RESERVED_TIME = 0; + + if (in_out->base_display_config->display_config.overrides.all_streams_blanked) { + return true; + } + + MIN_VACTIVE_MARGIN_VBLANK = INT_MIN; + MIN_VACTIVE_MARGIN_DRR = INT_MIN; + REQUIRED_RESERVED_TIME = (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; + + if (s->pmo_dcn4.cur_pstate_candidate < 0) + return false; + + for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { + struct dml2_pstate_meta *stream_pstate_meta = &s->pmo_dcn4.stream_pstate_meta[stream_index]; + + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { + if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || + get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { + if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < + REQUIRED_RESERVED_TIME || + get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { + p_state_supported = false; + break; + } + } else if 
(s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { + if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { + if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pstate_method_fw_drr) || + get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { + p_state_supported = false; + break; + } + } + + return p_state_supported; +} + +bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) +{ + bool success = false; + struct dml2_pmo_scratch *s = &in_out->instance->scratch; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (in_out->last_candidate_failed) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].allow_state_increase && + s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index - 1) { + s->pmo_dcn4.cur_latency_index++; + + success = true; + } + } + + if (!success) { + s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index; + s->pmo_dcn4.cur_pstate_candidate++; + + if (s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates) { + success = true; + } + } + + if (success) { + in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index; + setup_display_config(in_out->optimized_display_config, in_out->instance, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate); + } + + return success; +} + +bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out) +{ + bool success = true; + struct dml2_pmo_instance *pmo = in_out->instance; + bool stutter_period_meets_z8_eco = true; + bool z8_stutter_optimization_too_expensive = false; + bool stutter_optimization_too_expensive = false; + double line_time_us, vblank_nom_time_us; + + unsigned int i; + + if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && + pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && + pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) + return false; // Unexpected SoCBB setup + + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { + if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[i].active_latency_hiding_us < + pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us + pmo->soc_bb->power_management_parameters.z8_min_idle_time) { + stutter_period_meets_z8_eco = false; + break; + } + } + + for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { + line_time_us = (double)in_out->base_display_config->display_config.stream_descriptors[i].timing.h_total / 
(in_out->base_display_config->display_config.stream_descriptors[i].timing.pixel_clock_khz * 1000) * 1000000; + vblank_nom_time_us = line_time_us * in_out->base_display_config->display_config.stream_descriptors[i].timing.vblank_nom; + + if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * MIN_BLANK_STUTTER_FACTOR) { + z8_stutter_optimization_too_expensive = true; + break; + } + + if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * MIN_BLANK_STUTTER_FACTOR) { + stutter_optimization_too_expensive = true; + break; + } + } + + pmo->scratch.pmo_dcn4.num_stutter_candidates = 0; + pmo->scratch.pmo_dcn4.cur_stutter_candidate = 0; + + if (stutter_period_meets_z8_eco && !z8_stutter_optimization_too_expensive) { + if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0) { + pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us; + pmo->scratch.pmo_dcn4.num_stutter_candidates++; + pmo->scratch.pmo_dcn4.z8_vblank_optimizable = true; + } + } else { + pmo->scratch.pmo_dcn4.z8_vblank_optimizable = false; + } + + if (!stutter_optimization_too_expensive && pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0) { + pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us; + pmo->scratch.pmo_dcn4.num_stutter_candidates++; + } + + if (pmo->scratch.pmo_dcn4.num_stutter_candidates == 0) + success = false; + + return success; +} + +bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out) +{ + bool success = true; + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { + if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && + pmo->scratch.pmo_dcn4.z8_vblank_optimizable && + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * 1000) { + success = false; + break; + } + if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * 1000) { + success = false; + break; + } + } + + return success; +} + +bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out) +{ + bool success = false; + struct dml2_pmo_instance *pmo = in_out->instance; + unsigned int i; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (!in_out->last_candidate_failed) { + if (pmo->scratch.pmo_dcn4.cur_stutter_candidate < pmo->scratch.pmo_dcn4.num_stutter_candidates) { + for (i = 0; i < in_out->optimized_display_config->display_config.num_planes; i++) { + /* take the max of the current and the optimal reserved time */ + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns = + 
(long)math_max2(pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate] * 1000, + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns); + } + + success = true; + } + } + + return success; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h new file mode 100644 index 000000000000..6baab7ad6ecc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_PMO_FAMS2_DCN4_H__ +#define __DML2_PMO_FAMS2_DCN4_H__ + +#include "dml2_internal_shared_types.h" + +bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out); + +bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + +bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); +bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); +bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + +bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); +bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); +bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + +bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out); +bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out); +bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out); + +void pmo_dcn4_fams2_expand_base_pstate_strategies( + const struct dml2_pmo_pstate_strategy *base_strategies_list, + const unsigned int num_base_strategies, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c new file mode 100644 index 000000000000..55d2464365d0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#include "dml2_pmo_factory.h" +#include "dml2_pmo_dcn4_fams2.h" +#include "dml2_pmo_dcn3.h" +#include "dml2_external_lib_deps.h" + +static bool dummy_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out) +{ + return false; +} + +static bool dummy_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out) +{ + return true; +} + +static bool dummy_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out) +{ + return false; +} + +bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out) +{ + bool result = false; + + if (!out) + return false; + + memset(out, 0, sizeof(struct dml2_pmo_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + out->initialize = pmo_dcn4_fams2_initialize; + out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; + result = true; + break; + case dml2_project_dcn4x_stage2: + out->initialize = pmo_dcn3_initialize; + + out->optimize_dcc_mcache = pmo_dcn3_optimize_dcc_mcache; + + out->init_for_vmin = pmo_dcn3_init_for_vmin; + out->test_for_vmin = pmo_dcn3_test_for_vmin; + out->optimize_for_vmin = pmo_dcn3_optimize_for_vmin; + + out->init_for_uclk_pstate = pmo_dcn3_init_for_pstate_support; + out->test_for_uclk_pstate = pmo_dcn3_test_for_pstate_support; + out->optimize_for_uclk_pstate = pmo_dcn3_optimize_for_pstate_support; + + out->init_for_stutter = dummy_init_for_stutter; + out->test_for_stutter = dummy_test_for_stutter; + out->optimize_for_stutter = dummy_optimize_for_stutter; + + result = true; + break; + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->initialize = pmo_dcn4_fams2_initialize; + + out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; + + out->init_for_vmin = pmo_dcn4_fams2_init_for_vmin; + out->test_for_vmin = pmo_dcn4_fams2_test_for_vmin; + out->optimize_for_vmin = pmo_dcn4_fams2_optimize_for_vmin; + + out->init_for_uclk_pstate = pmo_dcn4_fams2_init_for_pstate_support; + out->test_for_uclk_pstate = pmo_dcn4_fams2_test_for_pstate_support; + out->optimize_for_uclk_pstate = pmo_dcn4_fams2_optimize_for_pstate_support; + + out->init_for_stutter = pmo_dcn4_fams2_init_for_stutter; + out->test_for_stutter = pmo_dcn4_fams2_test_for_stutter; + out->optimize_for_stutter = pmo_dcn4_fams2_optimize_for_stutter; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h new file mode 100644 index 000000000000..b90f6263cd85 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_PMO_FACTORY_H__ +#define __DML2_PMO_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out); + +#endif
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c new file mode 100644 index 000000000000..e17b5ceba447 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "lib_float_math.h" + +#define ASSERT(condition) + +#define isNaN(number) ((number) != (number)) + + /* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +double math_mod(const double arg1, const double arg2) +{ + if (isNaN(arg1)) + return arg2; + if (isNaN(arg2)) + return arg1; + return arg1 - arg1 * ((int)(arg1 / arg2)); +} + +double math_min2(const double arg1, const double arg2) +{ + if (isNaN(arg1)) + return arg2; + if (isNaN(arg2)) + return arg1; + return arg1 < arg2 ? arg1 : arg2; +} + +double math_max2(const double arg1, const double arg2) +{ + if (isNaN(arg1)) + return arg2; + if (isNaN(arg2)) + return arg1; + return arg1 > arg2 ? arg1 : arg2; +} + +double math_floor2(const double arg, const double significance) +{ + ASSERT(significance != 0); + + return ((int)(arg / significance)) * significance; +} + +double math_floor(const double arg) +{ + return ((int)(arg)); +} + +double math_ceil(const double arg) +{ + return (int)(arg + 0.99999); +} + +double math_ceil2(const double arg, const double significance) +{ + return ((int)(arg / significance + 0.99999)) * significance; +} + +double math_max3(double v1, double v2, double v3) +{ + return v3 > math_max2(v1, v2) ? v3 : math_max2(v1, v2); +} + +double math_max4(double v1, double v2, double v3, double v4) +{ + return v4 > math_max3(v1, v2, v3) ? v4 : math_max3(v1, v2, v3); +} + +double math_max5(double v1, double v2, double v3, double v4, double v5) +{ + return math_max3(v1, v2, v3) > math_max2(v4, v5) ? 
math_max3(v1, v2, v3) : math_max2(v4, v5); +} + +float math_pow(float a, float exp) +{ + double temp; + if ((int)exp == 0) + return 1; + temp = math_pow(a, (float)((int)(exp / 2))); + if (((int)exp % 2) == 0) { + return (float)(temp * temp); + } else { + if ((int)exp > 0) + return (float)(a * temp * temp); + else + return (float)((temp * temp) / a); + } +} + +double math_fabs(double a) +{ + if (a > 0) + return (a); + else + return (-a); +} + +float math_log(float a, float b) +{ + int *const exp_ptr = (int *)(&a); + int x = *exp_ptr; + const int log_2 = ((x >> 23) & 255) - 128; + x &= ~(255 << 23); + x += 127 << 23; + *exp_ptr = x; + + a = ((-1.0f / 3) * a + 2) * a - 2.0f / 3; + + if (b > 2.00001 || b < 1.99999) + return (a + log_2) / math_log(b, 2); + else + return (a + log_2); +} + +float math_log2(float a) +{ + return math_log(a, 2.0); +} + +// approximate log2 value of a input +// - precise if the input pwr of 2, else the approximation will be an integer = floor(actual_log2) +unsigned int math_log2_approx(unsigned int a) +{ + unsigned int log2_val = 0; + while (a > 1) { + a = a >> 1; + log2_val++; + } + return log2_val; +} + +double math_round(double a) +{ + const double round_pt = 0.5; + + return math_floor(a + round_pt); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h new file mode 100644 index 000000000000..e13b0c5939b0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __LIB_FLOAT_MATH_H__ +#define __LIB_FLOAT_MATH_H__ + +double math_mod(const double arg1, const double arg2); +double math_min2(const double arg1, const double arg2); +double math_max2(const double arg1, const double arg2); +double math_floor2(const double arg, const double significance); +double math_floor(const double arg); +double math_ceil(const double arg); +double math_ceil2(const double arg, const double significance); +double math_max3(double v1, double v2, double v3); +double math_max4(double v1, double v2, double v3, double v4); +double math_max5(double v1, double v2, double v3, double v4, double v5); +float math_pow(float a, float exp); +double math_fabs(double a); +float math_log(float a, float b); +float math_log2(float a); +unsigned int math_log2_approx(unsigned int a); +double math_round(double a); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c new file mode 100644 index 000000000000..5a33e2f357f4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#include "dml_top.h" +#include "dml2_internal_shared_types.h" +#include "dml2_top_soc15.h" + +unsigned int dml2_get_instance_size_bytes(void) +{ + return sizeof(struct dml2_instance); +} + +bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) +{ + switch (in_out->options.project_id) { + case dml2_project_dcn4x_stage1: + case dml2_project_dcn4x_stage2: + case dml2_project_dcn4x_stage2_auto_drr_svp: + return dml2_top_soc15_initialize_instance(in_out); + case dml2_project_invalid: + default: + return false; + } +} + +bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.check_mode_supported) + return false; + + return in_out->dml2_instance->funcs.check_mode_supported(in_out); +} + +bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.build_mode_programming) + return false; + + return in_out->dml2_instance->funcs.build_mode_programming(in_out); +} + +bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.build_mcache_programming) + return false; + + return in_out->dml2_instance->funcs.build_mcache_programming(in_out); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c new file mode 100644 index 000000000000..5e14d85821e2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_top_legacy.h" +#include "dml2_top_soc15.h" +#include "dml2_core_factory.h" +#include "dml2_pmo_factory.h" +#include "display_mode_core_structs.h" + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h new file mode 100644 index 000000000000..14d0ae03dce6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_TOP_LEGACY_H__ +#define __DML2_TOP_LEGACY_H__ +#include "dml2_internal_shared_types.h" +bool dml2_top_legacy_initialize_instance(struct dml2_initialize_instance_in_out *in_out); +#endif /* __DML2_TOP_LEGACY_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c new file mode 100644 index 000000000000..4a7c4c62111e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c @@ -0,0 +1,1170 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#include "dml2_top_soc15.h" +#include "dml2_mcg_factory.h" +#include "dml2_dpmm_factory.h" +#include "dml2_core_factory.h" +#include "dml2_pmo_factory.h" +#include "lib_float_math.h" +#include "dml2_debug.h" +static void setup_unoptimized_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + out->stage1.min_clk_index_for_latency = dml->min_clk_table.dram_bw_table.num_entries - 1; //dml->min_clk_table.clean_me_up.soc_bb.num_states - 1; +} + +static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + out->stage1.min_clk_index_for_latency = 0; +} + +static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src) +{ + memcpy(dst, src, sizeof(struct display_configuation_with_meta)); +} + +static bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + state->performed = true; + + return true; +} + +static bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + return state->min_clk_index_for_latency == 0; +} + +static bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params) +{ + bool result = false; + + if (params->display_config->stage1.min_clk_index_for_latency > 0) { + copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); + params->optimized_display_config->stage1.min_clk_index_for_latency--; + result = true; + } + + return result; +} + +static bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + bool mcache_success = false; + bool result = false; + + memset(l, 0, sizeof(struct dml2_optimization_test_function_locals)); + + l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml; + l->test_mcache.calc_mcache_count_params.display_config = ¶ms->display_config->display_config; + l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations; + + result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic mcache_allocations + + if (result) { + l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations; + l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes; + + dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params); + + l->test_mcache.validate_admissibility_params.dml2_instance = params->dml; + l->test_mcache.validate_admissibility_params.display_cfg = ¶ms->display_config->display_config; + l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations; + 
l->test_mcache.validate_admissibility_params.cfg_support_info = ¶ms->display_config->mode_support_result.cfg_support_info; + + mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift to make mcache allocation works + + memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES); + } + + return mcache_success; +} + +static bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + bool optimize_success = false; + + if (params->last_candidate_supported == false) + return false; + + copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); + + l->optimize_mcache.optimize_mcache_params.instance = ¶ms->dml->pmo_instance; + l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support; + l->optimize_mcache.optimize_mcache_params.display_config = ¶ms->display_config->display_config; + l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = ¶ms->optimized_display_config->display_config; + l->optimize_mcache.optimize_mcache_params.cfg_support_info = ¶ms->optimized_display_config->mode_support_result.cfg_support_info; + + optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params); + + return optimize_success; +} + +static bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->vmin.init_params.instance = ¶ms->dml->pmo_instance; + l->vmin.init_params.base_display_config = params->display_config; + return params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params); +} + +static bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->test_vmin.pmo_test_vmin_params.instance = ¶ms->dml->pmo_instance; + l->test_vmin.pmo_test_vmin_params.display_config = params->display_config; + l->test_vmin.pmo_test_vmin_params.vmin_limits = ¶ms->dml->soc_bbox.vmin_limit; + return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params); +} + +static bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + if (params->last_candidate_supported == false) + return false; + + l->optimize_vmin.pmo_optimize_vmin_params.instance = ¶ms->dml->pmo_instance; + l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config; + l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config; + return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params); +} + +static bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.init_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params); +} + +static bool 
dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->uclk_pstate.test_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.test_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params); +} + +static bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + l->uclk_pstate.optimize_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.optimize_params.base_display_config = params->display_config; + l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config; + l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported; + + return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params); +} + +static bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.init_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params); +} + +static bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; + l->stutter.stutter_params.base_display_config = params->display_config; + return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params); +} + +static bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; + l->stutter.stutter_params.base_display_config = params->display_config; + l->stutter.stutter_params.optimized_display_config = params->optimized_display_config; + l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported; + return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params); +} + +static bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) +{ + bool test_passed = false; + bool optimize_succeeded = true; + bool candidate_validation_passed = true; + struct optimization_init_function_params init_params = { 0 }; + struct optimization_test_function_params test_params = { 0 }; + struct optimization_optimize_function_params optimize_params = { 0 }; + + if (!params->dml || + !params->optimize_function || + !params->test_function || + !params->display_config || + !params->optimized_display_config) + return false; + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); + + init_params.locals = &l->init_function_locals; + init_params.dml = params->dml; + init_params.display_config = &l->cur_candidate_display_cfg; + + if (params->init_function && !params->init_function(&init_params)) + return false; + + test_params.locals = &l->test_function_locals; + test_params.dml = params->dml; + test_params.display_config = 
&l->cur_candidate_display_cfg; + + test_passed = params->test_function(&test_params); + + while (!test_passed && optimize_succeeded) { + memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params)); + + optimize_params.locals = &l->optimize_function_locals; + optimize_params.dml = params->dml; + optimize_params.display_config = &l->cur_candidate_display_cfg; + optimize_params.optimized_display_config = &l->next_candidate_display_cfg; + optimize_params.last_candidate_supported = candidate_validation_passed; + + optimize_succeeded = params->optimize_function(&optimize_params); + + if (optimize_succeeded) { + l->mode_support_params.instance = ¶ms->dml->core_instance; + l->mode_support_params.display_cfg = &l->next_candidate_display_cfg; + l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; + + if (l->next_candidate_display_cfg.stage3.performed) + l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency; + else + l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency; + candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params); + l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; + } + + if (optimize_succeeded && candidate_validation_passed) { + memset(&test_params, 0, sizeof(struct optimization_test_function_params)); + test_params.locals = &l->test_function_locals; + test_params.dml = params->dml; + test_params.display_config = &l->next_candidate_display_cfg; + test_passed = params->test_function(&test_params); + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg); + + // If optimization is not all or nothing, then store partial progress in output + if (!params->all_or_nothing) + copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg); + } + } + + if (test_passed) + copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); + + return test_passed; +} + +static bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) +{ + int highest_state, lowest_state, cur_state; + bool supported = false; + + if (!params->dml || + !params->optimize_function || + !params->test_function || + !params->display_config || + !params->optimized_display_config) + return false; + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); + highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency; + lowest_state = 0; + + while (highest_state > lowest_state) { + cur_state = (highest_state + lowest_state) / 2; + + l->mode_support_params.instance = ¶ms->dml->core_instance; + l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg; + l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; + l->mode_support_params.min_clk_index = cur_state; + supported = params->dml->core_instance.mode_support(&l->mode_support_params); + + if (supported) { + l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; + highest_state = cur_state; + } else { + lowest_state = cur_state + 1; + } + } + l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state; + + copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); + + return true; +} + 
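dml2_top_optimization_perform_optimization_phase_1() above is a lowest-true binary search: it effectively assumes mode support is monotone in the minimum clock state index (if a state passes, every higher state passes), probes the midpoint, keeps it as the new upper bound when it passes, discards it and everything below when it fails, and records the lowest surviving index as min_clk_index_for_latency. A self-contained sketch of the same search is below, with a stand-in predicate in place of core_instance.mode_support(); all names here are illustrative, not driver API.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the real mode-support check; pretend states 3 and above pass. */
static bool sketch_mode_supported(int clk_state_index)
{
	return clk_state_index >= 3;
}

/*
 * Return the lowest state index in [0, start_state] that passes, assuming the
 * predicate is monotone in the index; if no state in the range passes, the
 * search simply converges back to start_state, mirroring phase_1 behavior.
 */
static int sketch_find_lowest_supported_state(int start_state)
{
	int lowest = 0;
	int highest = start_state;

	while (highest > lowest) {
		int cur = (highest + lowest) / 2;

		if (sketch_mode_supported(cur))
			highest = cur;		/* cur works; look for something lower */
		else
			lowest = cur + 1;	/* cur fails; everything below fails too */
	}

	return lowest;
}

int main(void)
{
	printf("lowest supported state: %d\n", sketch_find_lowest_supported_state(7));
	return 0;
}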
+/* +* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming. +* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means +* that the horizontal viewport does not span more than 2 cache slices. +* +* It optionally also can apply a constant shift to all the cache boundaries. +*/ +static const uint32_t MCACHE_ID_UNASSIGNED = 0xF; +static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF; + +static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift, + int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset) +{ + const int MAX_VP = 0xFFFFFF; + int left_cache_id; + int right_cache_id; + int range_start; + int range_end; + bool success = false; + + if (num_boundaries <= 1) { + if (first_offset && second_offset) { + *first_offset = 0; + *second_offset = -1; + } + success = true; + return success; + } else { + range_start = 0; + for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) { + range_end = mcache_boundaries[left_cache_id] - shift - 1; + + if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end) + break; + + range_start = range_end + 1; + } + + range_end = MAX_VP; + for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) { + if (right_cache_id >= 0) + range_start = mcache_boundaries[right_cache_id] - shift; + else + range_start = 0; + + if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) { + break; + } + range_end = range_start - 1; + } + right_cache_id = (right_cache_id + 1) % num_boundaries; + + if (right_cache_id == left_cache_id) { + if (first_offset && second_offset) { + *first_offset = left_cache_id; + *second_offset = -1; + } + success = true; + } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) { + if (first_offset && second_offset) { + *first_offset = left_cache_id; + *second_offset = right_cache_id; + } + success = true; + } + } + + return success; +} + +/* +* For a given set of pipe start/end x positions, checks to see it can support the input mcache splitting. +* It also attempts to "optimize" by finding a shift if the default 0 shift does not work. +*/ +static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries, + int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift) +{ + int max_shift = 0xFFFF; + unsigned int pipe_index; + unsigned int i, slice_width; + bool success = false; + + for (i = 0; i < num_boundaries; i++) { + if (i == 0) + slice_width = mcache_boundaries[i]; + else + slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1]; + + if (max_shift > (int)slice_width) { + max_shift = slice_width; + } + } + + for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) { + success = true; + for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) { + if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift, + pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) { + success = false; + break; + } + } + if (success) + break; + } + + return success; +} + +/* +* Counts the number of elements inside input array within the given span length. +* Formally, what is the size of the largest subset of the array where the largest and smallest element +* differ no more than the span. 
+*/ +static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span) +{ + unsigned int i; + unsigned int span_start_value; + unsigned int span_start_index; + unsigned int greatest_element_count; + + if (array_size == 0) + return 1; + + if (span == 0) + return array_size > 0 ? 1 : 0; + + span_start_value = 0; + span_start_index = 0; + greatest_element_count = 0; + + while (span_start_index < array_size) { + for (i = span_start_index; i < array_size; i++) { + if (array[i] - span_start_value <= span) { + if (i - span_start_index + 1 > greatest_element_count) { + greatest_element_count = i - span_start_index + 1; + } + } else + break; + } + + span_start_index++; + + if (span_start_index < array_size) { + span_start_value = array[span_start_index - 1] + 1; + } + } + + return greatest_element_count; +} + +static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes, + enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end) +{ + int i, slice_width; + const char MAX_SCL_VP_OVERLAP = 3; + bool success = false; + + switch (scaling_transform) { + case dml2_scaling_transform_centered: + case dml2_scaling_transform_aspect_ratio: + case dml2_scaling_transform_fullscreen: + slice_width = full_vp_width / num_pipes; + for (i = 0; i < num_pipes; i++) { + pipe_vp_x_start[i] = i * slice_width; + pipe_vp_x_end[i] = (i + 1) * slice_width - 1; + + if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP) + pipe_vp_x_start[i] = 0; + else + pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP; + + if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1) + pipe_vp_x_end[i] = full_vp_width - 1; + else + pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP; + } + break; + case dml2_scaling_transform_explicit: + default: + success = false; + break; + } + + return success; +} + +bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params) +{ + struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; + struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals; + + const int MAX_PIXEL_OVERLAP = 6; + int max_per_pipe_vp_p0 = 0; + int max_per_pipe_vp_p1 = 0; + int temp, p0shift, p1shift; + unsigned int plane_index = 0; + unsigned int i; + unsigned int odm_combine_factor; + unsigned int mpc_combine_factor; + unsigned int num_dpps; + unsigned int num_boundaries; + enum dml2_scaling_transform scaling_transform; + const struct dml2_plane_parameters *plane; + const struct dml2_stream_parameters *stream; + + bool p0pass = false; + bool p1pass = false; + bool all_pass = true; + + for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) { + if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable) + continue; + + plane = ¶ms->display_cfg->plane_descriptors[plane_index]; + stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; + + num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; + + if (odm_combine_factor == 1) + num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; + else + mpc_combine_factor = 1; + + if (odm_combine_factor > 1) { + max_per_pipe_vp_p0 = plane->surface.plane0.width; + temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor); + + if (temp < max_per_pipe_vp_p0) + max_per_pipe_vp_p0 = temp; + + 
max_per_pipe_vp_p1 = plane->surface.plane1.width;
+ temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor);
+
+ if (temp < max_per_pipe_vp_p1)
+ max_per_pipe_vp_p1 = temp;
+ } else {
+ max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor;
+ max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor;
+ }
+
+ max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP;
+ max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP;
+
+ p0shift = 0;
+ p1shift = 0;
+
+ // The last element in the unshifted boundary array will always be the first pixel outside the
+ // plane, which means there's no mcache associated with it, so -1
+ num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1;
+ if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
+ num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) {
+ p0pass = true;
+ }
+ num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1;
+ if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
+ num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) {
+ p1pass = true;
+ }
+
+ if (!p0pass || !p1pass) {
+ if (odm_combine_factor > 1) {
+ num_dpps = odm_combine_factor;
+ scaling_transform = plane->composition.scaling_transform;
+ } else {
+ num_dpps = mpc_combine_factor;
+ scaling_transform = dml2_scaling_transform_fullscreen;
+ }
+
+ if (!p0pass) {
+ if (plane->composition.viewport.stationary) {
+ calculate_h_split_for_scaling_transform(plane->surface.plane0.width,
+ stream->timing.h_active, num_dpps, scaling_transform,
+ &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]);
+ p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
+ params->mcache_allocations[plane_index].num_mcaches_plane0,
+ &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps,
+ params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift);
+ }
+ }
+ if (!p1pass) {
+ if (plane->composition.viewport.stationary) {
+ calculate_h_split_for_scaling_transform(plane->surface.plane1.width,
+ stream->timing.h_active, num_dpps, scaling_transform,
+ &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]);
+ p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
+ params->mcache_allocations[plane_index].num_mcaches_plane1,
+ &l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps,
+ params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift);
+ }
+ }
+ }
+
+ if (p0pass && p1pass) {
+ for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) {
+ params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift;
+ }
+ for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) {
+ params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift;
+ }
+ }
+
+ params->per_plane_status[plane_index] = p0pass && p1pass;
+ all_pass &= p0pass && p1pass;
+ }
+
+ return all_pass;
+}
+
+static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs)
+{
+ // Initialize all entries to special valid MCache ID and special valid split coordinate
+ per_plane_pipe_mcache_regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED;
+
+ per_plane_pipe_mcache_regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED;
+
+ per_plane_pipe_mcache_regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED;
+
+ per_plane_pipe_mcache_regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED;
+}
+
+void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params)
+{
+ int i;
+ unsigned int j;
+ int next_unused_cache_id = 0;
+
+ for (i = 0; i < params->num_allocations; i++) {
+ if (!params->allocations[i].valid)
+ continue;
+
+ for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
+ params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++;
+ }
+ for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
+ params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++;
+ }
+
+ // The "pseudo-last" slice always wraps around to the first mcache ID
+ params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] =
+ params->allocations[i].global_mcache_ids_plane0[0];
+ params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] =
+ params->allocations[i].global_mcache_ids_plane1[0];
+
+ // If we need dedicated caches for mall requesting, then we assign them here.
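+ // Dedicated MALL mcache IDs are allocated from the same global counter, continuing after the regular IDs.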
+ if (params->allocations[i].requires_dedicated_mall_mcache) {
+ for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
+ params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++;
+ }
+ for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
+ params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++;
+ }
+
+ // The "pseudo-last" slice always wraps around to the first mcache ID
+ params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] =
+ params->allocations[i].global_mcache_ids_mall_plane0[0];
+ params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] =
+ params->allocations[i].global_mcache_ids_mall_plane1[0];
+ }
+
+ // If P0 and P1 share their last-slice cache, the largest mcache IDs for p0 and p1 can be the same.
+ // Since mcache IDs are always assigned in ascending order, the largest mcache ID of p1 is set to the
+ // largest mcache ID of p0.
+ if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
+ params->allocations[i].last_slice_sharing.plane0_plane1) {
+ params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+ params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+ }
+
+ // If dedicated MALL caches are needed, handle last-slice sharing for them as well
+ if (params->allocations[i].requires_dedicated_mall_mcache) {
+ if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
+ params->allocations[i].last_slice_sharing.plane0_plane1) {
+ params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+ params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+ }
+ // If mall_comb_mcache_p0 is set, the largest mcache ID for MALL p0 can be the same as for regular read p0
+ if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) {
+ params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] =
+ params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+ }
+ // If mall_comb_mcache_p1 is set, the largest mcache ID for MALL p1 can be the same as for regular
+ // read p1 (which may itself equal regular read p0 if plane0_plane1 is also set)
+ if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) {
+ params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+ params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1];
+ }
+ }
+
+ // If dedicated MALL mcaches are not required, the MALL mcache assignments are identical to the regular ones
+ if (!params->allocations[i].requires_dedicated_mall_mcache) {
+ memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0,
+ sizeof(params->allocations[i].global_mcache_ids_mall_plane0));
+ memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1,
+ sizeof(params->allocations[i].global_mcache_ids_mall_plane1));
+ }
+ }
+}
+
+bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params)
+{
+ struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance;
+
struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals; + + unsigned int total_mcaches_required; + unsigned int i; + bool result = false; + + if (dml->soc_bbox.num_dcc_mcaches == 0) { + return true; + } + + total_mcaches_required = 0; + l->calc_mcache_params.instance = &dml->core_instance; + for (i = 0; i < params->display_config->num_planes; i++) { + if (!params->display_config->plane_descriptors[i].surface.dcc.enable) { + memset(¶ms->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation)); + continue; + } + + l->calc_mcache_params.plane_descriptor = ¶ms->display_config->plane_descriptors[i]; + l->calc_mcache_params.mcache_allocation = ¶ms->mcache_allocations[i]; + l->calc_mcache_params.plane_index = i; + + if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) { + result = false; + break; + } + + if (params->mcache_allocations[i].valid) { + total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1; + if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1) + total_mcaches_required--; + } + } + DML_LOG_VERBOSE("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); + + if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { + result = false; + } else { + result = true; + } + + return result; +} + +static bool dml2_top_soc15_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals; + struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming; + + bool result = false; + bool mcache_success = false; + memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); + + setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (result) { + struct optimization_phase_params mcache_phase = { + .dml = dml, + .display_config = &l->base_display_config_with_meta, + .test_function = dml2_top_optimization_test_function_mcache, + .optimize_function = dml2_top_optimization_optimize_function_mcache, + .optimized_display_config = &l->optimized_display_config_with_meta, + .all_or_nothing = false, + }; + mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &mcache_phase); + } + + /* + * Call DPMM to map all requirements to minimum clock state + */ + if (result) { + l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; + l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_mode_params.programming = dpmm_programming; + l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; + l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; + result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); + } + + in_out->is_supported = mcache_success; + 
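// is_supported reflects the mcache validation phase; the value returned below additionally requires the
+ // earlier mode support and DPMM mapping steps to have succeeded.
+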
result = result && in_out->is_supported; + + return result; +} + +static bool dml2_top_soc15_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_build_mode_programming_locals *l = &dml->scratch.build_mode_programming_locals; + + bool result = false; + bool mcache_success = false; + bool uclk_pstate_success = false; + bool vmin_success = false; + bool stutter_success = false; + + memset(l, 0, sizeof(struct dml2_build_mode_programming_locals)); + memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming)); + + memcpy(&in_out->programming->display_config, in_out->display_config, sizeof(struct dml2_display_cfg)); + + setup_speculative_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = false; + dml->core_instance.populate_informative(&l->informative_params); + + return false; + } + + /* + * Phase 1: Determine minimum clocks to satisfy latency requirements for this mode + */ + memset(&l->min_clock_for_latency_phase, 0, sizeof(struct optimization_phase_params)); + l->min_clock_for_latency_phase.dml = dml; + l->min_clock_for_latency_phase.display_config = &l->base_display_config_with_meta; + l->min_clock_for_latency_phase.init_function = dml2_top_optimization_init_function_min_clk_for_latency; + l->min_clock_for_latency_phase.test_function = dml2_top_optimization_test_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimize_function = dml2_top_optimization_optimize_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->min_clock_for_latency_phase.all_or_nothing = false; + + dml2_top_optimization_perform_optimization_phase_1(&l->optimization_phase_locals, &l->min_clock_for_latency_phase); + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + } + + /* + * Phase 2: Satisfy DCC mcache requirements + */ + memset(&l->mcache_phase, 0, sizeof(struct optimization_phase_params)); + l->mcache_phase.dml = dml; + l->mcache_phase.display_config = &l->base_display_config_with_meta; + l->mcache_phase.test_function = 
dml2_top_optimization_test_function_mcache; + l->mcache_phase.optimize_function = dml2_top_optimization_optimize_function_mcache; + l->mcache_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->mcache_phase.all_or_nothing = true; + + mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->mcache_phase); + + if (!mcache_success) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = false; + + dml->core_instance.populate_informative(&l->informative_params); + + in_out->programming->informative.failed_mcache_validation = true; + return false; + } + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + + /* + * Phase 3: Optimize for Pstate + */ + memset(&l->uclk_pstate_phase, 0, sizeof(struct optimization_phase_params)); + l->uclk_pstate_phase.dml = dml; + l->uclk_pstate_phase.display_config = &l->base_display_config_with_meta; + l->uclk_pstate_phase.init_function = dml2_top_optimization_init_function_uclk_pstate; + l->uclk_pstate_phase.test_function = dml2_top_optimization_test_function_uclk_pstate; + l->uclk_pstate_phase.optimize_function = dml2_top_optimization_optimize_function_uclk_pstate; + l->uclk_pstate_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->uclk_pstate_phase.all_or_nothing = true; + + uclk_pstate_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->uclk_pstate_phase); + + if (uclk_pstate_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage3.success = true; + } + + /* + * Phase 4: Optimize for Vmin + */ + memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); + l->vmin_phase.dml = dml; + l->vmin_phase.display_config = &l->base_display_config_with_meta; + l->vmin_phase.init_function = dml2_top_optimization_init_function_vmin; + l->vmin_phase.test_function = dml2_top_optimization_test_function_vmin; + l->vmin_phase.optimize_function = dml2_top_optimization_optimize_function_vmin; + l->vmin_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->vmin_phase.all_or_nothing = false; + + vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); + + if (l->optimized_display_config_with_meta.stage4.performed) { + /* + * when performed is true, optimization has applied to + * optimized_display_config_with_meta and it has passed mode + * support. However it may or may not pass the test function to + * reach actual Vmin. As long as voltage is optimized even if it + * doesn't reach Vmin level, there is still power benefit so in + * this case we will still copy this optimization into base + * display config. 
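+ * Note: stage4.success below still records whether the Vmin target itself was reached.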
+ */ + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage4.success = vmin_success; + } + + /* + * Phase 5: Optimize for Stutter + */ + memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params)); + l->stutter_phase.dml = dml; + l->stutter_phase.display_config = &l->base_display_config_with_meta; + l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter; + l->stutter_phase.test_function = dml2_top_optimization_test_function_stutter; + l->stutter_phase.optimize_function = dml2_top_optimization_optimize_function_stutter; + l->stutter_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->stutter_phase.all_or_nothing = true; + + stutter_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->stutter_phase); + + if (stutter_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage5.success = true; + } + + /* + * Call DPMM to map all requirements to minimum clock state + */ + if (result) { + l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; + l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_mode_params.programming = in_out->programming; + l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; + l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; + result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); + if (!result) + in_out->programming->informative.failed_dpmm = true; + } + + if (result) { + l->mode_programming_params.instance = &dml->core_instance; + l->mode_programming_params.display_cfg = &l->base_display_config_with_meta; + l->mode_programming_params.cfg_support_info = &l->base_display_config_with_meta.mode_support_result.cfg_support_info; + l->mode_programming_params.programming = in_out->programming; + result = dml->core_instance.mode_programming(&l->mode_programming_params); + if (!result) + in_out->programming->informative.failed_mode_programming = true; + } + + if (result) { + l->dppm_map_watermarks_params.core = &dml->core_instance; + l->dppm_map_watermarks_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_watermarks_params.programming = in_out->programming; + result = dml->dpmm_instance.map_watermarks(&l->dppm_map_watermarks_params); + } + + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = result; + + dml->core_instance.populate_informative(&l->informative_params); + + return result; +} + +bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params) +{ + bool success = true; + int config_index, pipe_index; + int first_offset, second_offset; + int free_per_plane_reg_index = 0; + + memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *)); + + for (config_index = 0; config_index < params->num_configurations; config_index++) { + for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) { + // Allocate storage for the mcache regs + params->per_plane_pipe_mcache_regs[config_index][pipe_index] = ¶ms->mcache_regs_set[free_per_plane_reg_index++]; + + 
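// Start from unassigned mcache IDs and undefined split locations before populating this pipe.
+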
reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]); + + if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) { + // P0 always enabled + if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0, + params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0, + 0, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start + + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1, + &first_offset, &second_offset)) { + success = false; + break; + } + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset]; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset]; + + if (second_offset >= 0) { + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; + } + + // Populate P1 if enabled + if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) { + if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1, + params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1, + 0, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start + + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1, + &first_offset, &second_offset)) { + success = false; + break; + } + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset]; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset]; + + if (second_offset >= 0) { + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location = + 
params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + } + } + } + } + } + + return success; +} + +static const struct dml2_top_funcs soc15_funcs = { + .check_mode_supported = dml2_top_soc15_check_mode_supported, + .build_mode_programming = dml2_top_soc15_build_mode_programming, + .build_mcache_programming = dml2_top_soc15_build_mcache_programming, +}; + +bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_initialize_instance_locals *l = &dml->scratch.initialize_instance_locals; + struct dml2_core_initialize_in_out core_init_params = { 0 }; + struct dml2_mcg_build_min_clock_table_params_in_out mcg_build_min_clk_params = { 0 }; + struct dml2_pmo_initialize_in_out pmo_init_params = { 0 }; + bool result = false; + + memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); + memset(dml, 0, sizeof(struct dml2_instance)); + + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); + + dml->project_id = in_out->options.project_id; + dml->pmo_options = in_out->options.pmo_options; + + // Initialize All Components + result = dml2_mcg_create(in_out->options.project_id, &dml->mcg_instance); + + if (result) + result = dml2_dpmm_create(in_out->options.project_id, &dml->dpmm_instance); + + if (result) + result = dml2_core_create(in_out->options.project_id, &dml->core_instance); + + if (result) { + mcg_build_min_clk_params.soc_bb = &in_out->soc_bb; + mcg_build_min_clk_params.min_clk_table = &dml->min_clk_table; + result = dml->mcg_instance.build_min_clock_table(&mcg_build_min_clk_params); + } + + if (result) { + core_init_params.project_id = in_out->options.project_id; + core_init_params.instance = &dml->core_instance; + core_init_params.minimum_clock_table = &dml->min_clk_table; + core_init_params.explicit_ip_bb = in_out->overrides.explicit_ip_bb; + core_init_params.explicit_ip_bb_size = in_out->overrides.explicit_ip_bb_size; + core_init_params.ip_caps = &in_out->ip_caps; + core_init_params.soc_bb = &in_out->soc_bb; + result = dml->core_instance.initialize(&core_init_params); + + if (core_init_params.explicit_ip_bb && core_init_params.explicit_ip_bb_size > 0) { + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + } + } + + if (result) + result = dml2_pmo_create(in_out->options.project_id, &dml->pmo_instance); + + if (result) { + pmo_init_params.instance = &dml->pmo_instance; + pmo_init_params.soc_bb = &dml->soc_bbox; + pmo_init_params.ip_caps = &dml->ip_caps; + pmo_init_params.mcg_clock_table_size = dml->min_clk_table.dram_bw_table.num_entries; + pmo_init_params.options = &dml->pmo_options; + dml->pmo_instance.initialize(&pmo_init_params); + } + dml->funcs = soc15_funcs; + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h new file mode 100644 index 000000000000..53bd8602f9ef 
--- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_TOP_SOC15_H__ +#define __DML2_TOP_SOC15_H__ +#include "dml2_internal_shared_types.h" +bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out); + +bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params); +void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params); +bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params); +bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params); +#endif /* __DML2_TOP_SOC15_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h new file mode 100644 index 000000000000..611c80f4f1bf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_DEBUG_H__ +#define __DML2_DEBUG_H__ + +#include "os_types.h" +#define DML_ASSERT(condition) ASSERT(condition) +#define DML_LOG_LEVEL_DEFAULT DML_LOG_LEVEL_WARN +#define DML_LOG_INTERNAL(fmt, ...) dm_output_to_console(fmt, ## __VA_ARGS__) + +/* private helper macros */ +#define _BOOL_FORMAT(field) "%s", field ? "true" : "false" +#define _UINT_FORMAT(field) "%u", field +#define _INT_FORMAT(field) "%d", field +#define _DOUBLE_FORMAT(field) "%lf", field +#define _ELEMENT_FUNC "function" +#define _ELEMENT_COMP_IF "component_interface" +#define _ELEMENT_TOP_IF "top_interface" +#define _LOG_ENTRY(element) do { \ + DML_LOG_INTERNAL("<"element" name=\""); \ + DML_LOG_INTERNAL(__func__); \ + DML_LOG_INTERNAL("\">\n"); \ +} while (0) +#define _LOG_EXIT(element) DML_LOG_INTERNAL("</"element">\n") +#define _LOG_SCALAR(field, format) do { \ + DML_LOG_INTERNAL(#field" = "format(field)); \ + DML_LOG_INTERNAL("\n"); \ +} while (0) +#define _LOG_ARRAY(field, size, format) do { \ + DML_LOG_INTERNAL(#field " = ["); \ + for (int _i = 0; _i < (int) size; _i++) { \ + DML_LOG_INTERNAL(format(field[_i])); \ + if (_i + 1 == (int) size) \ + DML_LOG_INTERNAL("]\n"); \ + else \ + DML_LOG_INTERNAL(", "); \ +}} while (0) +#define _LOG_2D_ARRAY(field, size0, size1, format) do { \ + DML_LOG_INTERNAL(#field" = ["); \ + for (int _i = 0; _i < (int) size0; _i++) { \ + DML_LOG_INTERNAL("\n\t["); \ + for (int _j = 0; _j < (int) size1; _j++) { \ + DML_LOG_INTERNAL(format(field[_i][_j])); \ + if (_j + 1 == (int) size1) \ + DML_LOG_INTERNAL("]"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ + if (_i + 1 == (int) size0) \ + DML_LOG_INTERNAL("]\n"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ +} while (0) +#define _LOG_3D_ARRAY(field, size0, size1, size2, format) do { \ + DML_LOG_INTERNAL(#field" = ["); \ + for (int _i = 0; _i < (int) size0; _i++) { \ + DML_LOG_INTERNAL("\n\t["); \ + for (int _j = 0; _j < (int) size1; _j++) { \ + DML_LOG_INTERNAL("["); \ + for (int _k = 0; _k < (int) size2; _k++) { \ + DML_LOG_INTERNAL(format(field[_i][_j][_k])); \ + if (_k + 1 == (int) size2) \ + DML_LOG_INTERNAL("]"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ + if (_j + 1 == (int) size1) \ + DML_LOG_INTERNAL("]"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ + if (_i + 1 == (int) size0) \ + 
DML_LOG_INTERNAL("]\n"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ +} while (0) + +/* fatal errors for unrecoverable DML states until a full reset */ +#define DML_LOG_LEVEL_FATAL 0 +/* unexpected but recoverable failures inside DML */ +#define DML_LOG_LEVEL_ERROR 1 +/* unexpected inputs or events to DML */ +#define DML_LOG_LEVEL_WARN 2 +/* high level tracing of DML interfaces */ +#define DML_LOG_LEVEL_INFO 3 +/* tracing of DML internal executions */ +#define DML_LOG_LEVEL_DEBUG 4 +/* detailed tracing of DML calculation procedure */ +#define DML_LOG_LEVEL_VERBOSE 5 + +#ifndef DML_LOG_LEVEL +#define DML_LOG_LEVEL DML_LOG_LEVEL_DEFAULT +#endif /* #ifndef DML_LOG_LEVEL */ + +/* public macros for DML_LOG_LEVEL_FATAL and up */ +#define DML_LOG_FATAL(fmt, ...) DML_LOG_INTERNAL("[DML FATAL] " fmt, ## __VA_ARGS__) + +/* public macros for DML_LOG_LEVEL_ERROR and up */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_ERROR +#define DML_LOG_ERROR(fmt, ...) DML_LOG_INTERNAL("[DML ERROR] "fmt, ## __VA_ARGS__) +#define DML_ASSERT_MSG(condition, fmt, ...) \ + do { \ + if (!(condition)) { \ + DML_LOG_ERROR("ASSERT hit in %s line %d\n", __func__, __LINE__); \ + DML_LOG_ERROR(fmt, ## __VA_ARGS__); \ + DML_ASSERT(condition); \ + } \ + } while (0) +#else +#define DML_LOG_ERROR(fmt, ...) ((void)0) +#define DML_ASSERT_MSG(condition, fmt, ...) ((void)0) +#endif + +/* public macros for DML_LOG_LEVEL_WARN and up */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_WARN +#define DML_LOG_WARN(fmt, ...) DML_LOG_INTERNAL("[DML WARN] "fmt, ## __VA_ARGS__) +#else +#define DML_LOG_WARN(fmt, ...) ((void)0) +#endif + +/* public macros for DML_LOG_LEVEL_INFO and up */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_INFO +#define DML_LOG_INFO(fmt, ...) DML_LOG_INTERNAL("[DML INFO] "fmt, ## __VA_ARGS__) +#define DML_LOG_TOP_IF_ENTER() _LOG_ENTRY(_ELEMENT_TOP_IF) +#define DML_LOG_TOP_IF_EXIT() _LOG_EXIT(_ELEMENT_TOP_IF) +#else +#define DML_LOG_INFO(fmt, ...) ((void)0) +#define DML_LOG_TOP_IF_ENTER() ((void)0) +#define DML_LOG_TOP_IF_EXIT() ((void)0) +#endif + +/* public macros for DML_LOG_LEVEL_DEBUG and up */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_DEBUG +#define DML_LOG_DEBUG(fmt, ...) 
DML_LOG_INTERNAL(fmt, ## __VA_ARGS__) +#define DML_LOG_COMP_IF_ENTER() _LOG_ENTRY(_ELEMENT_COMP_IF) +#define DML_LOG_COMP_IF_EXIT() _LOG_EXIT(_ELEMENT_COMP_IF) +#define DML_LOG_FUNC_ENTER() _LOG_ENTRY(_ELEMENT_FUNC) +#define DML_LOG_FUNC_EXIT() _LOG_EXIT(_ELEMENT_FUNC) +#define DML_LOG_DEBUG_BOOL(field) _LOG_SCALAR(field, _BOOL_FORMAT) +#define DML_LOG_DEBUG_UINT(field) _LOG_SCALAR(field, _UINT_FORMAT) +#define DML_LOG_DEBUG_INT(field) _LOG_SCALAR(field, _INT_FORMAT) +#define DML_LOG_DEBUG_DOUBLE(field) _LOG_SCALAR(field, _DOUBLE_FORMAT) +#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) _LOG_ARRAY(field, size, _BOOL_FORMAT) +#define DML_LOG_DEBUG_ARRAY_UINT(field, size) _LOG_ARRAY(field, size, _UINT_FORMAT) +#define DML_LOG_DEBUG_ARRAY_INT(field, size) _LOG_ARRAY(field, size, _INT_FORMAT) +#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) _LOG_ARRAY(field, size, _DOUBLE_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _BOOL_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _UINT_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _INT_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _DOUBLE_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _BOOL_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _UINT_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _INT_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _DOUBLE_FORMAT) +#else +#define DML_LOG_DEBUG(fmt, ...) ((void)0) +#define DML_LOG_COMP_IF_ENTER() ((void)0) +#define DML_LOG_COMP_IF_EXIT() ((void)0) +#define DML_LOG_FUNC_ENTER() ((void)0) +#define DML_LOG_FUNC_EXIT() ((void)0) +#define DML_LOG_DEBUG_BOOL(field) ((void)0) +#define DML_LOG_DEBUG_UINT(field) ((void)0) +#define DML_LOG_DEBUG_INT(field) ((void)0) +#define DML_LOG_DEBUG_DOUBLE(field) ((void)0) +#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) ((void)0) +#define DML_LOG_DEBUG_ARRAY_UINT(field, size) ((void)0) +#define DML_LOG_DEBUG_ARRAY_INT(field, size) ((void)0) +#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) ((void)0) +#endif + +/* public macros for DML_LOG_LEVEL_VERBOSE */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE +#define DML_LOG_VERBOSE(fmt, ...) DML_LOG_INTERNAL(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_VERBOSE(fmt, ...) 
((void)0) +#endif /* #if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE */ +#endif /* __DML2_DEBUG_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h new file mode 100644 index 000000000000..1a6c0727cd2a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h @@ -0,0 +1,1010 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_INTERNAL_SHARED_TYPES_H__ +#define __DML2_INTERNAL_SHARED_TYPES_H__ + +#include "dml2_external_lib_deps.h" +#include "dml_top_types.h" +#include "dml2_core_shared_types.h" +/* +* DML2 MCG Types and Interfaces +*/ + +#define DML_MCG_MAX_CLK_TABLE_SIZE 20 + +struct dram_bw_to_min_clk_table_entry { + unsigned long long pre_derate_dram_bw_kbps; + unsigned long min_fclk_khz; + unsigned long min_dcfclk_khz; +}; + +struct dml2_mcg_dram_bw_to_min_clk_table { + struct dram_bw_to_min_clk_table_entry entries[DML_MCG_MAX_CLK_TABLE_SIZE]; + + unsigned int num_entries; +}; + +struct dml2_mcg_min_clock_table { + struct { + unsigned int dispclk; + unsigned int dppclk; + unsigned int dscclk; + unsigned int dtbclk; + unsigned int phyclk; + unsigned int fclk; + unsigned int dcfclk; + } max_clocks_khz; + + struct { + unsigned int dispclk; + unsigned int dppclk; + unsigned int dtbclk; + } max_ss_clocks_khz; + + struct { + unsigned int dprefclk; + unsigned int xtalclk; + unsigned int pcierefclk; + unsigned int dchubrefclk; + unsigned int amclk; + } fixed_clocks_khz; + + struct dml2_mcg_dram_bw_to_min_clk_table dram_bw_table; +}; + +struct dml2_mcg_build_min_clock_table_params_in_out { + /* + * Input + */ + struct dml2_soc_bb *soc_bb; + struct { + bool perform_pseudo_build; + } clean_me_up; + + /* + * Output + */ + struct dml2_mcg_min_clock_table *min_clk_table; +}; +struct dml2_mcg_instance { + bool (*build_min_clock_table)(struct dml2_mcg_build_min_clock_table_params_in_out *in_out); +}; + +/* +* DML2 DPMM Types and Interfaces +*/ + +struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out { + /* + * Input + */ + struct dml2_core_ip_params *ip; + struct dml2_soc_bb *soc_bb; + struct dml2_mcg_min_clock_table *min_clk_table; + const struct display_configuation_with_meta *display_cfg; + struct { + bool perform_pseudo_map; + struct dml2_core_internal_soc_bb *soc_bb; + } clean_me_up; + + /* + * Output + */ + struct dml2_display_cfg_programming *programming; +}; + +struct dml2_dpmm_map_watermarks_params_in_out { + /* + * Input + */ + const struct display_configuation_with_meta *display_cfg; + const struct dml2_core_instance *core; + + /* + * Output + */ + struct dml2_display_cfg_programming *programming; +}; + +struct dml2_dpmm_scratch { + struct dml2_display_cfg_programming programming; +}; + +struct dml2_dpmm_instance { + bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); + bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out); + + struct dml2_dpmm_scratch dpmm_scratch; +}; + +/* +* DML2 Core Types and Interfaces +*/ + +struct dml2_core_initialize_in_out { + enum dml2_project_id project_id; + struct dml2_core_instance *instance; + struct dml2_soc_bb *soc_bb; + struct dml2_ip_capabilities *ip_caps; + + struct dml2_mcg_min_clock_table *minimum_clock_table; + + void *explicit_ip_bb; + unsigned int explicit_ip_bb_size; + + // FIXME_STAGE2 can remove but dcn3 version still need this + struct { + struct soc_bounding_box_st 
*soc_bb; + struct soc_states_st *soc_states; + } legacy; +}; + +struct core_bandwidth_requirements { + int urgent_bandwidth_kbytes_per_sec; + int average_bandwidth_kbytes_per_sec; +}; + +struct core_plane_support_info { + int dpps_used; + int dram_change_latency_hiding_margin_in_active; + int active_latency_hiding_us; + int mall_svp_size_requirement_ways; + int nominal_vblank_pstate_latency_hiding_us; + int vactive_det_fill_delay_us[dml2_pstate_type_count]; +}; + +struct core_stream_support_info { + unsigned int odms_used; + unsigned int num_odm_output_segments; // for odm split mode (e.g. a value of 2 for odm_mode_mso_1to2) + + /* FAMS2 SubVP support info */ + unsigned int phantom_min_v_active; + unsigned int phantom_v_startup; + + unsigned int phantom_v_active; + unsigned int phantom_v_total; + int vblank_reserved_time_us; + int num_dsc_slices; + bool dsc_enable; +}; + +struct core_display_cfg_support_info { + bool is_supported; + + struct core_stream_support_info stream_support_info[DML2_MAX_PLANES]; + struct core_plane_support_info plane_support_info[DML2_MAX_PLANES]; + + struct { + struct dml2_core_internal_mode_support_info support_info; + } clean_me_up; +}; + +struct dml2_core_mode_support_result { + struct { + struct { + unsigned long urgent_bw_sdp_kbps; + unsigned long average_bw_sdp_kbps; + unsigned long urgent_bw_dram_kbps; + unsigned long average_bw_dram_kbps; + unsigned long dcfclk_khz; + unsigned long fclk_khz; + } svp_prefetch; + + struct { + unsigned long urgent_bw_sdp_kbps; + unsigned long average_bw_sdp_kbps; + unsigned long urgent_bw_dram_kbps; + unsigned long average_bw_dram_kbps; + unsigned long dcfclk_khz; + unsigned long fclk_khz; + } active; + + unsigned int dispclk_khz; + unsigned int dpprefclk_khz; + unsigned int dtbrefclk_khz; + unsigned int dcfclk_deepsleep_khz; + unsigned int socclk_khz; + + unsigned int uclk_pstate_supported; + unsigned int fclk_pstate_supported; + struct dml2_core_internal_watermarks watermarks; + } global; + + struct { + unsigned int dscclk_khz; + unsigned int dtbclk_khz; + unsigned int phyclk_khz; + } per_stream[DML2_MAX_PLANES]; + + struct { + unsigned int dppclk_khz; + unsigned int mall_svp_allocation_mblks; + unsigned int mall_full_frame_allocation_mblks; + } per_plane[DML2_MAX_PLANES]; + + struct core_display_cfg_support_info cfg_support_info; +}; + +struct dml2_optimization_stage1_state { + bool performed; + bool success; + + int min_clk_index_for_latency; +}; + +struct dml2_optimization_stage2_state { + bool performed; + bool success; + + // Whether or not each plane supports mcache + // The number of valid elements == display_cfg.num_planes + // The indexing of pstate_switch_modes matches plane_descriptors[] + bool per_plane_mcache_support[DML2_MAX_PLANES]; + struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; +}; + +#define DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS 8 +#define DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE 10 +#define DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE 3 + +struct dml2_implicit_svp_meta { + bool valid; + unsigned long v_active; + unsigned long v_total; + unsigned long v_front_porch; +}; + +struct dml2_pstate_per_method_common_meta { + /* generic params */ + int allow_start_otg_vline; + int allow_end_otg_vline; + /* scheduling params */ + double allow_time_us; + double disallow_time_us; + double period_us; +}; + +struct dml2_pstate_meta { + bool valid; + double otg_vline_time_us; + int scheduling_delay_otg_vlines; + int vertical_interrupt_ack_delay_otg_vlines; + int allow_to_target_delay_otg_vlines; + 
int contention_delay_otg_vlines; + int min_allow_width_otg_vlines; + int nom_vtotal; + int vblank_start; + double nom_refresh_rate_hz; + double nom_frame_time_us; + int max_vtotal; + double min_refresh_rate_hz; + double max_frame_time_us; + int blackout_otg_vlines; + int max_allow_delay_otg_vlines; + double nom_vblank_time_us; + struct { + double max_vactive_det_fill_delay_us; + double vactive_latency_hiding_us; + double reserved_vblank_required_us; + int max_vactive_det_fill_delay_otg_vlines; + int reserved_blank_required_vlines; + struct dml2_pstate_per_method_common_meta common; + } method_vactive; + struct { + struct dml2_pstate_per_method_common_meta common; + } method_vblank; + struct { + int programming_delay_otg_vlines; + int df_throttle_delay_otg_vlines; + int prefetch_to_mall_delay_otg_vlines; + unsigned long phantom_vactive; + unsigned long phantom_vfp; + unsigned long phantom_vtotal; + struct dml2_pstate_per_method_common_meta common; + } method_subvp; + struct { + int programming_delay_otg_vlines; + int stretched_vtotal; + struct dml2_pstate_per_method_common_meta common; + } method_drr; +}; + +/* mask of synchronized timings by stream index */ +struct dml2_pmo_synchronized_timing_groups { + unsigned int num_timing_groups; + unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; + bool group_is_drr_enabled[DML2_MAX_PLANES]; + bool group_is_drr_active[DML2_MAX_PLANES]; + double group_line_time_us[DML2_MAX_PLANES]; +}; + +struct dml2_optimization_stage3_state { + bool performed; + bool success; + + // The pstate support mode for each plane + // The number of valid elements == display_cfg.num_planes + // The indexing of pstate_switch_modes matches plane_descriptors[] + enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES]; + + // Meta-data for implicit SVP generation, indexed by stream index + struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; + + // Meta-data for FAMS2 + bool fams2_required; + struct dml2_pstate_meta stream_pstate_meta[DML2_MAX_PLANES]; + + int min_clk_index_for_latency; +}; + +struct dml2_optimization_stage4_state { + bool performed; + bool success; + bool unoptimizable_streams[DML2_MAX_DCN_PIPES]; +}; + +struct dml2_optimization_stage5_state { + bool performed; + bool success; + + bool optimal_reserved_time_in_vblank_us; + bool vblank_includes_z8_optimization; +}; + +struct display_configuation_with_meta { + struct dml2_display_cfg display_config; + + struct dml2_core_mode_support_result mode_support_result; + + // Stage 1 = Min Clocks for Latency + struct dml2_optimization_stage1_state stage1; + + // Stage 2 = MCache + struct dml2_optimization_stage2_state stage2; + + // Stage 3 = UCLK PState + struct dml2_optimization_stage3_state stage3; + + // Stage 4 = Vmin + struct dml2_optimization_stage4_state stage4; + + // Stage 5 = Stutter + struct dml2_optimization_stage5_state stage5; +}; + +struct dml2_pmo_pstate_strategy { + enum dml2_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; + bool allow_state_increase; +}; +struct dml2_core_mode_support_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + const struct display_configuation_with_meta *display_cfg; + + struct dml2_mcg_min_clock_table *min_clk_table; + int min_clk_index; + /* + * Outputs + */ + struct dml2_core_mode_support_result mode_support_result; + + struct { + // Inputs + struct dml_display_cfg_st *display_cfg; + + // Outputs + struct dml_mode_support_info_st *support_info; + unsigned int out_lowest_state_idx; + unsigned int min_fclk_khz; + 
unsigned int min_dcfclk_khz; + unsigned int min_dram_speed_mts; + unsigned int min_socclk_khz; + unsigned int min_dscclk_khz; + unsigned int min_dtbclk_khz; + unsigned int min_phyclk_khz; + } legacy; +}; + +struct dml2_core_mode_programming_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + const struct display_configuation_with_meta *display_cfg; + const struct core_display_cfg_support_info *cfg_support_info; + /* + * Outputs (also Input the clk freq are also from programming struct) + */ + struct dml2_display_cfg_programming *programming; + +}; + +struct dml2_core_populate_informative_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + + // If this is set, then the mode was supported, and mode programming + // was successfully run. + // Otherwise, mode programming was not run, because mode support failed. + bool mode_is_supported; + + /* + * Outputs + */ + struct dml2_display_cfg_programming *programming; +}; + +struct dml2_calculate_mcache_allocation_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + const struct dml2_plane_parameters *plane_descriptor; + unsigned int plane_index; + + /* + * Outputs + */ + struct dml2_mcache_surface_allocation *mcache_allocation; +}; + +struct dml2_core_internal_state_inputs { + unsigned int dummy; +}; + +struct dml2_core_internal_state_intermediates { + unsigned int dummy; +}; + +struct dml2_core_mode_support_locals { + union { + struct dml2_core_calcs_mode_support_ex mode_support_ex_params; + }; + struct dml2_display_cfg svp_expanded_display_cfg; + struct dml2_calculate_mcache_allocation_in_out calc_mcache_allocation_params; +}; + +struct dml2_core_mode_programming_locals { + union { + struct dml2_core_calcs_mode_programming_ex mode_programming_ex_params; + }; + struct dml2_display_cfg svp_expanded_display_cfg; +}; + +struct dml2_core_scratch { + struct dml2_core_mode_support_locals mode_support_locals; + struct dml2_core_mode_programming_locals mode_programming_locals; + int main_stream_index_from_svp_stream_index[DML2_MAX_PLANES]; + int svp_stream_index_from_main_stream_index[DML2_MAX_PLANES]; + int main_plane_index_to_phantom_plane_index[DML2_MAX_PLANES]; + int phantom_plane_index_to_main_plane_index[DML2_MAX_PLANES]; +}; + +struct dml2_core_instance { + enum dml2_project_id project_id; + struct dml2_mcg_min_clock_table *minimum_clock_table; + struct dml2_core_internal_state_inputs inputs; + struct dml2_core_internal_state_intermediates intermediates; + + struct dml2_core_scratch scratch; + + bool (*initialize)(struct dml2_core_initialize_in_out *in_out); + bool (*mode_support)(struct dml2_core_mode_support_in_out *in_out); + bool (*mode_programming)(struct dml2_core_mode_programming_in_out *in_out); + bool (*populate_informative)(struct dml2_core_populate_informative_in_out *in_out); + bool (*calculate_mcache_allocation)(struct dml2_calculate_mcache_allocation_in_out *in_out); + + struct { + struct dml2_core_internal_display_mode_lib mode_lib; + } clean_me_up; +}; + +/* +* DML2 PMO Types and Interfaces +*/ + +struct dml2_pmo_initialize_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct dml2_soc_bb *soc_bb; + struct dml2_ip_capabilities *ip_caps; + struct dml2_pmo_options *options; + int mcg_clock_table_size; +}; + +struct dml2_pmo_optimize_dcc_mcache_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + const struct dml2_display_cfg *display_config; + bool *dcc_mcache_supported; + struct core_display_cfg_support_info *cfg_support_info; + + /* + * 
Output + */ + struct dml2_display_cfg *optimized_display_cfg; +}; + +struct dml2_pmo_init_for_vmin_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_test_for_vmin_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + const struct display_configuation_with_meta *display_config; + const struct dml2_soc_vmin_clock_limits *vmin_limits; +}; + +struct dml2_pmo_optimize_for_vmin_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; + + /* + * Output + */ + struct display_configuation_with_meta *optimized_display_config; +}; + +struct dml2_pmo_init_for_pstate_support_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_test_for_pstate_support_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_optimize_for_pstate_support_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; + bool last_candidate_failed; + + /* + * Output + */ + struct display_configuation_with_meta *optimized_display_config; +}; + +struct dml2_pmo_init_for_stutter_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_test_for_stutter_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_optimize_for_stutter_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; + bool last_candidate_failed; + + /* + * Output + */ + struct display_configuation_with_meta *optimized_display_config; +}; + +#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw - dml2_pstate_method_na + 1)) - 1) << dml2_pstate_method_na) +#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) +#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_clamped - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) +#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_drr + 1)) - 1) << dml2_pstate_method_fw_drr) +#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_svp + 1)) - 1) << dml2_pstate_method_fw_svp) + +#define PMO_DCN4_MAX_DISPLAYS 4 +#define PMO_DCN4_MAX_NUM_VARIANTS 2 +#define PMO_DCN4_MAX_BASE_STRATEGIES 10 + +struct dml2_scheduling_check_locals { + struct dml2_pstate_per_method_common_meta group_common_pstate_meta[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES]; +}; + +struct dml2_pmo_scratch { + union { + struct { + double reserved_time_candidates[DML2_MAX_PLANES][DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS]; + int reserved_time_candidates_count[DML2_MAX_PLANES]; + int current_candidate[DML2_MAX_PLANES]; + int min_latency_index; + int max_latency_index; + int cur_latency_index; + int stream_mask; + } pmo_dcn3; + struct { + struct dml2_pmo_pstate_strategy 
expanded_override_strategy_list[2 * 2 * 2 * 2]; + unsigned int num_expanded_override_strategies; + struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; + int num_pstate_candidates; + int cur_pstate_candidate; + + unsigned int stream_plane_mask[DML2_MAX_PLANES]; + + unsigned int stream_vactive_capability_mask; + + int min_latency_index; + int max_latency_index; + int cur_latency_index; + + // Stores all the implicit SVP meta information indexed by stream index of the display + // configuration under inspection, built at optimization stage init + struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; + struct dml2_pstate_meta stream_pstate_meta[DML2_MAX_PLANES]; + + unsigned int optimal_vblank_reserved_time_for_stutter_us[DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE]; + unsigned int num_stutter_candidates; + unsigned int cur_stutter_candidate; + bool z8_vblank_optimizable; + + /* mask of synchronized timings by stream index */ + unsigned int num_timing_groups; + unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; + bool group_is_drr_enabled[DML2_MAX_PLANES]; + bool group_is_drr_active[DML2_MAX_PLANES]; + double group_line_time_us[DML2_MAX_PLANES]; + + /* scheduling check locals */ + struct dml2_pstate_per_method_common_meta group_common_pstate_meta[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES]; + double group_phase_offset[DML2_MAX_PLANES]; + } pmo_dcn4; + }; +}; + +struct dml2_pmo_init_data { + union { + struct { + /* populated once during initialization */ + struct dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 4 * 4]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 6 * 6]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 8 * 8 * 8 * 8]; + unsigned int num_expanded_strategies_per_list[PMO_DCN4_MAX_DISPLAYS]; + } pmo_dcn4; + }; +}; + +struct dml2_pmo_instance { + struct dml2_soc_bb *soc_bb; + struct dml2_ip_capabilities *ip_caps; + + struct dml2_pmo_options *options; + + int disp_clk_vmin_threshold; + int mpc_combine_limit; + int odm_combine_limit; + int mcg_clock_table_size; + union { + struct { + struct { + int prefetch_end_to_mall_start_us; + int fw_processing_delay_us; + int refresh_rate_limit_min; + int refresh_rate_limit_max; + } subvp; + } v1; + struct { + struct { + int refresh_rate_limit_min; + int refresh_rate_limit_max; + } subvp; + struct { + int refresh_rate_limit_min; + int refresh_rate_limit_max; + } drr; + } v2; + } fams_params; + + bool (*initialize)(struct dml2_pmo_initialize_in_out *in_out); + bool (*optimize_dcc_mcache)(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + + bool (*init_for_vmin)(struct dml2_pmo_init_for_vmin_in_out *in_out); + bool (*test_for_vmin)(struct dml2_pmo_test_for_vmin_in_out *in_out); + bool (*optimize_for_vmin)(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + + bool (*init_for_uclk_pstate)(struct dml2_pmo_init_for_pstate_support_in_out *in_out); + bool (*test_for_uclk_pstate)(struct dml2_pmo_test_for_pstate_support_in_out *in_out); + bool (*optimize_for_uclk_pstate)(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + + bool (*init_for_stutter)(struct dml2_pmo_init_for_stutter_in_out *in_out); + bool (*test_for_stutter)(struct 
dml2_pmo_test_for_stutter_in_out *in_out); + bool (*optimize_for_stutter)(struct dml2_pmo_optimize_for_stutter_in_out *in_out); + + struct dml2_pmo_init_data init_data; + struct dml2_pmo_scratch scratch; +}; + +/* +* DML2 MCache Types +*/ + +struct top_mcache_validate_admissability_in_out { + struct dml2_instance *dml2_instance; + + const struct dml2_display_cfg *display_cfg; + const struct core_display_cfg_support_info *cfg_support_info; + struct dml2_mcache_surface_allocation *mcache_allocations; + + bool per_plane_status[DML2_MAX_PLANES]; + + struct { + const struct dml_mode_support_info_st *mode_support_info; + } legacy; +}; + +struct top_mcache_assign_ids_in_out { + /* + * Input + */ + const struct dml2_mcache_surface_allocation *mcache_allocations; + int plane_count; + + int per_pipe_viewport_x_start[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; + int per_pipe_viewport_x_end[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; + int pipe_count_per_plane[DML2_MAX_PLANES]; + + struct dml2_display_mcache_regs *current_mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp + + /* + * Output + */ + struct dml2_display_mcache_regs mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp + struct dml2_build_mcache_programming_in_out *mcache_programming; +}; + +struct top_mcache_calc_mcache_count_and_offsets_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; + const struct dml2_display_cfg *display_config; + + /* + * Outputs + */ + struct dml2_mcache_surface_allocation *mcache_allocations; +}; + +struct top_mcache_assign_global_mcache_ids_in_out { + /* + * Inputs/Outputs + */ + struct dml2_mcache_surface_allocation *allocations; + int num_allocations; +}; + +/* +* DML2 Top Types +*/ + +struct dml2_initialize_instance_locals { + int dummy; +}; + +struct dml2_optimization_init_function_locals { + union { + struct { + struct dml2_pmo_init_for_pstate_support_in_out init_params; + } uclk_pstate; + struct { + struct dml2_pmo_init_for_stutter_in_out stutter_params; + } stutter; + struct { + struct dml2_pmo_init_for_vmin_in_out init_params; + } vmin; + }; +}; + +struct dml2_optimization_test_function_locals { + union { + struct { + struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params; + struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params; + struct top_mcache_validate_admissability_in_out validate_admissibility_params; + } test_mcache; + struct { + struct dml2_pmo_test_for_vmin_in_out pmo_test_vmin_params; + } test_vmin; + struct { + struct dml2_pmo_test_for_pstate_support_in_out test_params; + } uclk_pstate; + struct { + struct dml2_pmo_test_for_stutter_in_out stutter_params; + } stutter; + }; +}; + +struct dml2_optimization_optimize_function_locals { + union { + struct { + struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params; + } optimize_mcache; + struct { + struct dml2_pmo_optimize_for_vmin_in_out pmo_optimize_vmin_params; + } optimize_vmin; + struct { + struct dml2_pmo_optimize_for_pstate_support_in_out optimize_params; + } uclk_pstate; + struct { + struct dml2_pmo_optimize_for_stutter_in_out stutter_params; + } stutter; + }; +}; + +struct dml2_optimization_phase_locals { + struct display_configuation_with_meta cur_candidate_display_cfg; + struct display_configuation_with_meta next_candidate_display_cfg; + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_optimization_init_function_locals init_function_locals; + struct dml2_optimization_test_function_locals 
test_function_locals; + struct dml2_optimization_optimize_function_locals optimize_function_locals; +}; + +struct dml2_check_mode_supported_locals { + struct dml2_display_cfg display_cfg_working_copy; + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_optimization_phase_locals optimization_phase_locals; + struct display_configuation_with_meta base_display_config_with_meta; + struct display_configuation_with_meta optimized_display_config_with_meta; + struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out dppm_map_mode_params; +}; + +struct optimization_init_function_params { + struct dml2_optimization_init_function_locals *locals; + struct dml2_instance *dml; + struct display_configuation_with_meta *display_config; +}; + +struct optimization_test_function_params { + struct dml2_optimization_test_function_locals *locals; + struct dml2_instance *dml; + struct display_configuation_with_meta *display_config; +}; + +struct optimization_optimize_function_params { + bool last_candidate_supported; + struct dml2_optimization_optimize_function_locals *locals; + struct dml2_instance *dml; + struct display_configuation_with_meta *display_config; + struct display_configuation_with_meta *optimized_display_config; +}; + +struct optimization_phase_params { + struct dml2_instance *dml; + const struct display_configuation_with_meta *display_config; // Initial Display Configuration + bool (*init_function)(const struct optimization_init_function_params *params); // Test function to determine optimization is complete + bool (*test_function)(const struct optimization_test_function_params *params); // Test function to determine optimization is complete + bool (*optimize_function)(const struct optimization_optimize_function_params *params); // Function which produces a more optimized display configuration + struct display_configuation_with_meta *optimized_display_config; // The optimized display configuration + + bool all_or_nothing; +}; + +struct dml2_build_mode_programming_locals { + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_core_mode_programming_in_out mode_programming_params; + struct dml2_core_populate_informative_in_out informative_params; + struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params; + struct display_configuation_with_meta base_display_config_with_meta; + struct display_configuation_with_meta optimized_display_config_with_meta; + struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out dppm_map_mode_params; + struct dml2_dpmm_map_watermarks_params_in_out dppm_map_watermarks_params; + struct dml2_optimization_phase_locals optimization_phase_locals; + struct optimization_phase_params min_clock_for_latency_phase; + struct optimization_phase_params mcache_phase; + struct optimization_phase_params uclk_pstate_phase; + struct optimization_phase_params vmin_phase; + struct optimization_phase_params stutter_phase; +}; + +struct dml2_legacy_core_build_mode_programming_wrapper_locals { + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_core_mode_programming_in_out mode_programming_params; + struct dml2_core_populate_informative_in_out informative_params; + struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params; + struct top_mcache_validate_admissability_in_out validate_admissibility_params; + struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; + struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params; + struct dml2_pmo_optimize_dcc_mcache_in_out 
optimize_mcache_params; + struct dml2_display_cfg optimized_display_cfg; + struct core_display_cfg_support_info core_support_info; +}; + +struct dml2_top_mcache_verify_mcache_size_locals { + struct dml2_calculate_mcache_allocation_in_out calc_mcache_params; +}; + +struct dml2_top_mcache_validate_admissability_locals { + struct { + int pipe_vp_startx[DML2_MAX_DCN_PIPES]; + int pipe_vp_endx[DML2_MAX_DCN_PIPES]; + } plane0; + struct { + int pipe_vp_startx[DML2_MAX_DCN_PIPES]; + int pipe_vp_endx[DML2_MAX_DCN_PIPES]; + } plane1; +}; + +struct dml2_top_display_cfg_support_info { + const struct dml2_display_cfg *display_config; + struct core_display_cfg_support_info core_info; +}; + +struct dml2_top_funcs { + bool (*check_mode_supported)(struct dml2_check_mode_supported_in_out *in_out); + bool (*build_mode_programming)(struct dml2_build_mode_programming_in_out *in_out); + bool (*build_mcache_programming)(struct dml2_build_mcache_programming_in_out *in_out); +}; + +struct dml2_instance { + enum dml2_project_id project_id; + + struct dml2_core_instance core_instance; + struct dml2_mcg_instance mcg_instance; + struct dml2_dpmm_instance dpmm_instance; + struct dml2_pmo_instance pmo_instance; + + struct dml2_soc_bb soc_bbox; + struct dml2_ip_capabilities ip_caps; + + struct dml2_mcg_min_clock_table min_clk_table; + struct dml2_pmo_options pmo_options; + struct dml2_top_funcs funcs; + + struct { + struct dml2_initialize_instance_locals initialize_instance_locals; + struct dml2_top_mcache_verify_mcache_size_locals mcache_verify_mcache_size_locals; + struct dml2_top_mcache_validate_admissability_locals mcache_validate_admissability_locals; + struct dml2_check_mode_supported_locals check_mode_supported_locals; + struct dml2_build_mode_programming_locals build_mode_programming_locals; + } scratch; + + struct { + struct { + struct dml2_legacy_core_build_mode_programming_wrapper_locals legacy_core_build_mode_programming_wrapper_locals; + } scratch; + } legacy; +}; +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c new file mode 100644 index 000000000000..4cfe64aa8492 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c @@ -0,0 +1,1174 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dml2_mall_phantom.h" + +#include "dml2_dc_types.h" +#include "dml2_internal_types.h" +#include "dml2_utils.h" +#include "dml2_dc_resource_mgmt.h" + +#define MAX_ODM_FACTOR 4 +#define MAX_MPCC_FACTOR 4 + +struct dc_plane_pipe_pool { + int pipes_assigned_to_plane[MAX_ODM_FACTOR][MAX_MPCC_FACTOR]; + bool pipe_used[MAX_ODM_FACTOR][MAX_MPCC_FACTOR]; + int num_pipes_assigned_to_plane_for_mpcc_combine; + int num_pipes_assigned_to_plane_for_odm_combine; +}; + +struct dc_pipe_mapping_scratch { + struct { + unsigned int odm_factor; + unsigned int odm_slice_end_x[MAX_PIPES]; + struct pipe_ctx *next_higher_pipe_for_odm_slice[MAX_PIPES]; + } odm_info; + struct { + unsigned int mpc_factor; + struct pipe_ctx *prev_odm_pipe; + } mpc_info; + + struct dc_plane_pipe_pool pipe_pool; +}; + +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) +{ + int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; + + if (!plane_id) + return false; + + for (i = 0; i < state->stream_count; i++) { + if (state->streams[i]->stream_id == stream_id) { + for (j = 0; j < state->stream_status[i].plane_count; j++) { + if (state->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (j == plane_index))) { + *plane_id = (i << 16) | j; + return true; + } + } + } + } + + return false; +} + +static int find_disp_cfg_idx_by_plane_id(struct dml2_dml_to_dc_pipe_mapping *mapping, unsigned int plane_id) +{ + int i; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (mapping->disp_cfg_to_plane_id_valid[i] && mapping->disp_cfg_to_plane_id[i] == plane_id) + return i; + } + + ASSERT(false); + return __DML2_WRAPPER_MAX_STREAMS_PLANES__; +} + +static int find_disp_cfg_idx_by_stream_id(struct dml2_dml_to_dc_pipe_mapping *mapping, unsigned int stream_id) +{ + int i; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (mapping->disp_cfg_to_stream_id_valid[i] && mapping->disp_cfg_to_stream_id[i] == stream_id) + return i; + } + + ASSERT(false); + return __DML2_WRAPPER_MAX_STREAMS_PLANES__; +} + +// The master pipe of a stream is defined as the top pipe in odm slice 0 +static struct pipe_ctx *find_master_pipe_of_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int stream_id) +{ + int i; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + if (state->res_ctx.pipe_ctx[i].stream && state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) { + if (!state->res_ctx.pipe_ctx[i].prev_odm_pipe && !state->res_ctx.pipe_ctx[i].top_pipe) + return &state->res_ctx.pipe_ctx[i]; + } + } + + return NULL; +} + +static struct pipe_ctx *find_master_pipe_of_plane(struct dml2_context *ctx, + struct dc_state *state, unsigned int plane_id) +{ + int i; + unsigned int plane_id_assigned_to_pipe; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(ctx, state, state->res_ctx.pipe_ctx[i].plane_state, + state->res_ctx.pipe_ctx[i].stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id_assigned_to_pipe)) { + if (plane_id_assigned_to_pipe == plane_id) + return &state->res_ctx.pipe_ctx[i]; + } + } + + return NULL; +} + +static unsigned int find_pipes_assigned_to_plane(struct dml2_context *ctx, + struct dc_state *state, unsigned int plane_id, unsigned int *pipes) +{ + 
int i; + unsigned int num_found = 0; + unsigned int plane_id_assigned_to_pipe = -1; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (!pipe->plane_state || !pipe->stream) + continue; + + get_plane_id(ctx, state, pipe->plane_state, pipe->stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[pipe->pipe_idx], + &plane_id_assigned_to_pipe); + if (plane_id_assigned_to_pipe == plane_id && !pipe->prev_odm_pipe + && (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) { + while (pipe) { + struct pipe_ctx *mpc_pipe = pipe; + + while (mpc_pipe) { + pipes[num_found++] = mpc_pipe->pipe_idx; + mpc_pipe = mpc_pipe->bottom_pipe; + if (!mpc_pipe) + break; + if (mpc_pipe->plane_state != pipe->plane_state) + mpc_pipe = NULL; + } + pipe = pipe->next_odm_pipe; + } + break; + } + } + + return num_found; +} + +static bool validate_pipe_assignment(const struct dml2_context *ctx, const struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, const struct dml2_dml_to_dc_pipe_mapping *mapping) +{ +// int i, j, k; +// +// unsigned int plane_id; +// +// unsigned int disp_cfg_index; +// +// unsigned int pipes_assigned_to_plane[MAX_PIPES]; +// unsigned int num_pipes_assigned_to_plane; +// +// struct pipe_ctx *top_pipe; +// +// for (i = 0; i < state->stream_count; i++) { +// for (j = 0; j < state->stream_status[i]->plane_count; j++) { +// if (get_plane_id(state, state->stream_status.plane_states[j], &plane_id)) { +// disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id); +// num_pipes_assigned_to_plane = find_pipes_assigned_to_plane(ctx, state, plane_id, pipes_assigned_to_plane); +// +// if (disp_cfg_index >= 0 && num_pipes_assigned_to_plane > 0) { +// // Verify the number of pipes assigned matches +// if (disp_cfg->hw.DPPPerSurface != num_pipes_assigned_to_plane) +// return false; +// +// top_pipe = find_top_pipe_in_tree(state->res_ctx.pipe_ctx[pipes_assigned_to_plane[0]]); +// +// // Verify MPC and ODM combine +// if (disp_cfg->hw.ODMMode == dml_odm_mode_bypass) { +// verify_combine_tree(top_pipe, state->streams[i]->stream_id, plane_id, state, false); +// } else { +// verify_combine_tree(top_pipe, state->streams[i]->stream_id, plane_id, state, true); +// } +// +// // TODO: could also do additional verification that the pipes in tree are the same as +// // pipes_assigned_to_plane +// } else { +// ASSERT(false); +// return false; +// } +// } else { +// ASSERT(false); +// return false; +// } +// } +// } + return true; +} + +static bool is_plane_using_pipe(const struct pipe_ctx *pipe) +{ + if (pipe->plane_state) + return true; + + return false; +} + +static bool is_pipe_free(const struct pipe_ctx *pipe) +{ + if (!pipe->plane_state && !pipe->stream) + return true; + + return false; +} + +static unsigned int find_preferred_pipe_candidates(const struct dc_state *existing_state, + const int pipe_count, + const unsigned int stream_id, + unsigned int *preferred_pipe_candidates) +{ + unsigned int num_preferred_candidates = 0; + int i; + + /* There is only one case which we consider for adding a pipe to the preferred + * pipe candidate array: + * + * 1. If the existing stream id of the pipe is equivalent to the stream id + * of the stream we are trying to achieve MPC/ODM combine for. This allows + * us to minimize the changes in pipe topology during the transition. + * + * However this condition comes with a caveat. 
We need to ignore pipes that will
+	 * require a change in OPP but still have the same stream id. For example during
+	 * an MPC to ODM transition.
+	 *
+	 * A check is added to avoid selecting the head pipe, by using the dc resource
+	 * helper function resource_get_primary_dpp_pipe and comparing the pipe index.
+	 */
+	if (existing_state) {
+		for (i = 0; i < pipe_count; i++) {
+			if (existing_state->res_ctx.pipe_ctx[i].stream && existing_state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) {
+				struct pipe_ctx *head_pipe =
+					resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
+					resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
+					NULL;
+
+				// we should always respect the head pipe from selection
+				if (head_pipe && head_pipe->pipe_idx == i)
+					continue;
+				if (existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
+					existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i &&
+					(existing_state->res_ctx.pipe_ctx[i].prev_odm_pipe ||
+					existing_state->res_ctx.pipe_ctx[i].next_odm_pipe))
+					continue;
+
+				preferred_pipe_candidates[num_preferred_candidates++] = i;
+			}
+		}
+	}
+
+	return num_preferred_candidates;
+}
+
+static unsigned int find_last_resort_pipe_candidates(const struct dc_state *existing_state,
+	const int pipe_count,
+	const unsigned int stream_id,
+	unsigned int *last_resort_pipe_candidates)
+{
+	unsigned int num_last_resort_candidates = 0;
+	int i;
+
+	/* There are two cases where we would like to add a given pipe into the last
+	 * candidate array:
+	 *
+	 * 1. If the pipe requires a change in OPP, for example during an MPC
+	 * to ODM transition.
+	 *
+	 * 2. If the pipe already has an enabled OTG.
+	 */
+	if (existing_state) {
+		for (i = 0; i < pipe_count; i++) {
+			struct pipe_ctx *head_pipe =
+				resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
+				resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
+				NULL;
+
+			// we should always respect the head pipe from selection
+			if (head_pipe && head_pipe->pipe_idx == i)
+				continue;
+			if ((existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
+				existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) ||
+				existing_state->res_ctx.pipe_ctx[i].stream_res.tg)
+				last_resort_pipe_candidates[num_last_resort_candidates++] = i;
+		}
+	}
+
+	return num_last_resort_candidates;
+}
+
+static bool is_pipe_in_candidate_array(const unsigned int pipe_idx,
+	const unsigned int *candidate_array,
+	const unsigned int candidate_array_size)
+{
+	int i;
+
+	for (i = 0; i < candidate_array_size; i++) {
+		if (candidate_array[i] == pipe_idx)
+			return true;
+	}
+
+	return false;
+}
+
+static bool find_more_pipes_for_stream(struct dml2_context *ctx,
+	struct dc_state *state, // The state we want to find a free mapping in
+	unsigned int stream_id, // The stream we want this pipe to drive
+	int *assigned_pipes,
+	int *assigned_pipe_count,
+	int pipes_needed,
+	const struct dc_state *existing_state) // The state (optional) that we want to minimize remapping relative to
+{
+	struct pipe_ctx *pipe = NULL;
+	unsigned int preferred_pipe_candidates[MAX_PIPES] = {0};
+	unsigned int last_resort_pipe_candidates[MAX_PIPES] = {0};
+	unsigned int num_preferred_candidates = 0;
+	unsigned int num_last_resort_candidates = 0;
+	int i;
+
+	if (existing_state) {
+		num_preferred_candidates =
+			find_preferred_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, preferred_pipe_candidates);
+
+		num_last_resort_candidates =
+			find_last_resort_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, last_resort_pipe_candidates);
+	}
+
+	// First see if any of the preferred are unmapped, and choose those instead
+	for (i = 0; pipes_needed > 0 && i < num_preferred_candidates; i++) {
+		pipe = &state->res_ctx.pipe_ctx[preferred_pipe_candidates[i]];
+		if (!is_plane_using_pipe(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense; pipe_idx should always be valid
+			pipe->pipe_idx = preferred_pipe_candidates[i];
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	// We like to pair pipes starting from the higher order indices for combining
+	for (i = ctx->config.dcn_pipe_count - 1; pipes_needed > 0 && i >= 0; i--) {
+		// Ignore any pipes that are the preferred or last resort candidate
+		if (is_pipe_in_candidate_array(i, preferred_pipe_candidates, num_preferred_candidates) ||
+			is_pipe_in_candidate_array(i, last_resort_pipe_candidates, num_last_resort_candidates))
+			continue;
+
+		pipe = &state->res_ctx.pipe_ctx[i];
+		if (!is_plane_using_pipe(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense; pipe_idx should always be valid
+			pipe->pipe_idx = i;
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	// Only use the last resort pipe candidates as a last resort
+	for (i = 0; pipes_needed > 0 && i < num_last_resort_candidates; i++) {
+		pipe = &state->res_ctx.pipe_ctx[last_resort_pipe_candidates[i]];
+		if (!is_plane_using_pipe(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense; pipe_idx should always be valid
+			pipe->pipe_idx = last_resort_pipe_candidates[i];
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	ASSERT(pipes_needed <= 0); // Validation should prevent us from building a pipe context that exceeds the number of HW resources available
+
+	return pipes_needed <= 0;
+}
+
+static bool find_more_free_pipes(struct dml2_context *ctx,
+	struct dc_state *state, // The state we want to find a free mapping in
+	unsigned int stream_id, // The stream we want this pipe to drive
+	int *assigned_pipes,
+	int *assigned_pipe_count,
+	int pipes_needed,
+	const struct dc_state *existing_state) // The state (optional) that we want to minimize remapping relative to
+{
+	struct pipe_ctx *pipe = NULL;
+	unsigned int preferred_pipe_candidates[MAX_PIPES] = {0};
+	unsigned int last_resort_pipe_candidates[MAX_PIPES] = {0};
+	unsigned int num_preferred_candidates = 0;
+	unsigned int num_last_resort_candidates = 0;
+	int i;
+
+	if (existing_state) {
+		num_preferred_candidates =
+			find_preferred_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, preferred_pipe_candidates);
+
+		num_last_resort_candidates =
+			find_last_resort_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, last_resort_pipe_candidates);
+	}
+
+	// First see if any of the preferred are unmapped, and choose those instead
+	for (i = 0; pipes_needed > 0 && i < num_preferred_candidates; i++) {
+		pipe = &state->res_ctx.pipe_ctx[preferred_pipe_candidates[i]];
+		if (is_pipe_free(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense; pipe_idx should always be valid
+			pipe->pipe_idx = preferred_pipe_candidates[i];
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	// We like to pair pipes starting from the higher order indices for combining
+	for (i = ctx->config.dcn_pipe_count - 1; pipes_needed > 0 && i >= 0; i--) {
+		// Ignore any pipes that are the preferred or last resort candidate
+		if (is_pipe_in_candidate_array(i, preferred_pipe_candidates, num_preferred_candidates) ||
+			is_pipe_in_candidate_array(i, last_resort_pipe_candidates, num_last_resort_candidates))
+			continue;
+
+		pipe = &state->res_ctx.pipe_ctx[i];
+		if (is_pipe_free(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense; pipe_idx should always be valid
+			pipe->pipe_idx = i;
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	// Only use the last resort pipe candidates as a last resort
+	for (i = 0; pipes_needed > 0 && i < num_last_resort_candidates; i++) {
+		pipe = &state->res_ctx.pipe_ctx[last_resort_pipe_candidates[i]];
+		if (is_pipe_free(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense; pipe_idx should always be valid
+			pipe->pipe_idx = last_resort_pipe_candidates[i];
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	ASSERT(pipes_needed == 0); // Validation should prevent us from building a pipe context that exceeds the number of HW resources available
+
+	return pipes_needed == 0;
+}
+
+static void sort_pipes_for_splitting(struct dc_plane_pipe_pool *pipes)
+{
+	bool sorted, swapped;
+	unsigned int cur_index;
+	int odm_slice_index;
+
+	for (odm_slice_index = 0; odm_slice_index < pipes->num_pipes_assigned_to_plane_for_odm_combine; odm_slice_index++) {
+		// Sort each MPCC set
+		// Un-optimized bubble sort, but that's okay for array sizes <= 6
+
+		if (pipes->num_pipes_assigned_to_plane_for_mpcc_combine <= 1)
+			sorted = true;
+		else
+			sorted = false;
+
+		cur_index = 0;
+		swapped = false;
+		while (!sorted) {
+			if (pipes->pipes_assigned_to_plane[odm_slice_index][cur_index] > pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1]) {
+				swap(pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1],
+					pipes->pipes_assigned_to_plane[odm_slice_index][cur_index]);
+
+				swapped = true;
+			}
+
+			cur_index++;
+
+			if
(cur_index == pipes->num_pipes_assigned_to_plane_for_mpcc_combine - 1) { + cur_index = 0; + + if (swapped) + sorted = false; + else + sorted = true; + + swapped = false; + } + + } + } +} + +// For example, 3840 x 2160, ODM2:1 has a slice array of [1919, 3839], meaning, slice0 spans h_pixels 0->1919, and slice1 spans 1920->3840 +static void calculate_odm_slices(const struct dc_stream_state *stream, unsigned int odm_factor, unsigned int *odm_slice_end_x) +{ + unsigned int slice_size = 0; + int i; + + if (odm_factor < 1 || odm_factor > 4) { + ASSERT(false); + return; + } + + slice_size = stream->src.width / odm_factor; + + for (i = 0; i < odm_factor; i++) + odm_slice_end_x[i] = (slice_size * (i + 1)) - 1; + + odm_slice_end_x[odm_factor - 1] = stream->src.width - 1; +} + +static void add_odm_slice_to_odm_tree(struct dml2_context *ctx, + struct dc_state *state, + struct dc_pipe_mapping_scratch *scratch, + unsigned int odm_slice_index) +{ + struct pipe_ctx *pipe = NULL; + int i; + + // MPCC Combine + ODM Combine is not supported, so there should never be a case where the current plane + // has more than 1 pipe mapped to it for a given slice. + ASSERT(scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine == 1 || scratch->pipe_pool.num_pipes_assigned_to_plane_for_odm_combine == 1); + + for (i = 0; i < scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine; i++) { + pipe = &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][i]]; + + if (scratch->mpc_info.prev_odm_pipe) + scratch->mpc_info.prev_odm_pipe->next_odm_pipe = pipe; + + pipe->prev_odm_pipe = scratch->mpc_info.prev_odm_pipe; + pipe->next_odm_pipe = NULL; + } + scratch->mpc_info.prev_odm_pipe = pipe; +} + +static struct pipe_ctx *add_plane_to_blend_tree(struct dml2_context *ctx, + struct dc_state *state, + const struct dc_plane_state *plane, + struct dc_plane_pipe_pool *pipe_pool, + unsigned int odm_slice, + struct pipe_ctx *top_pipe) +{ + int i; + + for (i = 0; i < pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine; i++) { + if (top_pipe) + top_pipe->bottom_pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]]; + + pipe_pool->pipe_used[odm_slice][i] = true; + + state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]].top_pipe = top_pipe; + state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]].bottom_pipe = NULL; + + top_pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]]; + } + + // After running the above loop, the top pipe actually ends up pointing to the bottom of this MPCC combine tree, so we are actually + // returning the bottom pipe here + return top_pipe; +} + +static unsigned int find_pipes_assigned_to_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int stream_id, unsigned int *pipes) +{ + int i; + unsigned int num_found = 0; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (pipe->stream && pipe->stream->stream_id == stream_id && !pipe->top_pipe && !pipe->prev_odm_pipe) { + while (pipe) { + pipes[num_found++] = pipe->pipe_idx; + pipe = pipe->next_odm_pipe; + } + break; + } + } + + return num_found; +} + +static struct pipe_ctx *assign_pipes_to_stream(struct dml2_context *ctx, struct dc_state *state, + const struct dc_stream_state *stream, + int odm_factor, + struct dc_plane_pipe_pool *pipe_pool, + const struct dc_state *existing_state) +{ + struct pipe_ctx *master_pipe; + unsigned int pipes_needed; 
+ unsigned int pipes_assigned; + unsigned int pipes[MAX_PIPES] = {0}; + unsigned int next_pipe_to_assign; + int odm_slice; + + pipes_needed = odm_factor; + + master_pipe = find_master_pipe_of_stream(ctx, state, stream->stream_id); + ASSERT(master_pipe); + + pipes_assigned = find_pipes_assigned_to_stream(ctx, state, stream->stream_id, pipes); + + find_more_free_pipes(ctx, state, stream->stream_id, pipes, &pipes_assigned, pipes_needed - pipes_assigned, existing_state); + + ASSERT(pipes_assigned == pipes_needed); + + next_pipe_to_assign = 0; + for (odm_slice = 0; odm_slice < odm_factor; odm_slice++) + pipe_pool->pipes_assigned_to_plane[odm_slice][0] = pipes[next_pipe_to_assign++]; + + pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine = 1; + pipe_pool->num_pipes_assigned_to_plane_for_odm_combine = odm_factor; + + return master_pipe; +} + +static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct dc_state *state, + const struct dc_stream_state *stream, + const struct dc_plane_state *plane, + int odm_factor, + int mpc_factor, + int plane_index, + struct dc_plane_pipe_pool *pipe_pool, + const struct dc_state *existing_state) +{ + struct pipe_ctx *master_pipe = NULL; + unsigned int plane_id; + unsigned int pipes_needed; + unsigned int pipes_assigned; + unsigned int pipes[MAX_PIPES] = {0}; + unsigned int next_pipe_to_assign; + int odm_slice, mpc_slice; + + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { + ASSERT(false); + return master_pipe; + } + + pipes_needed = mpc_factor * odm_factor; + + master_pipe = find_master_pipe_of_plane(ctx, state, plane_id); + ASSERT(master_pipe); + + pipes_assigned = find_pipes_assigned_to_plane(ctx, state, plane_id, pipes); + + find_more_pipes_for_stream(ctx, state, stream->stream_id, pipes, &pipes_assigned, pipes_needed - pipes_assigned, existing_state); + + ASSERT(pipes_assigned >= pipes_needed); + + next_pipe_to_assign = 0; + for (odm_slice = 0; odm_slice < odm_factor; odm_slice++) + for (mpc_slice = 0; mpc_slice < mpc_factor; mpc_slice++) + pipe_pool->pipes_assigned_to_plane[odm_slice][mpc_slice] = pipes[next_pipe_to_assign++]; + + pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine = mpc_factor; + pipe_pool->num_pipes_assigned_to_plane_for_odm_combine = odm_factor; + + return master_pipe; +} + +static bool is_pipe_used(const struct dc_plane_pipe_pool *pool, unsigned int pipe_idx) +{ + int i, j; + + for (i = 0; i < pool->num_pipes_assigned_to_plane_for_odm_combine; i++) { + for (j = 0; j < pool->num_pipes_assigned_to_plane_for_mpcc_combine; j++) { + if (pool->pipes_assigned_to_plane[i][j] == pipe_idx && pool->pipe_used[i][j]) + return true; + } + } + + return false; +} + +static void free_pipe(struct pipe_ctx *pipe) +{ + memset(pipe, 0, sizeof(struct pipe_ctx)); +} + +static void free_unused_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, + const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id, int plane_index) +{ + int i; + bool is_plane_duplicate = ctx->v20.scratch.plane_duplicate_exists; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + if (state->res_ctx.pipe_ctx[i].plane_state == plane && + state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id && + (!is_plane_duplicate || + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx] == plane_index) && + !is_pipe_used(pool, state->res_ctx.pipe_ctx[i].pipe_idx)) { + free_pipe(&state->res_ctx.pipe_ctx[i]); + } + } +} + +static 
void remove_pipes_from_blend_trees(struct dml2_context *ctx, struct dc_state *state, struct dc_plane_pipe_pool *pipe_pool, unsigned int odm_slice) +{ + struct pipe_ctx *pipe; + int i; + + for (i = 0; i < pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine; i++) { + pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][0]]; + if (pipe->top_pipe) + pipe->top_pipe->bottom_pipe = pipe->bottom_pipe; + + if (pipe->bottom_pipe) + pipe->bottom_pipe = pipe->top_pipe; + + pipe_pool->pipe_used[odm_slice][i] = true; + } +} + +static void map_pipes_for_stream(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream, + struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state) +{ + int odm_slice_index; + struct pipe_ctx *master_pipe = NULL; + + + master_pipe = assign_pipes_to_stream(ctx, state, stream, scratch->odm_info.odm_factor, &scratch->pipe_pool, existing_state); + sort_pipes_for_splitting(&scratch->pipe_pool); + + for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) { + remove_pipes_from_blend_trees(ctx, state, &scratch->pipe_pool, odm_slice_index); + + add_odm_slice_to_odm_tree(ctx, state, scratch, odm_slice_index); + + ctx->config.callbacks.acquire_secondary_pipe_for_mpc_odm(ctx->config.callbacks.dc, state, + master_pipe, &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][0]], true); + } +} + +static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream, const struct dc_plane_state *plane, + int plane_index, struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state) +{ + int odm_slice_index; + unsigned int plane_id; + struct pipe_ctx *master_pipe = NULL; + int i; + + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { + ASSERT(false); + return; + } + + master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor, + scratch->mpc_info.mpc_factor, plane_index, &scratch->pipe_pool, existing_state); + sort_pipes_for_splitting(&scratch->pipe_pool); + + for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) { + // Now we have a list of all pipes to be used for this plane/stream, now setup the tree. 
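+		// The calls below first build the MPC blend tree for this ODM slice
+		// (top_pipe/bottom_pipe links between the pipes of the slice), then chain the
+		// slice into the stream's ODM tree (prev_odm_pipe/next_odm_pipe links between
+		// slices). next_higher_pipe_for_odm_slice[] records the bottom-most pipe of
+		// each slice's blend tree so the next plane of this stream can be attached
+		// below it on a subsequent call.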
+ scratch->odm_info.next_higher_pipe_for_odm_slice[odm_slice_index] = add_plane_to_blend_tree(ctx, state, + plane, + &scratch->pipe_pool, + odm_slice_index, + scratch->odm_info.next_higher_pipe_for_odm_slice[odm_slice_index]); + + add_odm_slice_to_odm_tree(ctx, state, scratch, odm_slice_index); + + for (i = 0; i < scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine; i++) { + + ctx->config.callbacks.acquire_secondary_pipe_for_mpc_odm(ctx->config.callbacks.dc, state, + master_pipe, &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][i]], true); + } + } + + free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id, plane_index); +} + +static unsigned int get_target_mpc_factor(struct dml2_context *ctx, + struct dc_state *state, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + const struct dc_stream_status *status, + const struct dc_stream_state *stream, + int plane_idx) +{ + unsigned int plane_id; + unsigned int cfg_idx; + unsigned int mpc_factor; + + if (ctx->architecture == dml2_architecture_20) { + get_plane_id(ctx, state, status->plane_states[plane_idx], + stream->stream_id, plane_idx, &plane_id); + cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); + mpc_factor = (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx]; + } else if (ctx->architecture == dml2_architecture_21) { + if (ctx->config.svp_pstate.callbacks.get_stream_subvp_type(state, stream) == SUBVP_PHANTOM) { + struct dc_stream_state *main_stream; + struct dc_stream_status *main_stream_status; + + /* get stream id of main stream */ + main_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(state, stream); + if (!main_stream) { + ASSERT(false); + return 1; + } + + main_stream_status = ctx->config.callbacks.get_stream_status(state, main_stream); + if (!main_stream_status) { + ASSERT(false); + return 1; + } + + /* get plane id for associated main plane */ + get_plane_id(ctx, state, main_stream_status->plane_states[plane_idx], + main_stream->stream_id, plane_idx, &plane_id); + } else { + get_plane_id(ctx, state, status->plane_states[plane_idx], + stream->stream_id, plane_idx, &plane_id); + } + + cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); + mpc_factor = ctx->v21.mode_programming.programming->plane_programming[cfg_idx].num_dpps_required; + } else { + mpc_factor = 1; + ASSERT(false); + } + + /* For stereo timings, we need to pipe split */ + if (dml2_is_stereo_timing(stream)) + mpc_factor = 2; + + return mpc_factor; +} + +static unsigned int get_target_odm_factor( + const struct dml2_context *ctx, + struct dc_state *state, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + const struct dc_stream_state *stream) +{ + unsigned int cfg_idx; + + if (ctx->architecture == dml2_architecture_20) { + cfg_idx = find_disp_cfg_idx_by_stream_id( + mapping, stream->stream_id); + switch (disp_cfg->hw.ODMMode[cfg_idx]) { + case dml_odm_mode_bypass: + return 1; + case dml_odm_mode_combine_2to1: + return 2; + case dml_odm_mode_combine_4to1: + return 4; + default: + break; + } + } else if (ctx->architecture == dml2_architecture_21) { + if (ctx->config.svp_pstate.callbacks.get_stream_subvp_type(state, stream) == SUBVP_PHANTOM) { + struct dc_stream_state *main_stream; + + /* get stream id of main stream */ + main_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(state, stream); + if (!main_stream) + goto failed; + + /* get cfg idx for associated main 
stream */ + cfg_idx = find_disp_cfg_idx_by_stream_id( + mapping, main_stream->stream_id); + } else { + cfg_idx = find_disp_cfg_idx_by_stream_id( + mapping, stream->stream_id); + } + + return ctx->v21.mode_programming.programming->stream_programming[cfg_idx].num_odms_required; + } + +failed: + ASSERT(false); + return 1; +} + +static unsigned int get_source_odm_factor(const struct dml2_context *ctx, + struct dc_state *state, + const struct dc_stream_state *stream) +{ + struct pipe_ctx *otg_master = ctx->config.callbacks.get_otg_master_for_stream(&state->res_ctx, stream); + + if (!otg_master) + return 0; + + return ctx->config.callbacks.get_odm_slice_count(otg_master); +} + +static unsigned int get_source_mpc_factor(const struct dml2_context *ctx, + struct dc_state *state, + const struct dc_plane_state *plane) +{ + struct pipe_ctx *dpp_pipes[MAX_PIPES] = {0}; + int dpp_pipe_count = ctx->config.callbacks.get_dpp_pipes_for_plane(plane, + &state->res_ctx, dpp_pipes); + + ASSERT(dpp_pipe_count > 0); + return ctx->config.callbacks.get_mpc_slice_count(dpp_pipes[0]); +} + + +static void populate_mpc_factors_for_stream( + struct dml2_context *ctx, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + struct dc_state *state, + unsigned int stream_idx, + struct dml2_pipe_combine_factor odm_factor, + struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES]) +{ + const struct dc_stream_status *status = &state->stream_status[stream_idx]; + int i; + + for (i = 0; i < status->plane_count; i++) { + mpc_factors[i].source = get_source_mpc_factor(ctx, state, status->plane_states[i]); + mpc_factors[i].target = (odm_factor.target == 1) ? + get_target_mpc_factor(ctx, state, disp_cfg, mapping, status, state->streams[stream_idx], i) : 1; + } +} + +static void populate_odm_factors(const struct dml2_context *ctx, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + struct dc_state *state, + struct dml2_pipe_combine_factor odm_factors[MAX_PIPES]) +{ + int i; + + for (i = 0; i < state->stream_count; i++) { + odm_factors[i].source = get_source_odm_factor(ctx, state, state->streams[i]); + odm_factors[i].target = get_target_odm_factor( + ctx, state, disp_cfg, mapping, state->streams[i]); + } +} + +static bool unmap_dc_pipes_for_stream(struct dml2_context *ctx, + struct dc_state *state, + const struct dc_state *existing_state, + const struct dc_stream_state *stream, + const struct dc_stream_status *status, + struct dml2_pipe_combine_factor odm_factor, + struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES]) +{ + int plane_idx; + bool result = true; + + for (plane_idx = 0; plane_idx < status->plane_count; plane_idx++) + if (mpc_factors[plane_idx].target < mpc_factors[plane_idx].source) + result &= ctx->config.callbacks.update_pipes_for_plane_with_slice_count( + state, + existing_state, + ctx->config.callbacks.dc->res_pool, + status->plane_states[plane_idx], + mpc_factors[plane_idx].target); + if (odm_factor.target < odm_factor.source) + result &= ctx->config.callbacks.update_pipes_for_stream_with_slice_count( + state, + existing_state, + ctx->config.callbacks.dc->res_pool, + stream, + odm_factor.target); + return result; +} + +static bool map_dc_pipes_for_stream(struct dml2_context *ctx, + struct dc_state *state, + const struct dc_state *existing_state, + const struct dc_stream_state *stream, + const struct dc_stream_status *status, + struct dml2_pipe_combine_factor odm_factor, + struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES]) +{ + int 
plane_idx; + bool result = true; + + for (plane_idx = 0; plane_idx < status->plane_count; plane_idx++) + if (mpc_factors[plane_idx].target > mpc_factors[plane_idx].source) + result &= ctx->config.callbacks.update_pipes_for_plane_with_slice_count( + state, + existing_state, + ctx->config.callbacks.dc->res_pool, + status->plane_states[plane_idx], + mpc_factors[plane_idx].target); + if (odm_factor.target > odm_factor.source) + result &= ctx->config.callbacks.update_pipes_for_stream_with_slice_count( + state, + existing_state, + ctx->config.callbacks.dc->res_pool, + stream, + odm_factor.target); + return result; +} + +static bool map_dc_pipes_with_callbacks(struct dml2_context *ctx, + struct dc_state *state, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + const struct dc_state *existing_state) +{ + int i; + bool result = true; + + populate_odm_factors(ctx, disp_cfg, mapping, state, ctx->pipe_combine_scratch.odm_factors); + for (i = 0; i < state->stream_count; i++) + populate_mpc_factors_for_stream(ctx, disp_cfg, mapping, state, + i, ctx->pipe_combine_scratch.odm_factors[i], ctx->pipe_combine_scratch.mpc_factors[i]); + for (i = 0; i < state->stream_count; i++) + result &= unmap_dc_pipes_for_stream(ctx, state, existing_state, state->streams[i], + &state->stream_status[i], ctx->pipe_combine_scratch.odm_factors[i], ctx->pipe_combine_scratch.mpc_factors[i]); + for (i = 0; i < state->stream_count; i++) + result &= map_dc_pipes_for_stream(ctx, state, existing_state, state->streams[i], + &state->stream_status[i], ctx->pipe_combine_scratch.odm_factors[i], ctx->pipe_combine_scratch.mpc_factors[i]); + + return result; +} + +bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, const struct dc_state *existing_state) +{ + int stream_index, plane_index, i; + + unsigned int stream_disp_cfg_index; + unsigned int plane_disp_cfg_index; + unsigned int disp_cfg_index_max; + + unsigned int plane_id; + unsigned int stream_id; + + const unsigned int *ODMMode, *DPPPerSurface; + unsigned int odm_mode_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}, dpp_per_surface_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; + struct dc_pipe_mapping_scratch scratch; + + if (ctx->config.map_dc_pipes_with_callbacks) + return map_dc_pipes_with_callbacks( + ctx, state, disp_cfg, mapping, existing_state); + + if (ctx->architecture == dml2_architecture_21) { + /* + * Extract ODM and DPP outputs from DML2.1 and map them in an array as required for pipe mapping in dml2_map_dc_pipes. + * As data cannot be directly extracted in const pointers, assign these arrays to const pointers before proceeding to + * maximize the reuse of existing code. Const pointers are required because dml2.0 dml_display_cfg_st is const. 
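+		 * For dml2_architecture_21 the arrays below therefore carry raw pipe counts
+		 * taken from mode programming (num_odms_required of 1/2/4 per stream and
+		 * num_dpps_required per plane), whereas for dml2_architecture_20 ODMMode
+		 * still holds dml_odm_mode_* enum values; the per-architecture handling
+		 * further down accounts for this difference.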
+		 *
+		 */
+		for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+			odm_mode_array[i] = ctx->v21.mode_programming.programming->stream_programming[i].num_odms_required;
+			dpp_per_surface_array[i] = ctx->v21.mode_programming.programming->plane_programming[i].num_dpps_required;
+		}
+
+		ODMMode = (const unsigned int *)odm_mode_array;
+		DPPPerSurface = (const unsigned int *)dpp_per_surface_array;
+		disp_cfg_index_max = __DML2_WRAPPER_MAX_STREAMS_PLANES__;
+	} else {
+		ODMMode = (unsigned int *)disp_cfg->hw.ODMMode;
+		DPPPerSurface = disp_cfg->hw.DPPPerSurface;
+		disp_cfg_index_max = __DML_NUM_PLANES__;
+	}
+
+	for (stream_index = 0; stream_index < state->stream_count; stream_index++) {
+		memset(&scratch, 0, sizeof(struct dc_pipe_mapping_scratch));
+
+		stream_id = state->streams[stream_index]->stream_id;
+		stream_disp_cfg_index = find_disp_cfg_idx_by_stream_id(mapping, stream_id);
+		if (stream_disp_cfg_index >= disp_cfg_index_max)
+			continue;
+
+		if (ctx->architecture == dml2_architecture_20) {
+			if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) {
+				scratch.odm_info.odm_factor = 1;
+			} else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) {
+				scratch.odm_info.odm_factor = 2;
+			} else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) {
+				scratch.odm_info.odm_factor = 4;
+			} else {
+				ASSERT(false);
+				scratch.odm_info.odm_factor = 1;
+			}
+		} else if (ctx->architecture == dml2_architecture_21) {
+			/* After the DML2.1 update, the ODM interpretation is no longer the same
+			 * as for DML2.0. This is not an issue with the new resource management
+			 * logic; this block ensures backwards compatibility of legacy pipe
+			 * management with the updated DML.
+			 */
+			if (ODMMode[stream_disp_cfg_index] == 1) {
+				scratch.odm_info.odm_factor = 1;
+			} else if (ODMMode[stream_disp_cfg_index] == 2) {
+				scratch.odm_info.odm_factor = 2;
+			} else if (ODMMode[stream_disp_cfg_index] == 4) {
+				scratch.odm_info.odm_factor = 4;
+			} else {
+				ASSERT(false);
+				scratch.odm_info.odm_factor = 1;
+			}
+		}
+		calculate_odm_slices(state->streams[stream_index], scratch.odm_info.odm_factor, scratch.odm_info.odm_slice_end_x);
+
+		// If there are no planes, you still want to set up ODM...
+		if (state->stream_status[stream_index].plane_count == 0) {
+			map_pipes_for_stream(ctx, state, state->streams[stream_index], &scratch, existing_state);
+		}
+
+		for (plane_index = 0; plane_index < state->stream_status[stream_index].plane_count; plane_index++) {
+			// Planes are ordered top to bottom.
+			if (get_plane_id(ctx, state, state->stream_status[stream_index].plane_states[plane_index],
+				stream_id, plane_index, &plane_id)) {
+				plane_disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id);
+
+				// Setup mpc_info for this plane
+				scratch.mpc_info.prev_odm_pipe = NULL;
+				if (scratch.odm_info.odm_factor == 1 && plane_disp_cfg_index < disp_cfg_index_max) {
+					// If ODM combine is not in use, then the number of pipes
+					// per plane is determined by the MPC combine factor
+					scratch.mpc_info.mpc_factor = DPPPerSurface[plane_disp_cfg_index];
+
+					// For stereo timings, we need to pipe split
+					if (dml2_is_stereo_timing(state->streams[stream_index]))
+						scratch.mpc_info.mpc_factor = 2;
+				} else {
+					// If ODM combine is enabled, then we use at most 1 pipe per
+					// odm slice per plane, i.e.
MPC combine is never used + scratch.mpc_info.mpc_factor = 1; + } + + ASSERT(scratch.odm_info.odm_factor * scratch.mpc_info.mpc_factor > 0); + + // Clear the pool assignment scratch (which is per plane) + memset(&scratch.pipe_pool, 0, sizeof(struct dc_plane_pipe_pool)); + + map_pipes_for_plane(ctx, state, state->streams[stream_index], + state->stream_status[stream_index].plane_states[plane_index], plane_index, &scratch, existing_state); + } else { + // Plane ID cannot be generated, therefore no DML mapping can be performed. + ASSERT(false); + } + } + + } + + if (!validate_pipe_assignment(ctx, state, disp_cfg, mapping)) + ASSERT(false); + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!ctx->config.callbacks.build_scaling_params(pipe)) { + ASSERT(false); + } + } + + if (ctx->config.callbacks.build_test_pattern_params && + pipe->stream && + pipe->prev_odm_pipe == NULL && + pipe->top_pipe == NULL) + ctx->config.callbacks.build_test_pattern_params(&state->res_ctx, pipe); + } + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h new file mode 100644 index 000000000000..1538b708d8be --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML2_DC_RESOURCE_MGMT_H__ +#define __DML2_DC_RESOURCE_MGMT_H__ + +#include "dml2_dc_types.h" + +struct dml2_context; +struct dml2_dml_to_dc_pipe_mapping; +struct dml_display_cfg_st; + +/* + * dml2_map_dc_pipes - Creates a pipe linkage in dc_state based on current display config. + * @ctx: Input dml2 context + * @state: Current dc_state to be updated. + * @disp_cfg: Current display config. + * @mapping: Pipe mapping logic structure to keep a track of pipes to be used. + * + * Based on ODM and DPPPersurface outputs calculated by the DML for the current display + * config, create a pipe linkage in dc_state which is then used by DC core. + * Make this function generic to be used by multiple DML versions. + * + * Return: True if pipe mapping and linking is successful, false otherwise. 
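+ *
+ * Illustrative usage (hypothetical caller shown for clarity; the argument
+ * sources are assumptions and depend on the integrating wrapper):
+ *
+ *	dml2_map_dc_pipes(dml2, context, &dml2->v20.scratch.cur_display_config,
+ *			&dml2->v20.scratch.dml_to_dc_pipe_mapping, dc->current_state);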
+ */ + +bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, const struct dc_state *existing_state); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h new file mode 100644 index 000000000000..7ca7f2a743c2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +/* + * Wrapper header for externally defined types from DC. These types come from + * dc headers when building DML2 as part of DC, but are defined here when building + * DML2 as a standalone library (such as for unit testing). + */ + +#ifndef __DML2_DC_TYPES_H__ +#define __DML2_DC_TYPES_H__ + +#include "resource.h" +#include "core_types.h" +#include "dsc.h" +#include "clk_mgr.h" +#include "dc_state_priv.h" + +#endif //__DML2_DC_TYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h new file mode 100644 index 000000000000..55b3e3ca54f7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML2_INTERNAL_TYPES_H__ +#define __DML2_INTERNAL_TYPES_H__ + +#include "dml2_dc_types.h" +#include "display_mode_core.h" +#include "dml2_wrapper.h" +#include "dml2_policy.h" + +#include "dml_top.h" +#include "dml21_wrapper.h" + +struct dml2_wrapper_optimize_configuration_params { + struct display_mode_lib_st *dml_core_ctx; + struct dml2_configuration_options *config; + struct ip_params_st *ip_params; + struct dml_display_cfg_st *cur_display_config; + struct dml_display_cfg_st *new_display_config; + const struct dml_mode_support_info_st *cur_mode_support_info; + struct dml_mode_eval_policy_st *cur_policy; + struct dml_mode_eval_policy_st *new_policy; +}; + +struct dml2_calculate_lowest_supported_state_for_temp_read_scratch { + struct dml_mode_support_info_st evaluation_info; + dml_float_t uclk_change_latencies[__DML_MAX_STATE_ARRAY_SIZE__]; + struct dml_display_cfg_st cur_display_config; + struct dml_display_cfg_st new_display_config; + struct dml_mode_eval_policy_st new_policy; + struct dml_mode_eval_policy_st cur_policy; +}; + +struct dml2_create_scratch { + struct dml2_policy_build_synthetic_soc_states_scratch build_synthetic_socbb_scratch; + struct soc_states_st in_states; +}; + +struct dml2_calculate_rq_and_dlg_params_scratch { + struct _vcs_dpi_dml_display_rq_regs_st rq_regs; + struct _vcs_dpi_dml_display_dlg_regs_st disp_dlg_regs; + struct _vcs_dpi_dml_display_ttu_regs_st disp_ttu_regs; +}; + +#define __DML2_WRAPPER_MAX_STREAMS_PLANES__ 6 + +struct dml2_dml_to_dc_pipe_mapping { + unsigned int disp_cfg_to_stream_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool disp_cfg_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int disp_cfg_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool disp_cfg_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int dml_pipe_idx_to_stream_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool dml_pipe_idx_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int dml_pipe_idx_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool dml_pipe_idx_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int dml_pipe_idx_to_plane_index[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool dml_pipe_idx_to_plane_index_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; +}; + +struct dml2_wrapper_scratch { + struct dml_display_cfg_st cur_display_config; + struct dml_display_cfg_st new_display_config; + struct dml_mode_eval_policy_st new_policy; + struct dml_mode_eval_policy_st cur_policy; + struct dml_mode_support_info_st mode_support_info; + struct dml_mode_support_ex_params_st mode_support_params; + + struct dummy_pstate_entry dummy_pstate_table[4]; + + struct dml2_create_scratch create_scratch; + struct dml2_calculate_lowest_supported_state_for_temp_read_scratch dml2_calculate_lowest_supported_state_for_temp_read_scratch; + struct dml2_calculate_rq_and_dlg_params_scratch calculate_rq_and_dlg_params_scratch; + + struct dml2_wrapper_optimize_configuration_params optimize_configuration_params; + struct dml2_policy_build_synthetic_soc_states_params build_synthetic_socbb_params; + + struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; + bool enable_flexible_pipe_mapping; + bool plane_duplicate_exists; + int 
hpo_stream_to_link_encoder_mapping[MAX_HPO_DP2_ENCODERS]; +}; + +struct dml2_helper_det_policy_scratch { + int dpps_per_surface[MAX_PLANES]; +}; + +enum dml2_architecture { + dml2_architecture_20, + dml2_architecture_21 +}; + +struct prepare_mcache_programming_locals { + struct dml2_build_mcache_programming_in_out build_mcache_programming_params; +}; + +struct dml21_wrapper_scratch { + struct prepare_mcache_programming_locals prepare_mcache_locals; + struct pipe_ctx temp_pipe; +}; + +struct dml2_pipe_combine_factor { + unsigned int source; + unsigned int target; +}; + +struct dml2_pipe_combine_scratch { + struct dml2_pipe_combine_factor odm_factors[MAX_PIPES]; + struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES][MAX_PIPES]; +}; + +struct dml2_context { + enum dml2_architecture architecture; + struct dml2_configuration_options config; + struct dml2_helper_det_policy_scratch det_helper_scratch; + struct dml2_pipe_combine_scratch pipe_combine_scratch; + union { + struct { + struct display_mode_lib_st dml_core_ctx; + struct dml2_wrapper_scratch scratch; + struct dcn_watermarks g6_temp_read_watermark_set; + } v20; + struct { + struct dml21_wrapper_scratch scratch; + struct dml2_initialize_instance_in_out dml_init; + struct dml2_display_cfg display_config; + struct dml2_check_mode_supported_in_out mode_support; + struct dml2_build_mode_programming_in_out mode_programming; + struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; + } v21; + }; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c new file mode 100644 index 000000000000..66040c877d68 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c @@ -0,0 +1,911 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
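As a side note on the dml2_context definition above: the architecture enum selects which member of the anonymous union is live, so code is expected to branch on it before touching v20 or v21 state. A minimal sketch of that pattern, assuming dml2_internal_types.h is included; the helper name dml2_example_core_ctx is illustrative and not part of this patch:

        static struct display_mode_lib_st *dml2_example_core_ctx(struct dml2_context *ctx)
        {
                /* Only the v20 wrapper carries a display_mode_lib_st core context. */
                if (ctx->architecture == dml2_architecture_20)
                        return &ctx->v20.dml_core_ctx;

                return NULL; /* dml2_architecture_21 state lives in ctx->v21 instead */
        }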
+ * + * Authors: AMD + * + */ + + +#include "dml2_dc_types.h" +#include "dml2_internal_types.h" +#include "dml2_utils.h" +#include "dml2_mall_phantom.h" + +unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, struct dc_state *context) +{ + uint32_t num_ways = 0; + uint32_t bytes_per_pixel = 0; + uint32_t cache_lines_used = 0; + uint32_t lines_per_way = 0; + uint32_t total_cache_lines = 0; + uint32_t bytes_in_mall = 0; + uint32_t num_mblks = 0; + uint32_t cache_lines_per_plane = 0; + uint32_t i = 0; + uint32_t mblk_width = 0; + uint32_t mblk_height = 0; + uint32_t full_vp_width_blk_aligned = 0; + uint32_t mall_alloc_width_blk_aligned = 0; + uint32_t mall_alloc_height_blk_aligned = 0; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + // Find the phantom pipes + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; + mblk_width = ctx->config.mall_cfg.mblk_width_pixels; + mblk_height = bytes_per_pixel == 4 ? ctx->config.mall_cfg.mblk_height_4bpe_pixels : ctx->config.mall_cfg.mblk_height_8bpe_pixels; + + /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) - + * FLOOR(vp_x_start, blk_width) + */ + full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x + + pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) - + (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width); + + /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */ + mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; + + /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ + mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / + mblk_height * mblk_height + mblk_height; + + /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; + * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c; + * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c); + * (Should be divisible, but round up if not) + */ + num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * + ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); + bytes_in_mall = num_mblks * ctx->config.mall_cfg.mblk_size_bytes; + // cache lines used is total bytes / cache_line size.
Add +2 for worst case alignment + // (MALL is 64-byte aligned) + cache_lines_per_plane = bytes_in_mall / ctx->config.mall_cfg.cache_line_size_bytes + 2; + + // For DCC we must cache the meat surface, so double cache lines required + if (pipe->plane_state->dcc.enable) + cache_lines_per_plane *= 2; + cache_lines_used += cache_lines_per_plane; + } + } + + total_cache_lines = ctx->config.mall_cfg.max_cab_allocation_bytes / ctx->config.mall_cfg.cache_line_size_bytes; + lines_per_way = total_cache_lines / ctx->config.mall_cfg.cache_num_ways; + num_ways = cache_lines_used / lines_per_way; + if (cache_lines_used % lines_per_way > 0) + num_ways++; + + return num_ways; +} + +static void merge_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *context) +{ + int i; + + /* merge pipes if necessary */ + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + // For now merge all pipes for SubVP since pipe split case isn't supported yet + + /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ + if (pipe->prev_odm_pipe) { + /*split off odm pipe*/ + pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; + if (pipe->next_odm_pipe) + pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; + + pipe->bottom_pipe = NULL; + pipe->next_odm_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + pipe->top_pipe = NULL; + pipe->prev_odm_pipe = NULL; + if (pipe->stream_res.dsc) + ctx->config.svp_pstate.callbacks.release_dsc(&context->res_ctx, ctx->config.svp_pstate.callbacks.dc->res_pool, &pipe->stream_res.dsc); + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + struct pipe_ctx *top_pipe = pipe->top_pipe; + struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; + + top_pipe->bottom_pipe = bottom_pipe; + if (bottom_pipe) + bottom_pipe->top_pipe = top_pipe; + + pipe->top_pipe = NULL; + pipe->bottom_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + } + } +} + +static bool all_pipes_have_stream_and_plane(struct dml2_context *ctx, const struct dc_state *context) +{ + int i; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (!pipe->plane_state) + return false; + } + return true; +} + +static bool mpo_in_use(const struct dc_state *context) +{ + int i; + + for (i = 0; i < context->stream_count; i++) { + if (context->stream_status[i].plane_count > 1) + return true; + } + return false; +} + +/* + * dcn32_get_num_free_pipes: Calculate number of free pipes + * + * This function assumes that a "used" pipe is a pipe that has + * both a stream and a plane assigned to it. 
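As an aside to dml2_helper_calculate_num_ways_for_subvp() above, the arithmetic reduces to: align the phantom viewport to MALL block geometry, count blocks, convert bytes to cache lines (plus two lines of alignment slop, doubled when DCC is enabled), and divide by the number of lines held in one way. A self-contained sketch with assumed, purely illustrative parameters (the real values come from ctx->config.mall_cfg; the function name is hypothetical):

        #include <stdint.h>

        static uint32_t example_subvp_ways(void)
        {
                /* Assumed example config: 128x64-pixel blocks (4 bpe), 64 KiB per block,
                 * 64-byte cache lines, 16 ways, 2 MiB of CAB. */
                const uint32_t mblk_w = 128, mblk_h = 64, mblk_bytes = 65536;
                const uint32_t line_bytes = 64, num_ways_total = 16, cab_bytes = 2 * 1024 * 1024;
                /* Assumed phantom plane: 1920 pixels wide, 160 SubVP lines, no DCC. */
                const uint32_t vp_width = 1920, svp_height = 160;

                uint32_t width_aligned  = (vp_width + mblk_w - 1) / mblk_w * mblk_w;                  /* 1920 */
                uint32_t height_aligned = (svp_height - 1 + mblk_h - 1) / mblk_h * mblk_h + mblk_h;   /* 256 */
                uint32_t num_mblks = (width_aligned / mblk_w) * (height_aligned / mblk_h);            /* 15 * 4 = 60 */
                uint32_t cache_lines = num_mblks * mblk_bytes / line_bytes + 2;                       /* 61442 */
                uint32_t lines_per_way = (cab_bytes / line_bytes) / num_ways_total;                   /* 2048 */

                return (cache_lines + lines_per_way - 1) / lines_per_way;                             /* 31 ways */
        }

The returned way count is what the wrapper would report for CAB allocation; with these assumed numbers a single phantom plane already consumes 31 of 16 ways, i.e. the example config would not fit.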
+ * + * @dc: current dc state + * @context: new dc state + * + * Return: + * Number of free pipes available in the context + */ +static unsigned int get_num_free_pipes(struct dml2_context *ctx, struct dc_state *state) +{ + unsigned int i; + unsigned int free_pipes = 0; + unsigned int num_pipes = 0; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (pipe->stream && !pipe->top_pipe) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + } + } + + free_pipes = ctx->config.dcn_pipe_count - num_pipes; + return free_pipes; +} + +/* + * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. + * + * We enter this function if we are Sub-VP capable (i.e. enough pipes available) + * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if + * we are forcing SubVP P-State switching on the current config. + * + * The number of pipes used for the chosen surface must be less than or equal to the + * number of free pipes available. + * + * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). + * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own + * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't + * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). + * + * @param dc: current dc state + * @param context: new dc state + * @param index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned + * + * Return: + * True if a valid pipe assignment was found for Sub-VP. Otherwise false. + */ +static bool assign_subvp_pipe(struct dml2_context *ctx, struct dc_state *context, unsigned int *index) +{ + unsigned int i, pipe_idx; + unsigned int max_frame_time = 0; + bool valid_assignment_found = false; + unsigned int free_pipes = 2; //dcn32_get_num_free_pipes(dc, context); + bool current_assignment_freesync = false; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + unsigned int num_pipes = 0; + unsigned int refresh_rate = 0; + + if (!pipe->stream) + continue; + + // Round up + refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) + / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); + /* SubVP pipe candidate requirements: + * - Refresh rate < 120hz + * - Not able to switch in vactive naturally (switching in active means the + * DET provides enough buffer to hide the P-State switch latency -- trying + * to combine this with SubVP can cause issues with the scheduling). 
+ */ + if (pipe->plane_state && !pipe->top_pipe && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_NONE && refresh_rate < 120 && + vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + + pipe = &context->res_ctx.pipe_ctx[i]; + if (num_pipes <= free_pipes) { + struct dc_stream_state *stream = pipe->stream; + unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / + (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; + if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { + *index = i; + max_frame_time = frame_us; + valid_assignment_found = true; + current_assignment_freesync = false; + /* For the 2-Freesync display case, still choose the one with the + * longest frame time + */ + } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || + (current_assignment_freesync && frame_us > max_frame_time))) { + *index = i; + valid_assignment_found = true; + current_assignment_freesync = true; + } + } + } + pipe_idx++; + } + return valid_assignment_found; +} + +/* + * enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP. + * + * This function returns true if there are enough free pipes + * to create the required phantom pipes for any given stream + * (that does not already have phantom pipe assigned). + * + * e.g. For a 2 stream config where the first stream uses one + * pipe and the second stream uses 2 pipes (i.e. pipe split), + * this function will return true because there is 1 remaining + * pipe which can be used as the phantom pipe for the non pipe + * split pipe. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * True if there are enough free pipes to assign phantom pipes to at least one + * stream that does not already have phantom pipes assigned. Otherwise false. + */ +static bool enough_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *state) +{ + unsigned int i, split_cnt, free_pipes; + unsigned int min_pipe_split = ctx->config.dcn_pipe_count + 1; // init as max number of pipes + 1 + bool subvp_possible = false; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + // Find the minimum pipe split count for non SubVP pipes + if (pipe->stream && !pipe->top_pipe && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_NONE) { + split_cnt = 0; + while (pipe) { + split_cnt++; + pipe = pipe->bottom_pipe; + } + + if (split_cnt < min_pipe_split) + min_pipe_split = split_cnt; + } + } + + free_pipes = get_num_free_pipes(ctx, state); + + // SubVP only possible if at least one pipe is being used (i.e. free_pipes + // should not equal to the pipe_count) + if (free_pipes >= min_pipe_split && free_pipes < ctx->config.dcn_pipe_count) + subvp_possible = true; + + return subvp_possible; +} + +/* + * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable + * + * High level algorithm: + * 1. Find longest microschedule length (in us) between the two SubVP pipes + * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both + * pipes still allows for the maximum microschedule to fit in the active + * region for both pipes. 
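To make the step-2 check described above concrete: once the per-stream VACTIVE/VBLANK durations and the worst-case microschedule time have been derived from the timings (as done in subvp_subvp_schedulable() below), the decision is a pair of half-interval comparisons. A minimal sketch, all values in microseconds, with an illustrative function name:

        #include <stdint.h>
        #include <stdbool.h>

        static bool example_subvp_pair_schedulable(int32_t vactive1_us, int32_t vblank1_us,
                                                   int32_t vactive2_us, int32_t vblank2_us,
                                                   uint32_t max_microschedule_us)
        {
                /* Worst case: one display's VBLANK lands in the middle of the other's
                 * VACTIVE, leaving roughly half of (VACTIVE - VBLANK) on either side. */
                return (vactive1_us - vblank2_us) / 2 > (int32_t)max_microschedule_us &&
                       (vactive2_us - vblank1_us) / 2 > (int32_t)max_microschedule_us;
        }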
+ * + * @dc: current dc state + * @context: new dc state + * + * Return: + * bool - True if the SubVP + SubVP config is schedulable, false otherwise + */ +static bool subvp_subvp_schedulable(struct dml2_context *ctx, struct dc_state *context) +{ + struct pipe_ctx *subvp_pipes[2]; + struct dc_stream_state *phantom = NULL; + uint32_t microschedule_lines = 0; + uint32_t index = 0; + uint32_t i; + uint32_t max_microschedule_us = 0; + int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + uint32_t time_us = 0; + + /* Loop to calculate the maximum microschedule time between the two SubVP pipes, + * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. + */ + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + phantom = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); + microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + + phantom->timing.v_addressable; + + // Round up when calculating microschedule time (+ 1 at the end) + time_us = (microschedule_lines * phantom->timing.h_total) / + (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + + ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us + + ctx->config.svp_pstate.subvp_fw_processing_delay_us + 1; + if (time_us > max_microschedule_us) + max_microschedule_us = time_us; + + subvp_pipes[index] = pipe; + index++; + + // Maximum 2 SubVP pipes + if (index == 2) + break; + } + } + vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * + subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * + subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + + if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && + (vactive2_us - vblank1_us) / 2 > max_microschedule_us) + return true; + + return false; +} + +/* + * dml2_svp_drr_schedulable: Determine if SubVP + DRR config is schedulable + * + * High level algorithm: + * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe + * 2. 
Determine the frame time for the DRR display when adding required margin for MCLK switching + * (the margin is equal to the MALL region + DRR margin (500us)) + * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame)) + * then report the configuration as supported + * + * @dc: current dc state + * @context: new dc state + * @drr_pipe: DRR pipe_ctx for the SubVP + DRR config + * + * Return: + * bool - True if the SubVP + DRR config is schedulable, false otherwise + */ +bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context, struct dc_crtc_timing *drr_timing) +{ + bool schedulable = false; + uint32_t i; + struct pipe_ctx *pipe = NULL; + struct dc_crtc_timing *main_timing = NULL; + struct dc_crtc_timing *phantom_timing = NULL; + struct dc_stream_state *phantom_stream; + int16_t prefetch_us = 0; + int16_t mall_region_us = 0; + int16_t drr_frame_us = 0; // nominal frame time + int16_t subvp_active_us = 0; + int16_t stretched_drr_us = 0; + int16_t drr_stretched_vblank_us = 0; + int16_t max_vblank_mallregion = 0; + + // Find SubVP pipe + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + // We check for master pipe, but it shouldn't matter since we only need + // the pipe for timing info (stream should be same for any pipe splits) + if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) + continue; + + // Find the SubVP pipe + if (ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) + break; + } + + phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); + main_timing = &pipe->stream->timing; + phantom_timing = &phantom_stream->timing; + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + drr_frame_us = drr_timing->v_total * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; + drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); + max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; + + /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the + * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis + * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + * and the max of (VBLANK blanking time, MALL region)). + */ + if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && + subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) + schedulable = true; + + return schedulable; +} + + +/* + * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable + * + * High level algorithm: + * 1. 
Get timing for SubVP pipe, phantom pipe, and VBLANK pipe + * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time)) + * then report the configuration as supported + * 3. If the VBLANK display is DRR, then take the DRR static schedulability path + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise + */ +static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *context) +{ + struct pipe_ctx *pipe = NULL; + struct pipe_ctx *subvp_pipe = NULL; + bool found = false; + bool schedulable = false; + uint32_t i = 0; + uint8_t vblank_index = 0; + uint16_t prefetch_us = 0; + uint16_t mall_region_us = 0; + uint16_t vblank_frame_us = 0; + uint16_t subvp_active_us = 0; + uint16_t vblank_blank_us = 0; + uint16_t max_vblank_mallregion = 0; + struct dc_crtc_timing *main_timing = NULL; + struct dc_crtc_timing *phantom_timing = NULL; + struct dc_crtc_timing *vblank_timing = NULL; + struct dc_stream_state *phantom_stream; + enum mall_stream_type pipe_mall_type; + + /* For SubVP + VBLANK/DRR cases, we assume there can only be + * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK + * is supported, it is either a single VBLANK case or two VBLANK + * displays which are synchronized (in which case they have identical + * timings). + */ + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); + + // We check for master pipe, but it shouldn't matter since we only need + // the pipe for timing info (stream should be same for any pipe splits) + if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) + continue; + + if (!found && pipe_mall_type == SUBVP_NONE) { + // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). 
+ vblank_index = i; + found = true; + } + + if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) + subvp_pipe = pipe; + } + // Use ignore_msa_timing_param flag to identify as DRR + if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) { + // SUBVP + DRR case + schedulable = dml2_svp_drr_schedulable(ctx, context, &context->res_ctx.pipe_ctx[vblank_index].stream->timing); + } else if (found) { + phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, subvp_pipe->stream); + main_timing = &subvp_pipe->stream->timing; + phantom_timing = &phantom_stream->timing; + vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; + // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe + // Also include the prefetch end to mallstart delay time + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total / + (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; + vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total / + (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; + + // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + // and the max of (VBLANK blanking time, MALL region) + // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) + if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) + schedulable = true; + } + return schedulable; +} + +/* + * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle + * static analysis based on the case. + * + * Three cases: + * 1. SubVP + SubVP + * 2. SubVP + VBLANK (DRR checked internally) + * 3. SubVP + VACTIVE (currently unsupported) + * + * @dc: current dc state + * @context: new dc state + * @vlevel: Voltage level calculated by DML + * + * Return: + * bool - True if statically schedulable, false otherwise + */ +bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc_state *context, enum dml_dram_clock_change_support pstate_change_type) +{ + bool schedulable = true; // true by default for single display case + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + uint32_t i, pipe_idx; + uint8_t subvp_count = 0; + uint8_t vactive_count = 0; + + for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); + + if (!pipe->stream) + continue; + + if (pipe->plane_state && !pipe->top_pipe && + pipe_mall_type == SUBVP_MAIN) + subvp_count++; + + // Count how many planes that aren't SubVP/phantom are capable of VACTIVE + // switching (SubVP + VACTIVE unsupported). 
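For reference, the stretched-frame test performed by dml2_svp_drr_schedulable() above can be restated compactly once the intermediate times are known. A hedged sketch (all values in microseconds; parameter and function names are illustrative, and max_frame_us stands for the longest frame allowed by the panel's minimum refresh rate):

        #include <stdint.h>
        #include <stdbool.h>

        static bool example_drr_stretch_fits(int32_t drr_frame_us, int32_t mall_region_us,
                                             int32_t drr_margin_us, int32_t max_frame_us,
                                             int32_t subvp_active_us, int32_t prefetch_us,
                                             int32_t drr_stretched_vblank_us)
        {
                int32_t stretched_drr_us = drr_frame_us + mall_region_us + drr_margin_us;
                int32_t max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ?
                                                drr_stretched_vblank_us : mall_region_us;

                /* The stretched frame must still be reachable at the panel's minimum
                 * refresh rate, and the SubVP active region must cover prefetch plus
                 * the stretched frame plus max(stretched VBLANK, MALL region). */
                return stretched_drr_us < max_frame_us &&
                       subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0;
        }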
In situations where we force + // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. + if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 && + pipe_mall_type == SUBVP_NONE) { + vactive_count++; + } + pipe_idx++; + } + + if (subvp_count == 2) { + // Static schedulability check for SubVP + SubVP case + schedulable = subvp_subvp_schedulable(ctx, context); + } else if (pstate_change_type == dml_dram_clock_change_vblank_w_mall_sub_vp) { + // Static schedulability check for SubVP + VBLANK case. Also handle the case where + // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) + if (vactive_count > 0) + schedulable = false; + else + schedulable = subvp_vblank_schedulable(ctx, context); + } else if (pstate_change_type == dml_dram_clock_change_vactive_w_mall_sub_vp && + vactive_count > 0) { + // For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default. + // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count. + // SubVP + VACTIVE currently unsupported + schedulable = false; + } + return schedulable; +} + +static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state *state, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + unsigned int dc_pipe_idx, + unsigned int svp_height, + unsigned int svp_vstartup) +{ + unsigned int i; + double line_time, fp_and_sync_width_time; + struct pipe_ctx *pipe; + uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; + static const double cvt_rb_vblank_max = ((double) 460 / (1000 * 1000)); + + // Find DML pipe index (pipe_idx) using dc_pipe_idx + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + pipe = &state->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (i == dc_pipe_idx) + break; + } + + // Calculate lines required for pstate allow width and FW processing delays + pstate_width_fw_delay_lines = ((double)(ctx->config.svp_pstate.subvp_fw_processing_delay_us + + ctx->config.svp_pstate.subvp_pstate_allow_width_us) / 1000000) * + (ref_pipe->stream->timing.pix_clk_100hz * 100) / + (double)ref_pipe->stream->timing.h_total; + + // DML calculation for MALL region doesn't take into account FW delay + // and required pstate allow width for multi-display cases + /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned + * to 2 swaths (i.e. 
16 lines) + */ + phantom_vactive = svp_height + pstate_width_fw_delay_lines + ctx->config.svp_pstate.subvp_swath_height_margin_lines; + + phantom_stream->timing.v_front_porch = 1; + + line_time = phantom_stream->timing.h_total / ((double)phantom_stream->timing.pix_clk_100hz * 100); + fp_and_sync_width_time = (phantom_stream->timing.v_front_porch + phantom_stream->timing.v_sync_width) * line_time; + + if ((svp_vstartup * line_time) + fp_and_sync_width_time > cvt_rb_vblank_max) { + svp_vstartup = (cvt_rb_vblank_max - fp_and_sync_width_time) / line_time; + } + + // For backporch of phantom pipe, use vstartup of the main pipe + phantom_bp = svp_vstartup; + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = phantom_vactive; + phantom_stream->src.y = 0; + phantom_stream->src.height = phantom_vactive; + + phantom_stream->timing.v_addressable = phantom_vactive; + + phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + + phantom_stream->timing.v_front_porch + + phantom_stream->timing.v_sync_width + + phantom_bp; + phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing +} + +static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int dc_pipe_idx, unsigned int svp_height, unsigned int vstartup) +{ + struct pipe_ctx *ref_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx]; + struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_phantom_stream( + ctx->config.svp_pstate.callbacks.dc, + state, + ref_pipe->stream); + + /* stream has limited viewport and small timing */ + memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); + memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); + memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); + set_phantom_stream_timing(ctx, state, ref_pipe, phantom_stream, dc_pipe_idx, svp_height, vstartup); + + ctx->config.svp_pstate.callbacks.add_phantom_stream(ctx->config.svp_pstate.callbacks.dc, + state, + phantom_stream, + ref_pipe->stream); + return phantom_stream; +} + +static void enable_phantom_plane(struct dml2_context *ctx, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + unsigned int dc_pipe_idx) +{ + struct dc_plane_state *phantom_plane = NULL; + struct dc_plane_state *prev_phantom_plane = NULL; + struct pipe_ctx *curr_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx]; + + while (curr_pipe) { + if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) { + phantom_plane = prev_phantom_plane; + } else { + phantom_plane = ctx->config.svp_pstate.callbacks.create_phantom_plane( + ctx->config.svp_pstate.callbacks.dc, + state, + curr_pipe->plane_state); + if (!phantom_plane) + return; + } + + memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); + memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, + sizeof(phantom_plane->scaling_quality)); + memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); + memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); + memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); + memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, + sizeof(phantom_plane->plane_size)); + memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, + 
sizeof(phantom_plane->tiling_info)); + memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc)); + //phantom_plane->tiling_info.gfx10compatible.compat_level = curr_pipe->plane_state->tiling_info.gfx10compatible.compat_level; + phantom_plane->format = curr_pipe->plane_state->format; + phantom_plane->rotation = curr_pipe->plane_state->rotation; + phantom_plane->visible = curr_pipe->plane_state->visible; + + /* Shadow pipe has small viewport. */ + phantom_plane->clip_rect.y = 0; + phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; + + ctx->config.svp_pstate.callbacks.add_phantom_plane(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state); + + curr_pipe = curr_pipe->bottom_pipe; + prev_phantom_plane = phantom_plane; + } +} + +static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_state *state, unsigned int main_pipe_idx, unsigned int svp_height, unsigned int vstartup) +{ + struct dc_stream_state *phantom_stream = NULL; + unsigned int i; + + // The index of the DC pipe passed into this function is guarenteed to + // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't + // already have phantom pipe assigned, etc.) by previous checks. + phantom_stream = enable_phantom_stream(ctx, state, main_pipe_idx, svp_height, vstartup); + enable_phantom_plane(ctx, state, phantom_stream, main_pipe_idx); + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + // Build scaling params for phantom pipes which were newly added. + // We determine which phantom pipes were added by comparing with + // the phantom stream. + if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { + pipe->stream->use_dynamic_meta = false; + pipe->plane_state->flip_immediate = false; + if (!ctx->config.svp_pstate.callbacks.build_scaling_params(pipe)) { + // Log / remove phantom pipes since failed to build scaling params + } + } + } +} + +static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context) +{ + int i, old_plane_count; + struct dc_stream_status *stream_status = NULL; + struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; + + for (i = 0; i < context->stream_count; i++) + if (context->streams[i] == stream) { + stream_status = &context->stream_status[i]; + break; + } + + if (stream_status == NULL) { + return false; + } + + old_plane_count = stream_status->plane_count; + + for (i = 0; i < old_plane_count; i++) + del_planes[i] = stream_status->plane_states[i]; + + for (i = 0; i < old_plane_count; i++) { + if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context)) + return false; + ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, context, del_planes[i]); + } + + return true; +} + +bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state *state) +{ + int i; + bool removed_pipe = false; + struct dc_stream_state *phantom_stream = NULL; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + // build scaling params for phantom pipes + if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { + phantom_stream = pipe->stream; + + 
remove_all_phantom_planes_for_stream(ctx, phantom_stream, state); + ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); + ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); + + removed_pipe = true; + } + + if (pipe->plane_state) { + pipe->plane_state->is_phantom = false; + } + } + return removed_pipe; +} + + +/* Conditions for setting up phantom pipes for SubVP: + * 1. Not force disable SubVP + * 2. Full update (i.e. DC_VALIDATE_MODE_AND_PROGRAMMING) + * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) + * 4. Display configuration passes validation + * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) + */ +bool dml2_svp_add_phantom_pipe_to_dc_state(struct dml2_context *ctx, struct dc_state *state, struct dml_mode_support_info_st *mode_support_info) +{ + unsigned int dc_pipe_idx, dml_pipe_idx; + unsigned int svp_height, vstartup; + + if (ctx->config.svp_pstate.force_disable_subvp) + return false; + + if (!all_pipes_have_stream_and_plane(ctx, state)) + return false; + + if (mpo_in_use(state)) + return false; + + merge_pipes_for_subvp(ctx, state); + // to re-initialize viewport after the pipe merge + for (int i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &state->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx->plane_state || !pipe_ctx->stream) + continue; + + ctx->config.svp_pstate.callbacks.build_scaling_params(pipe_ctx); + } + + if (enough_pipes_for_subvp(ctx, state) && assign_subvp_pipe(ctx, state, &dc_pipe_idx)) { + dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(ctx, state->res_ctx.pipe_ctx[dc_pipe_idx].stream->stream_id); + svp_height = mode_support_info->SubViewportLinesNeededInMALL[dml_pipe_idx]; + vstartup = dml_get_vstartup_calculated(&ctx->v20.dml_core_ctx, dml_pipe_idx); + + add_phantom_pipes_for_main_pipe(ctx, state, dc_pipe_idx, svp_height, vstartup); + + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h new file mode 100644 index 000000000000..9d64851f54e7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
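A brief note on set_phantom_stream_timing() above: the phantom stream's vertical total is rebuilt from the SubVP height plus the firmware/P-State-allow margin, with the front porch forced to one line and the back porch taken from the (possibly clamped) vstartup of the main pipe. A sketch of that composition, assuming all inputs are already expressed in lines and omitting the clamp against the CVT-RB vblank budget for brevity (names are illustrative):

        static unsigned int example_phantom_v_total(unsigned int svp_height,
                                                    unsigned int fw_delay_lines,
                                                    unsigned int swath_margin_lines,
                                                    unsigned int vstartup_lines,
                                                    unsigned int v_sync_width)
        {
                unsigned int v_front_porch = 1; /* phantom streams always use a 1-line front porch */
                unsigned int phantom_vactive = svp_height + fw_delay_lines + swath_margin_lines;

                /* The back porch of the phantom timing reuses the main pipe's vstartup. */
                return phantom_vactive + v_front_porch + v_sync_width + vstartup_lines;
        }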
+ * + * Authors: AMD + * + */ + +#ifndef __DML2_MALL_PHANTOM_H__ +#define __DML2_MALL_PHANTOM_H__ + +#include "dml2_dc_types.h" +#include "display_mode_core_structs.h" + +struct dml2_svp_helper_select_best_svp_candidate_params { + const struct dml_display_cfg_st *dml_config; + const struct dml_mode_support_info_st *mode_support_info; + const unsigned int blacklist; + unsigned int *candidate_index; +}; + +struct dml2_context; + +unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, struct dc_state *context); + +bool dml2_svp_add_phantom_pipe_to_dc_state(struct dml2_context *ctx, struct dc_state *state, struct dml_mode_support_info_st *mode_support_info); + +bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state *state); + +bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc_state *context, enum dml_dram_clock_change_support pstate_change_type); + +bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context, struct dc_crtc_timing *drr_timing); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c new file mode 100644 index 000000000000..ef693f608d59 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c @@ -0,0 +1,311 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
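The entry points declared in dml2_mall_phantom.h above are driven from the dml2 wrapper; a hedged sketch of the intended flow (the wrapper additionally re-runs DML mode support between adding phantom pipes and the schedulability check, which is omitted here, and the function name is illustrative):

        static bool example_try_subvp(struct dml2_context *ctx, struct dc_state *state,
                                      struct dml_mode_support_info_st *support,
                                      enum dml_dram_clock_change_support pstate_mode)
        {
                /* Add phantom pipes only when SubVP is actually needed for this config. */
                if (!dml2_svp_add_phantom_pipe_to_dc_state(ctx, state, support))
                        return false;

                /* If the resulting config is not statically schedulable, back out. */
                if (!dml2_svp_validate_static_schedulability(ctx, state, pstate_mode)) {
                        dml2_svp_remove_all_phantom_pipes(ctx, state);
                        return false;
                }
                return true;
        }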
+ * + * Authors: AMD + * + */ + +#include "dml2_policy.h" + +static void get_optimal_ntuple( + const struct soc_bounding_box_st *socbb, + struct soc_state_bounding_box_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + + entry->fabricclk_mhz = bw_on_sdp / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + + entry->dcfclk_mhz = bw_on_fabric / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = (float)(entry->dram_speed_mts * socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + + entry->fabricclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + entry->dcfclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + } +} + +static float calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st *socbb, + struct soc_state_bounding_box_st *entry) +{ + float memory_bw_mbytes_sec = (float)(entry->dram_speed_mts * socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + + float fabric_bw_mbytes_sec = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + + float sdp_bw_mbytes_sec = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + + float limiting_bw_mbytes_sec = memory_bw_mbytes_sec; + + if (fabric_bw_mbytes_sec < limiting_bw_mbytes_sec) + limiting_bw_mbytes_sec = fabric_bw_mbytes_sec; + + if (sdp_bw_mbytes_sec < limiting_bw_mbytes_sec) + limiting_bw_mbytes_sec = sdp_bw_mbytes_sec; + + return limiting_bw_mbytes_sec; +} + +static void insert_entry_into_table_sorted(const struct soc_bounding_box_st *socbb, + struct soc_states_st *table, + struct soc_state_bounding_box_st *entry) +{ + int index = 0; + int i = 0; + float net_bw_of_new_state = 0; + + get_optimal_ntuple(socbb, entry); + + if (table->num_states == 0) { + index = 0; + } else { + net_bw_of_new_state = calculate_net_bw_in_mbytes_sec(socbb, entry); + while (net_bw_of_new_state > calculate_net_bw_in_mbytes_sec(socbb, &table->state_array[index])) { + index++; + if (index >= (int) table->num_states) + break; + } + + for (i = table->num_states; i > index; i--) { + table->state_array[i] = table->state_array[i - 1]; + } + //ASSERT(index < MAX_CLK_TABLE_SIZE); + } + + table->state_array[index] = *entry; + table->state_array[index].dcfclk_mhz = (int)entry->dcfclk_mhz; + table->state_array[index].fabricclk_mhz = (int)entry->fabricclk_mhz; + table->state_array[index].dram_speed_mts = (int)entry->dram_speed_mts; + table->num_states++; +} + +static void remove_entry_from_table_at_index(struct soc_states_st 
*table, + unsigned int index) +{ + int i; + + if (table->num_states == 0) + return; + + for (i = index; i < (int) table->num_states - 1; i++) { + table->state_array[i] = table->state_array[i + 1]; + } + memset(&table->state_array[--table->num_states], 0, sizeof(struct soc_state_bounding_box_st)); +} + +int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s, + struct dml2_policy_build_synthetic_soc_states_params *p) +{ + int i, j; + unsigned int min_fclk_mhz = p->in_states->state_array[0].fabricclk_mhz; + unsigned int min_dcfclk_mhz = p->in_states->state_array[0].dcfclk_mhz; + unsigned int min_socclk_mhz = p->in_states->state_array[0].socclk_mhz; + + int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, + max_uclk_mhz = 0, max_socclk_mhz = 0; + + int num_uclk_dpms = 0, num_fclk_dpms = 0; + + for (i = 0; i < __DML_MAX_STATE_ARRAY_SIZE__; i++) { + if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = (int) p->in_states->state_array[i].dcfclk_mhz; + if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) + max_fclk_mhz = (int) p->in_states->state_array[i].fabricclk_mhz; + if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) + max_socclk_mhz = (int) p->in_states->state_array[i].socclk_mhz; + if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) + max_uclk_mhz = (int) p->in_states->state_array[i].dram_speed_mts; + if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = (int) p->in_states->state_array[i].dispclk_mhz; + if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = (int) p->in_states->state_array[i].dppclk_mhz; + if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; + if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; + + if (p->in_states->state_array[i].fabricclk_mhz > 0) + num_fclk_dpms++; + if (p->in_states->state_array[i].dram_speed_mts > 0) + num_uclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dppclk_mhz || !max_phyclk_mhz || !max_dtbclk_mhz) + return -1; + + p->out_states->num_states = 0; + + s->entry = p->in_states->state_array[0]; + + s->entry.dispclk_mhz = max_dispclk_mhz; + s->entry.dppclk_mhz = max_dppclk_mhz; + s->entry.dtbclk_mhz = max_dtbclk_mhz; + s->entry.phyclk_mhz = max_phyclk_mhz; + + s->entry.dscclk_mhz = max_dispclk_mhz / 3; + s->entry.phyclk_mhz = max_phyclk_mhz; + s->entry.dtbclk_mhz = max_dtbclk_mhz; + + // Insert all the DCFCLK STAs first + for (i = 0; i < p->num_dcfclk_stas; i++) { + s->entry.dcfclk_mhz = p->dcfclk_stas_mhz[i]; + s->entry.fabricclk_mhz = 0; + s->entry.dram_speed_mts = 0; + if (i > 0) + s->entry.socclk_mhz = max_socclk_mhz; + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + + // Insert the UCLK DPMS + for (i = 0; i < num_uclk_dpms; i++) { + s->entry.dcfclk_mhz = 0; + s->entry.fabricclk_mhz = 0; + s->entry.dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; + if (i == 0) { + s->entry.socclk_mhz = min_socclk_mhz; + } else { + s->entry.socclk_mhz = max_socclk_mhz; + } + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + + // Insert FCLK DPMs (if present) + if (num_fclk_dpms > 2) { + for (i = 0; i < num_fclk_dpms; i++) { + s->entry.dcfclk_mhz = 0; + s->entry.fabricclk_mhz = 
p->in_states->state_array[i].fabricclk_mhz; + s->entry.dram_speed_mts = 0; + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + } + // Add max FCLK + else { + s->entry.dcfclk_mhz = 0; + s->entry.fabricclk_mhz = p->in_states->state_array[num_fclk_dpms - 1].fabricclk_mhz; + s->entry.dram_speed_mts = 0; + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + + // Remove states that require higher clocks than are supported + for (i = p->out_states->num_states - 1; i >= 0; i--) { + if (p->out_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz || + p->out_states->state_array[i].fabricclk_mhz > max_fclk_mhz || + p->out_states->state_array[i].dram_speed_mts > max_uclk_mhz) + remove_entry_from_table_at_index(p->out_states, i); + } + + // At this point, the table contains all "points of interest" based on + // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock + // ratios (by derate, are exact). + + // Round up UCLK to DPMs + for (i = p->out_states->num_states - 1; i >= 0; i--) { + for (j = 0; j < num_uclk_dpms; j++) { + if (p->in_states->state_array[j].dram_speed_mts >= p->out_states->state_array[i].dram_speed_mts) { + p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[j].dram_speed_mts; + break; + } + } + } + + // If FCLK is coarse grained, round up to next DPMs + if (num_fclk_dpms > 2) { + for (i = p->out_states->num_states - 1; i >= 0; i--) { + for (j = 0; j < num_fclk_dpms; j++) { + if (p->in_states->state_array[j].fabricclk_mhz >= p->out_states->state_array[i].fabricclk_mhz) { + p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[j].fabricclk_mhz; + break; + } + } + } + } + + // Clamp to min FCLK/DCFCLK + for (i = p->out_states->num_states - 1; i >= 0; i--) { + if (p->out_states->state_array[i].fabricclk_mhz < min_fclk_mhz) { + p->out_states->state_array[i].fabricclk_mhz = min_fclk_mhz; + } + if (p->out_states->state_array[i].dcfclk_mhz < min_dcfclk_mhz) { + p->out_states->state_array[i].dcfclk_mhz = min_dcfclk_mhz; + } + } + + // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. + i = 0; + while (i < (int) p->out_states->num_states - 1) { + if (p->out_states->state_array[i].dcfclk_mhz == p->out_states->state_array[i + 1].dcfclk_mhz && + p->out_states->state_array[i].fabricclk_mhz == p->out_states->state_array[i + 1].fabricclk_mhz && + p->out_states->state_array[i].dram_speed_mts == p->out_states->state_array[i + 1].dram_speed_mts) + remove_entry_from_table_at_index(p->out_states, i); + else + i++; + } + + return 0; +} + +void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy) +{ + for (int i = 0; i < __DML_NUM_PLANES__; i++) { + policy->MPCCombineUse[i] = dml_mpc_as_needed_for_voltage; // TOREVIEW: Is this still needed? When is MPCC useful for pstate given CRB? + policy->ODMUse[i] = dml_odm_use_policy_combine_as_needed; + policy->ImmediateFlipRequirement[i] = dml_immediate_flip_required; + policy->AllowForPStateChangeOrStutterInVBlank[i] = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; + } + + /* Change the default policy initializations as per spreadsheet. We might need to + * review and change them later on as per Jun's earlier comments. + */ + policy->UseUnboundedRequesting = dml_unbounded_requesting_enable; + policy->UseMinimumRequiredDCFCLK = false; + policy->DRAMClockChangeRequirementFinal = true; // TOREVIEW: What does this mean? 
+ policy->FCLKChangeRequirementFinal = true; // TOREVIEW: What does this mean? + policy->USRRetrainingRequiredFinal = true; + policy->EnhancedPrefetchScheduleAccelerationFinal = true; // TOREVIEW: What does this mean? + policy->NomDETInKByteOverrideEnable = false; + policy->NomDETInKByteOverrideValue = 0; + policy->DCCProgrammingAssumesScanDirectionUnknownFinal = true; + policy->SynchronizeTimingsFinal = true; + policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = true; + policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = true; // TOREVIEW: What does this mean? + policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = true; // TOREVIEW: What does this mean? + if (project == dml_project_dcn35 || + project == dml_project_dcn36 || + project == dml_project_dcn351) { + policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; + policy->EnhancedPrefetchScheduleAccelerationFinal = 0; + policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ + policy->UseOnlyMaxPrefetchModes = 1; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h new file mode 100644 index 000000000000..e83e05248592 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
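Looking back at the synthetic state construction in dml2_policy.c above: each candidate state is ranked by its limiting bandwidth, i.e. the minimum of DRAM, fabric and SDP bandwidth after the urgent derates, and insert_entry_into_table_sorted() keeps the table ordered by that value. A condensed sketch of the ranking, mirroring calculate_net_bw_in_mbytes_sec() with derates given in percent (names are illustrative):

        static float example_limiting_bw_mbps(float dram_speed_mts, int num_chans, int dram_ch_bytes,
                                              float fclk_mhz, float dcfclk_mhz, int bus_width_bytes,
                                              float pct_dram, float pct_fabric, float pct_sdp)
        {
                float dram_bw   = dram_speed_mts * num_chans * dram_ch_bytes * (pct_dram / 100.0f);
                float fabric_bw = fclk_mhz * bus_width_bytes * (pct_fabric / 100.0f);
                float sdp_bw    = dcfclk_mhz * bus_width_bytes * (pct_sdp / 100.0f);
                float limit = dram_bw;

                if (fabric_bw < limit)
                        limit = fabric_bw;
                if (sdp_bw < limit)
                        limit = sdp_bw;
                return limit; /* states are kept in increasing order of this value */
        }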
+ * + */ + +#ifndef __DML2_POLICY_H__ +#define __DML2_POLICY_H__ + +#include "display_mode_core_structs.h" + +struct dml2_policy_build_synthetic_soc_states_params { + const struct soc_bounding_box_st *in_bbox; + struct soc_states_st *in_states; + struct soc_states_st *out_states; + int *dcfclk_stas_mhz; + int num_dcfclk_stas; +}; + +struct dml2_policy_build_synthetic_soc_states_scratch { + struct soc_state_bounding_box_st entry; +}; + +int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s, + struct dml2_policy_build_synthetic_soc_states_params *p); + +void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c new file mode 100644 index 000000000000..d834cb595afa --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c @@ -0,0 +1,1528 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
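With the dml2_policy.h declarations above in view, a hedged usage sketch of dml2_policy_build_synthetic_soc_states(): the caller supplies the SoC bounding box, the raw PMFW state table, an output table and a list of DCFCLK STA points. The DCFCLK values below are examples only, and the surrounding scratch/bounding-box objects are placeholders provided by the caller:

        static void example_build_states(struct dml2_policy_build_synthetic_soc_states_scratch *scratch,
                                         const struct soc_bounding_box_st *bbox,
                                         struct soc_states_st *pmfw_states,
                                         struct soc_states_st *out_states)
        {
                int dcfclk_stas_mhz[5] = { 300, 615, 906, 1324, 1564 }; /* example values only */
                struct dml2_policy_build_synthetic_soc_states_params p = {
                        .in_bbox = bbox,
                        .in_states = pmfw_states,
                        .out_states = out_states,
                        .dcfclk_stas_mhz = dcfclk_stas_mhz,
                        .num_dcfclk_stas = 5,
                };

                if (dml2_policy_build_synthetic_soc_states(scratch, &p) < 0)
                        return; /* a negative return means the input table had no usable max clocks */
        }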
+ * + * Authors: AMD + * + */ + +#include "display_mode_core.h" +#include "dml2_internal_types.h" +#include "dml2_translation_helper.h" + +#define NUM_DCFCLK_STAS 5 +#define NUM_DCFCLK_STAS_NEW 8 + +void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out) +{ + switch (dml2->v20.dml_core_ctx.project) { + case dml_project_dcn32: + case dml_project_dcn321: + default: + // Hardcoded values for DCN32x + out->vblank_nom_default_us = 600; + out->rob_buffer_size_kbytes = 128; + out->config_return_buffer_size_in_kbytes = 1280; + out->config_return_buffer_segment_size_in_kbytes = 64; + out->compressed_buffer_segment_size_in_kbytes = 64; + out->meta_fifo_size_in_kentries = 22; + out->zero_size_buffer_entries = 512; + out->dpte_buffer_size_in_pte_reqs_luma = 68; + out->dpte_buffer_size_in_pte_reqs_chroma = 36; + out->dcc_meta_buffer_size_bytes = 6272; + out->gpuvm_max_page_table_levels = 4; + out->hostvm_max_page_table_levels = 0; + out->pixel_chunk_size_kbytes = 8; + //out->alpha_pixel_chunk_size_kbytes; + out->min_pixel_chunk_size_bytes = 1024; + out->meta_chunk_size_kbytes = 2; + out->min_meta_chunk_size_bytes = 256; + out->writeback_chunk_size_kbytes = 8; + out->line_buffer_size_bits = 1171920; + out->max_line_buffer_lines = 32; + out->writeback_interface_buffer_size_kbytes = 90; + //Number of pipes after DCN Pipe harvesting + out->max_num_dpp = dml2->config.dcn_pipe_count; + out->max_num_otg = dml2->config.dcn_pipe_count; + out->max_num_wb = 1; + out->max_dchub_pscl_bw_pix_per_clk = 4; + out->max_pscl_lb_bw_pix_per_clk = 2; + out->max_lb_vscl_bw_pix_per_clk = 4; + out->max_vscl_hscl_bw_pix_per_clk = 4; + out->max_hscl_ratio = 6; + out->max_vscl_ratio = 6; + out->max_hscl_taps = 8; + out->max_vscl_taps = 8; + out->dispclk_ramp_margin_percent = 1; + out->dppclk_delay_subtotal = 47; + out->dppclk_delay_scl = 50; + out->dppclk_delay_scl_lb_only = 16; + out->dppclk_delay_cnvc_formatter = 28; + out->dppclk_delay_cnvc_cursor = 6; + out->cursor_buffer_size = 16; + out->cursor_chunk_size = 2; + out->dispclk_delay_subtotal = 125; + out->max_inter_dcn_tile_repeaters = 8; + out->writeback_max_hscl_ratio = 1; + out->writeback_max_vscl_ratio = 1; + out->writeback_min_hscl_ratio = 1; + out->writeback_min_vscl_ratio = 1; + out->writeback_max_hscl_taps = 1; + out->writeback_max_vscl_taps = 1; + out->writeback_line_buffer_buffer_size = 0; + out->num_dsc = 4; + out->maximum_dsc_bits_per_component = 12; + out->maximum_pixels_per_line_per_dsc_unit = 6016; + out->dsc422_native_support = true; + out->dcc_supported = true; + out->ptoi_supported = false; + + out->gpuvm_enable = false; + out->hostvm_enable = false; + out->cursor_64bpp_support = false; + out->dynamic_metadata_vm_enabled = false; + + out->max_num_hdmi_frl_outputs = 1; + out->max_num_dp2p0_outputs = 2; + out->max_num_dp2p0_streams = 4; + break; + + case dml_project_dcn35: + case dml_project_dcn351: + case dml_project_dcn36: + out->rob_buffer_size_kbytes = 64; + out->config_return_buffer_size_in_kbytes = 1792; + out->compressed_buffer_segment_size_in_kbytes = 64; + out->meta_fifo_size_in_kentries = 32; + out->zero_size_buffer_entries = 512; + out->pixel_chunk_size_kbytes = 8; + out->alpha_pixel_chunk_size_kbytes = 4; + out->min_pixel_chunk_size_bytes = 1024; + out->meta_chunk_size_kbytes = 2; + out->min_meta_chunk_size_bytes = 256; + out->writeback_chunk_size_kbytes = 8; + out->dpte_buffer_size_in_pte_reqs_luma = 68; + out->dpte_buffer_size_in_pte_reqs_chroma = 36; + out->dcc_meta_buffer_size_bytes = 6272; + 
out->gpuvm_enable = 1;
+ out->hostvm_enable = 1;
+ out->gpuvm_max_page_table_levels = 1;
+ out->hostvm_max_page_table_levels = 2;
+ out->num_dsc = 4;
+ out->maximum_dsc_bits_per_component = 12;
+ out->maximum_pixels_per_line_per_dsc_unit = 6016;
+ out->dsc422_native_support = 1;
+ out->line_buffer_size_bits = 986880;
+ out->dcc_supported = 1;
+ out->max_line_buffer_lines = 32;
+ out->writeback_interface_buffer_size_kbytes = 90;
+ out->max_num_dpp = 4;
+ out->max_num_otg = 4;
+ out->max_num_hdmi_frl_outputs = 1;
+ out->max_num_dp2p0_outputs = 2;
+ out->max_num_dp2p0_streams = 4;
+ out->max_num_wb = 1;
+
+ out->max_dchub_pscl_bw_pix_per_clk = 4;
+ out->max_pscl_lb_bw_pix_per_clk = 2;
+ out->max_lb_vscl_bw_pix_per_clk = 4;
+ out->max_vscl_hscl_bw_pix_per_clk = 4;
+ out->max_hscl_ratio = 6;
+ out->max_vscl_ratio = 6;
+ out->max_hscl_taps = 8;
+ out->max_vscl_taps = 8;
+ out->dispclk_ramp_margin_percent = 1.11;
+
+ out->dppclk_delay_subtotal = 47;
+ out->dppclk_delay_scl = 50;
+ out->dppclk_delay_scl_lb_only = 16;
+ out->dppclk_delay_cnvc_formatter = 28;
+ out->dppclk_delay_cnvc_cursor = 6;
+ out->dispclk_delay_subtotal = 125;
+
+ out->dynamic_metadata_vm_enabled = false;
+ out->max_inter_dcn_tile_repeaters = 8;
+ out->cursor_buffer_size = 16; // kBytes
+ out->cursor_chunk_size = 2; // kBytes
+
+ out->writeback_line_buffer_buffer_size = 0;
+ out->writeback_max_hscl_ratio = 1;
+ out->writeback_max_vscl_ratio = 1;
+ out->writeback_min_hscl_ratio = 1;
+ out->writeback_min_vscl_ratio = 1;
+ out->writeback_max_hscl_taps = 1;
+ out->writeback_max_vscl_taps = 1;
+ out->ptoi_supported = 0;
+
+ out->vblank_nom_default_us = 668; /* not in dml, but in the programming guide; hard coded in dml2_translate_ip_params */
+ out->config_return_buffer_segment_size_in_kbytes = 64; /* required, but does not exist in dml; hard coded in dml2_translate_ip_params */
+ break;
+
+ case dml_project_dcn401:
+ // Hardcoded values for DCN4m
+ out->vblank_nom_default_us = 668; //600;
+ out->rob_buffer_size_kbytes = 192; //128;
+ out->config_return_buffer_size_in_kbytes = 1344; //1280;
+ out->config_return_buffer_segment_size_in_kbytes = 64;
+ out->compressed_buffer_segment_size_in_kbytes = 64;
+ out->meta_fifo_size_in_kentries = 22;
+ out->dpte_buffer_size_in_pte_reqs_luma = 68;
+ out->dpte_buffer_size_in_pte_reqs_chroma = 36;
+ out->gpuvm_max_page_table_levels = 4;
+ out->pixel_chunk_size_kbytes = 8;
+ out->alpha_pixel_chunk_size_kbytes = 4;
+ out->min_pixel_chunk_size_bytes = 1024;
+ out->writeback_chunk_size_kbytes = 8;
+ out->line_buffer_size_bits = 1171920;
+ out->max_line_buffer_lines = 32;
+ out->writeback_interface_buffer_size_kbytes = 90;
+ //Number of pipes after DCN Pipe harvesting
+ out->max_num_dpp = dml2->config.dcn_pipe_count;
+ out->max_num_otg = dml2->config.dcn_pipe_count;
+ out->max_num_wb = 1;
+ out->max_dchub_pscl_bw_pix_per_clk = 4;
+ out->max_pscl_lb_bw_pix_per_clk = 2;
+ out->max_lb_vscl_bw_pix_per_clk = 4;
+ out->max_vscl_hscl_bw_pix_per_clk = 4;
+ out->max_hscl_ratio = 6;
+ out->max_vscl_ratio = 6;
+ out->max_hscl_taps = 8;
+ out->max_vscl_taps = 8;
+ out->dispclk_ramp_margin_percent = 1;
+ out->dppclk_delay_subtotal = 47;
+ out->dppclk_delay_scl = 50;
+ out->dppclk_delay_scl_lb_only = 16;
+ out->dppclk_delay_cnvc_formatter = 28;
+ out->dppclk_delay_cnvc_cursor = 6;
+ out->dispclk_delay_subtotal = 125;
+ out->cursor_buffer_size = 24; //16
+ out->cursor_chunk_size = 2;
+ out->max_inter_dcn_tile_repeaters = 8;
+ out->writeback_max_hscl_ratio = 1;
+ out->writeback_max_vscl_ratio = 1;
+ out->writeback_min_hscl_ratio = 1;
+
out->writeback_min_vscl_ratio = 1; + out->writeback_max_hscl_taps = 1; + out->writeback_max_vscl_taps = 1; + out->writeback_line_buffer_buffer_size = 0; + out->num_dsc = 4; + out->maximum_dsc_bits_per_component = 12; + out->maximum_pixels_per_line_per_dsc_unit = 5760; + out->dsc422_native_support = true; + out->dcc_supported = true; + out->ptoi_supported = false; + + out->gpuvm_enable = false; + out->hostvm_enable = false; + out->cursor_64bpp_support = true; //false; + out->dynamic_metadata_vm_enabled = false; + + out->max_num_hdmi_frl_outputs = 1; + out->max_num_dp2p0_outputs = 4; //2; + out->max_num_dp2p0_streams = 4; + break; + } +} + +void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out) +{ + out->dprefclk_mhz = dml2->config.bbox_overrides.dprefclk_mhz; + out->xtalclk_mhz = dml2->config.bbox_overrides.xtalclk_mhz; + out->pcierefclk_mhz = 100; + out->refclk_mhz = dml2->config.bbox_overrides.dchub_refclk_mhz; + + out->max_outstanding_reqs = 512; + out->pct_ideal_sdp_bw_after_urgent = 100; + out->pct_ideal_fabric_bw_after_urgent = 67; + out->pct_ideal_dram_bw_after_urgent_pixel_only = 20; + out->pct_ideal_dram_bw_after_urgent_pixel_and_vm = 60; + out->pct_ideal_dram_bw_after_urgent_vm_only = 30; + out->pct_ideal_dram_bw_after_urgent_strobe = 67; + out->max_avg_sdp_bw_use_normal_percent = 80; + out->max_avg_fabric_bw_use_normal_percent = 60; + out->max_avg_dram_bw_use_normal_percent = 15; + out->max_avg_dram_bw_use_normal_strobe_percent = 50; + + out->urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096; + out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096; + out->urgent_out_of_order_return_per_channel_vm_only_bytes = 4096; + out->return_bus_width_bytes = 64; + out->dram_channel_width_bytes = 2; + out->fabric_datapath_to_dcn_data_return_bytes = 64; + out->hostvm_min_page_size_kbytes = 0; + out->gpuvm_min_page_size_kbytes = 256; + out->phy_downspread_percent = 0.38; + out->dcn_downspread_percent = 0.5; + out->dispclk_dppclk_vco_speed_mhz = dml2->config.bbox_overrides.disp_pll_vco_speed_mhz; + out->mall_allocated_for_dcn_mbytes = dml2->config.mall_cfg.max_cab_allocation_bytes / 1048576; // 64 or 32 MB; + + out->do_urgent_latency_adjustment = true; + + switch (dml2->v20.dml_core_ctx.project) { + + case dml_project_dcn32: + default: + out->num_chans = 24; + out->round_trip_ping_latency_dcfclk_cycles = 263; + out->smn_latency_us = 2; + break; + + case dml_project_dcn321: + out->num_chans = 8; + out->round_trip_ping_latency_dcfclk_cycles = 207; + out->smn_latency_us = 0; + break; + + case dml_project_dcn35: + case dml_project_dcn351: + case dml_project_dcn36: + out->num_chans = 4; + out->round_trip_ping_latency_dcfclk_cycles = 106; + out->smn_latency_us = 2; + out->dispclk_dppclk_vco_speed_mhz = 3600; + out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0; + break; + + + case dml_project_dcn401: + out->pct_ideal_fabric_bw_after_urgent = 76; //67; + out->max_avg_sdp_bw_use_normal_percent = 75; //80; + out->max_avg_fabric_bw_use_normal_percent = 57; //60; + + out->urgent_out_of_order_return_per_channel_pixel_only_bytes = 0; //4096; + out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 0; //4096; + out->urgent_out_of_order_return_per_channel_vm_only_bytes = 0; //4096; + + out->num_chans = 16; + out->round_trip_ping_latency_dcfclk_cycles = 1000; //263; + out->smn_latency_us = 0; //2 us + out->mall_allocated_for_dcn_mbytes = dml2->config.mall_cfg.max_cab_allocation_bytes / 1048576; // 64; + break; + } 
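+ /* The values above are per-ASIC hardcoded SoC bounding box defaults.
+ * The checks below pick up values the driver supplies through
+ * config.bbox_overrides; a zero override simply keeps the default.
+ */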
+ /* ---Overrides if available--- */ + if (dml2->config.bbox_overrides.dram_num_chan) + out->num_chans = dml2->config.bbox_overrides.dram_num_chan; + + if (dml2->config.bbox_overrides.dram_chanel_width_bytes) + out->dram_channel_width_bytes = dml2->config.bbox_overrides.dram_chanel_width_bytes; +} + +void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, + const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out) +{ + struct dml2_policy_build_synthetic_soc_states_scratch *s = &dml2->v20.scratch.create_scratch.build_synthetic_socbb_scratch; + struct dml2_policy_build_synthetic_soc_states_params *p = &dml2->v20.scratch.build_synthetic_socbb_params; + unsigned int dcfclk_stas_mhz[NUM_DCFCLK_STAS] = {0}; + unsigned int dcfclk_stas_mhz_new[NUM_DCFCLK_STAS_NEW] = {0}; + unsigned int dml_project = dml2->v20.dml_core_ctx.project; + + unsigned int i = 0; + unsigned int transactions_per_mem_clock = 16; // project specific, depends on used Memory type + + if (dml_project == dml_project_dcn351) { + p->dcfclk_stas_mhz = dcfclk_stas_mhz_new; + p->num_dcfclk_stas = NUM_DCFCLK_STAS_NEW; + } else { + p->dcfclk_stas_mhz = dcfclk_stas_mhz; + p->num_dcfclk_stas = NUM_DCFCLK_STAS; + } + + p->in_bbox = in_bbox; + p->out_states = out; + p->in_states = &dml2->v20.scratch.create_scratch.in_states; + + + /* Initial hardcoded values */ + switch (dml2->v20.dml_core_ctx.project) { + + case dml_project_dcn32: + default: + p->in_states->num_states = 2; + transactions_per_mem_clock = 16; + p->in_states->state_array[0].socclk_mhz = 620.0; + p->in_states->state_array[0].dscclk_mhz = 716.667; + p->in_states->state_array[0].phyclk_mhz = 810; + p->in_states->state_array[0].phyclk_d18_mhz = 667; + p->in_states->state_array[0].phyclk_d32_mhz = 625; + p->in_states->state_array[0].dtbclk_mhz = 1564.0; + p->in_states->state_array[0].fabricclk_mhz = 450.0; + p->in_states->state_array[0].dcfclk_mhz = 300.0; + p->in_states->state_array[0].dispclk_mhz = 2150.0; + p->in_states->state_array[0].dppclk_mhz = 2150.0; + p->in_states->state_array[0].dram_speed_mts = 100 * transactions_per_mem_clock; + + p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4; + p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0; + p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0; + p->in_states->state_array[0].writeback_latency_us = 12; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 3000; + p->in_states->state_array[0].sr_exit_z8_time_us = 0; + p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0; + p->in_states->state_array[0].dram_clock_change_latency_us = 400; + p->in_states->state_array[0].use_ideal_dram_bw_strobe = true; + p->in_states->state_array[0].sr_exit_time_us = 42.97; + p->in_states->state_array[0].sr_enter_plus_exit_time_us = 49.94; + p->in_states->state_array[0].fclk_change_latency_us = 20; + p->in_states->state_array[0].usr_retraining_latency_us = 2; + + p->in_states->state_array[1].socclk_mhz = 1200.0; + p->in_states->state_array[1].fabricclk_mhz = 2500.0; + p->in_states->state_array[1].dcfclk_mhz = 1564.0; + p->in_states->state_array[1].dram_speed_mts = 1125 * transactions_per_mem_clock; + break; + + case dml_project_dcn321: + p->in_states->num_states = 2; + transactions_per_mem_clock = 16; + p->in_states->state_array[0].socclk_mhz = 582.0; + p->in_states->state_array[0].dscclk_mhz = 573.333; + 
p->in_states->state_array[0].phyclk_mhz = 810; + p->in_states->state_array[0].phyclk_d18_mhz = 667; + p->in_states->state_array[0].phyclk_d32_mhz = 313; + p->in_states->state_array[0].dtbclk_mhz = 1564.0; + p->in_states->state_array[0].fabricclk_mhz = 450.0; + p->in_states->state_array[0].dcfclk_mhz = 300.0; + p->in_states->state_array[0].dispclk_mhz = 1720.0; + p->in_states->state_array[0].dppclk_mhz = 1720.0; + p->in_states->state_array[0].dram_speed_mts = 100 * transactions_per_mem_clock; + + p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4; + p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0; + p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0; + p->in_states->state_array[0].writeback_latency_us = 12; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 3000; + p->in_states->state_array[0].sr_exit_z8_time_us = 0; + p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0; + p->in_states->state_array[0].dram_clock_change_latency_us = 400; + p->in_states->state_array[0].use_ideal_dram_bw_strobe = true; + p->in_states->state_array[0].sr_exit_time_us = 19.95; + p->in_states->state_array[0].sr_enter_plus_exit_time_us = 24.36; + p->in_states->state_array[0].fclk_change_latency_us = 7; + p->in_states->state_array[0].usr_retraining_latency_us = 0; + + p->in_states->state_array[1].socclk_mhz = 1200.0; + p->in_states->state_array[1].fabricclk_mhz = 2250.0; + p->in_states->state_array[1].dcfclk_mhz = 1434.0; + p->in_states->state_array[1].dram_speed_mts = 1000 * transactions_per_mem_clock; + break; + + + case dml_project_dcn401: + p->in_states->num_states = 2; + transactions_per_mem_clock = 16; + p->in_states->state_array[0].socclk_mhz = 300; //620.0; + p->in_states->state_array[0].dscclk_mhz = 666.667; //716.667; + p->in_states->state_array[0].phyclk_mhz = 810; + p->in_states->state_array[0].phyclk_d18_mhz = 667; + p->in_states->state_array[0].phyclk_d32_mhz = 625; + p->in_states->state_array[0].dtbclk_mhz = 2000; //1564.0; + p->in_states->state_array[0].fabricclk_mhz = 300; //450.0; + p->in_states->state_array[0].dcfclk_mhz = 200; //300.0; + p->in_states->state_array[0].dispclk_mhz = 2000; //2150.0; + p->in_states->state_array[0].dppclk_mhz = 2000; //2150.0; + p->in_states->state_array[0].dram_speed_mts = 97 * transactions_per_mem_clock; //100 * + + p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4; + p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0; + p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0; + p->in_states->state_array[0].writeback_latency_us = 12; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 1000; //3000; + p->in_states->state_array[0].sr_exit_z8_time_us = 0; + p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0; + p->in_states->state_array[0].dram_clock_change_latency_us = 400; + p->in_states->state_array[0].use_ideal_dram_bw_strobe = true; + p->in_states->state_array[0].sr_exit_time_us = 15.70; //42.97; + p->in_states->state_array[0].sr_enter_plus_exit_time_us = 20.20; //49.94; + p->in_states->state_array[0].fclk_change_latency_us = 0; //20; + p->in_states->state_array[0].usr_retraining_latency_us = 0; //2; + + p->in_states->state_array[1].socclk_mhz = 1600; //1200.0; + 
p->in_states->state_array[1].fabricclk_mhz = 2500; //2500.0; + p->in_states->state_array[1].dcfclk_mhz = 1800; //1564.0; + p->in_states->state_array[1].dram_speed_mts = 1125 * transactions_per_mem_clock; + break; + } + + /* Override from passed values, if available */ + for (i = 0; i < p->in_states->num_states; i++) { + if (dml2->config.bbox_overrides.sr_exit_latency_us) { + p->in_states->state_array[i].sr_exit_time_us = + dml2->config.bbox_overrides.sr_exit_latency_us; + } + + if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) { + p->in_states->state_array[i].sr_enter_plus_exit_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us; + } + + if (dml2->config.bbox_overrides.sr_exit_z8_time_us) { + p->in_states->state_array[i].sr_exit_z8_time_us = + dml2->config.bbox_overrides.sr_exit_z8_time_us; + } + + if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) { + p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us; + } + + if (dml2->config.bbox_overrides.urgent_latency_us) { + p->in_states->state_array[i].urgent_latency_pixel_data_only_us = + dml2->config.bbox_overrides.urgent_latency_us; + } + + if (dml2->config.bbox_overrides.dram_clock_change_latency_us) { + p->in_states->state_array[i].dram_clock_change_latency_us = + dml2->config.bbox_overrides.dram_clock_change_latency_us; + } + + if (dml2->config.bbox_overrides.fclk_change_latency_us) { + p->in_states->state_array[i].fclk_change_latency_us = + dml2->config.bbox_overrides.fclk_change_latency_us; + } + } + + /* DCFCLK stas values are project specific */ + if ((dml2->v20.dml_core_ctx.project == dml_project_dcn32) || + (dml2->v20.dml_core_ctx.project == dml_project_dcn321)) { + p->dcfclk_stas_mhz[0] = p->in_states->state_array[0].dcfclk_mhz; + p->dcfclk_stas_mhz[1] = 615; + p->dcfclk_stas_mhz[2] = 906; + p->dcfclk_stas_mhz[3] = 1324; + p->dcfclk_stas_mhz[4] = p->in_states->state_array[1].dcfclk_mhz; + } else if (dml2->v20.dml_core_ctx.project != dml_project_dcn35 && + dml2->v20.dml_core_ctx.project != dml_project_dcn36 && + dml2->v20.dml_core_ctx.project != dml_project_dcn351) { + p->dcfclk_stas_mhz[0] = 300; + p->dcfclk_stas_mhz[1] = 615; + p->dcfclk_stas_mhz[2] = 906; + p->dcfclk_stas_mhz[3] = 1324; + p->dcfclk_stas_mhz[4] = 1500; + } + /* Copy clocks tables entries, if available */ + if (dml2->config.bbox_overrides.clks_table.num_states) { + p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { + p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; + } + + p->dcfclk_stas_mhz[0] = dml2->config.bbox_overrides.clks_table.clk_entries[0].dcfclk_mhz; + if (i > 1) + p->dcfclk_stas_mhz[4] = dml2->config.bbox_overrides.clks_table.clk_entries[i-1].dcfclk_mhz; + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels; i++) { + p->in_states->state_array[i].fabricclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].fclk_mhz; + } + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels; i++) { + p->in_states->state_array[i].dram_speed_mts = + dml2->config.bbox_overrides.clks_table.clk_entries[i].memclk_mhz * transactions_per_mem_clock; + } + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels; i++) { + 
p->in_states->state_array[i].socclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].socclk_mhz; + } + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) { + if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0) + p->in_states->state_array[i].dtbclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; + } + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) { + p->in_states->state_array[i].dispclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz; + p->in_states->state_array[i].dppclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz; + } + } + + if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 || + dml2->v20.dml_core_ctx.project == dml_project_dcn36 || + dml2->v20.dml_core_ctx.project == dml_project_dcn351) { + int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0, + max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0, max_socclk_mhz = 0; + + for (i = 0; i < p->in_states->num_states; i++) { + if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = (int)p->in_states->state_array[i].dcfclk_mhz; + if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) + max_fclk_mhz = (int)p->in_states->state_array[i].fabricclk_mhz; + if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) + max_socclk_mhz = (int)p->in_states->state_array[i].socclk_mhz; + if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) + max_uclk_mhz = (int)p->in_states->state_array[i].dram_speed_mts; + if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = (int)p->in_states->state_array[i].dispclk_mhz; + if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = (int)p->in_states->state_array[i].dppclk_mhz; + if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; + if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; + } + + for (i = 0; i < p->in_states->num_states; i++) { + /* Independent states - including base (unlisted) parameters from state 0. */ + p->out_states->state_array[i] = p->in_states->state_array[0]; + + p->out_states->state_array[i].dispclk_mhz = max_dispclk_mhz; + p->out_states->state_array[i].dppclk_mhz = max_dppclk_mhz; + p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; + p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; + + p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0; + p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; + p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; + + /* Dependent states. 
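+ * DRAM speed, FCLK, SOCCLK and DCFCLK below keep their per-state values
+ * from the input table, unlike the display clocks above, which are pinned
+ * to the maximum found across all states.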
*/
+ p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts;
+ p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz;
+ p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz;
+ p->out_states->state_array[i].dcfclk_mhz = p->in_states->state_array[i].dcfclk_mhz;
+ }
+
+ p->out_states->num_states = p->in_states->num_states;
+ } else {
+ dml2_policy_build_synthetic_soc_states(s, p);
+ }
+}
+
+void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out)
+{
+ const struct _vcs_dpi_ip_params_st *in_ip_params = &in->dml.ip;
+ /* Copy over the IP params to dml2_ctx */
+ out->compressed_buffer_segment_size_in_kbytes = in_ip_params->compressed_buffer_segment_size_in_kbytes;
+ out->config_return_buffer_size_in_kbytes = in_ip_params->config_return_buffer_size_in_kbytes;
+ out->cursor_buffer_size = in_ip_params->cursor_buffer_size;
+ out->cursor_chunk_size = in_ip_params->cursor_chunk_size;
+ out->dcc_meta_buffer_size_bytes = in_ip_params->dcc_meta_buffer_size_bytes;
+ out->dcc_supported = in_ip_params->dcc_supported;
+ out->dispclk_delay_subtotal = in_ip_params->dispclk_delay_subtotal;
+ out->dispclk_ramp_margin_percent = in_ip_params->dispclk_ramp_margin_percent;
+ out->dppclk_delay_cnvc_cursor = in_ip_params->dppclk_delay_cnvc_cursor;
+ out->dppclk_delay_cnvc_formatter = in_ip_params->dppclk_delay_cnvc_formatter;
+ out->dppclk_delay_scl = in_ip_params->dppclk_delay_scl;
+ out->dppclk_delay_scl_lb_only = in_ip_params->dppclk_delay_scl_lb_only;
+ out->dppclk_delay_subtotal = in_ip_params->dppclk_delay_subtotal;
+ out->dpte_buffer_size_in_pte_reqs_chroma = in_ip_params->dpte_buffer_size_in_pte_reqs_chroma;
+ out->dpte_buffer_size_in_pte_reqs_luma = in_ip_params->dpte_buffer_size_in_pte_reqs_luma;
+ out->dsc422_native_support = in_ip_params->dsc422_native_support;
+ out->dynamic_metadata_vm_enabled = in_ip_params->dynamic_metadata_vm_enabled;
+ out->gpuvm_enable = in_ip_params->gpuvm_enable;
+ out->gpuvm_max_page_table_levels = in_ip_params->gpuvm_max_page_table_levels;
+ out->hostvm_enable = in_ip_params->hostvm_enable;
+ out->hostvm_max_page_table_levels = in_ip_params->hostvm_max_page_table_levels;
+ out->line_buffer_size_bits = in_ip_params->line_buffer_size_bits;
+ out->maximum_dsc_bits_per_component = in_ip_params->maximum_dsc_bits_per_component;
+ out->maximum_pixels_per_line_per_dsc_unit = in_ip_params->maximum_pixels_per_line_per_dsc_unit;
+ out->max_dchub_pscl_bw_pix_per_clk = in_ip_params->max_dchub_pscl_bw_pix_per_clk;
+ out->max_hscl_ratio = in_ip_params->max_hscl_ratio;
+ out->max_hscl_taps = in_ip_params->max_hscl_taps;
+ out->max_inter_dcn_tile_repeaters = in_ip_params->max_inter_dcn_tile_repeaters;
+ out->max_lb_vscl_bw_pix_per_clk = in_ip_params->max_lb_vscl_bw_pix_per_clk;
+ out->max_line_buffer_lines = in_ip_params->max_line_buffer_lines;
+ out->max_num_dp2p0_outputs = in_ip_params->max_num_dp2p0_outputs;
+ out->max_num_dp2p0_streams = in_ip_params->max_num_dp2p0_streams;
+ out->max_num_dpp = in_ip_params->max_num_dpp;
+ out->max_num_hdmi_frl_outputs = in_ip_params->max_num_hdmi_frl_outputs;
+ out->max_num_otg = in_ip_params->max_num_otg;
+ out->max_num_wb = in_ip_params->max_num_wb;
+ out->max_pscl_lb_bw_pix_per_clk = in_ip_params->max_pscl_lb_bw_pix_per_clk;
+ out->max_vscl_hscl_bw_pix_per_clk = in_ip_params->max_vscl_hscl_bw_pix_per_clk;
+ out->max_vscl_ratio = in_ip_params->max_vscl_ratio;
+ out->max_vscl_taps = in_ip_params->max_vscl_taps;
+
out->meta_chunk_size_kbytes = in_ip_params->meta_chunk_size_kbytes; + out->meta_fifo_size_in_kentries = in_ip_params->meta_fifo_size_in_kentries; + out->min_meta_chunk_size_bytes = in_ip_params->min_meta_chunk_size_bytes; + out->min_pixel_chunk_size_bytes = in_ip_params->min_pixel_chunk_size_bytes; + out->num_dsc = in_ip_params->num_dsc; + out->pixel_chunk_size_kbytes = in_ip_params->pixel_chunk_size_kbytes; + out->ptoi_supported = in_ip_params->ptoi_supported; + out->rob_buffer_size_kbytes = in_ip_params->rob_buffer_size_kbytes; + out->writeback_chunk_size_kbytes = in_ip_params->writeback_chunk_size_kbytes; + out->writeback_interface_buffer_size_kbytes = in_ip_params->writeback_interface_buffer_size_kbytes; + out->writeback_line_buffer_buffer_size = in_ip_params->writeback_line_buffer_buffer_size; + out->writeback_max_hscl_ratio = in_ip_params->writeback_max_hscl_ratio; + out->writeback_max_hscl_taps = in_ip_params->writeback_max_hscl_taps; + out->writeback_max_vscl_ratio = in_ip_params->writeback_max_vscl_ratio; + out->writeback_max_vscl_taps = in_ip_params->writeback_max_vscl_taps; + out->writeback_min_hscl_ratio = in_ip_params->writeback_min_hscl_ratio; + out->writeback_min_vscl_ratio = in_ip_params->writeback_min_vscl_ratio; + out->zero_size_buffer_entries = in_ip_params->zero_size_buffer_entries; + + /* As per hardcoded reference / discussions */ + out->config_return_buffer_segment_size_in_kbytes = 64; + //out->vblank_nom_default_us = 600; + out->vblank_nom_default_us = in_ip_params->VBlankNomDefaultUS; +} + +void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st *out) +{ + const struct _vcs_dpi_soc_bounding_box_st *in_soc_params = &in->dml.soc; + /* Copy over the SOCBB params to dml2_ctx */ + out->dispclk_dppclk_vco_speed_mhz = in_soc_params->dispclk_dppclk_vco_speed_mhz; + out->do_urgent_latency_adjustment = in_soc_params->do_urgent_latency_adjustment; + out->dram_channel_width_bytes = (dml_uint_t)in_soc_params->dram_channel_width_bytes; + out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)in_soc_params->fabric_datapath_to_dcn_data_return_bytes; + out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes / 1024; + out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes / 1024; + out->mall_allocated_for_dcn_mbytes = (dml_uint_t)in_soc_params->mall_allocated_for_dcn_mbytes; + out->max_avg_dram_bw_use_normal_percent = in_soc_params->max_avg_dram_bw_use_normal_percent; + out->max_avg_fabric_bw_use_normal_percent = in_soc_params->max_avg_fabric_bw_use_normal_percent; + out->max_avg_dram_bw_use_normal_strobe_percent = in_soc_params->max_avg_dram_bw_use_normal_strobe_percent; + out->max_avg_sdp_bw_use_normal_percent = in_soc_params->max_avg_sdp_bw_use_normal_percent; + out->max_outstanding_reqs = in_soc_params->max_request_size_bytes; + out->num_chans = in_soc_params->num_chans; + out->pct_ideal_dram_bw_after_urgent_strobe = in_soc_params->pct_ideal_dram_bw_after_urgent_strobe; + out->pct_ideal_dram_bw_after_urgent_vm_only = in_soc_params->pct_ideal_dram_sdp_bw_after_urgent_vm_only; + out->pct_ideal_fabric_bw_after_urgent = in_soc_params->pct_ideal_fabric_bw_after_urgent; + out->pct_ideal_sdp_bw_after_urgent = in_soc_params->pct_ideal_sdp_bw_after_urgent; + out->phy_downspread_percent = in_soc_params->downspread_percent; + out->refclk_mhz = 50; // As per hardcoded reference. 
+ out->return_bus_width_bytes = in_soc_params->return_bus_width_bytes;
+ out->round_trip_ping_latency_dcfclk_cycles = in_soc_params->round_trip_ping_latency_dcfclk_cycles;
+ out->smn_latency_us = in_soc_params->smn_latency_us;
+ out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = in_soc_params->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+ out->urgent_out_of_order_return_per_channel_pixel_only_bytes = in_soc_params->urgent_out_of_order_return_per_channel_pixel_only_bytes;
+ out->urgent_out_of_order_return_per_channel_vm_only_bytes = in_soc_params->urgent_out_of_order_return_per_channel_vm_only_bytes;
+ out->pct_ideal_dram_bw_after_urgent_pixel_and_vm = in_soc_params->pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm;
+ out->pct_ideal_dram_bw_after_urgent_pixel_only = in_soc_params->pct_ideal_dram_sdp_bw_after_urgent_pixel_only;
+ out->dcn_downspread_percent = in_soc_params->dcn_downspread_percent;
+}
+
+void dml2_translate_soc_states(const struct dc *dc, struct soc_states_st *out, int num_states)
+{
+ unsigned int i = 0;
+ out->num_states = num_states;
+
+ for (i = 0; i < out->num_states; i++) {
+ out->state_array[i].dcfclk_mhz = dc->dml.soc.clock_limits[i].dcfclk_mhz;
+ out->state_array[i].dispclk_mhz = dc->dml.soc.clock_limits[i].dispclk_mhz;
+ out->state_array[i].dppclk_mhz = dc->dml.soc.clock_limits[i].dppclk_mhz;
+ out->state_array[i].dram_speed_mts = dc->dml.soc.clock_limits[i].dram_speed_mts;
+ out->state_array[i].dtbclk_mhz = dc->dml.soc.clock_limits[i].dtbclk_mhz;
+ out->state_array[i].socclk_mhz = dc->dml.soc.clock_limits[i].socclk_mhz;
+ out->state_array[i].fabricclk_mhz = dc->dml.soc.clock_limits[i].fabricclk_mhz;
+ out->state_array[i].dscclk_mhz = dc->dml.soc.clock_limits[i].dscclk_mhz;
+ out->state_array[i].phyclk_d18_mhz = dc->dml.soc.clock_limits[i].phyclk_d18_mhz;
+ out->state_array[i].phyclk_d32_mhz = dc->dml.soc.clock_limits[i].phyclk_d32_mhz;
+ out->state_array[i].phyclk_mhz = dc->dml.soc.clock_limits[i].phyclk_mhz;
+ out->state_array[i].sr_enter_plus_exit_time_us = dc->dml.soc.sr_enter_plus_exit_time_us;
+ out->state_array[i].sr_exit_time_us = dc->dml.soc.sr_exit_time_us;
+ out->state_array[i].fclk_change_latency_us = dc->dml.soc.fclk_change_latency_us;
+ out->state_array[i].dram_clock_change_latency_us = dc->dml.soc.dram_clock_change_latency_us;
+ out->state_array[i].usr_retraining_latency_us = dc->dml.soc.usr_retraining_latency_us;
+ out->state_array[i].writeback_latency_us = dc->dml.soc.writeback_latency_us;
+ /* Driver initialized values for these are different from the spreadsheet. Use the
+ * spreadsheet ones for now. We need to decide which ones to use.
+ */ + out->state_array[i].sr_exit_z8_time_us = dc->dml.soc.sr_exit_z8_time_us; + out->state_array[i].sr_enter_plus_exit_z8_time_us = dc->dml.soc.sr_enter_plus_exit_z8_time_us; + //out->state_array[i].sr_exit_z8_time_us = 5.20; + //out->state_array[i].sr_enter_plus_exit_z8_time_us = 9.60; + out->state_array[i].use_ideal_dram_bw_strobe = true; + out->state_array[i].urgent_latency_pixel_data_only_us = dc->dml.soc.urgent_latency_pixel_data_only_us; + out->state_array[i].urgent_latency_pixel_mixed_with_vm_data_us = dc->dml.soc.urgent_latency_pixel_mixed_with_vm_data_us; + out->state_array[i].urgent_latency_vm_data_only_us = dc->dml.soc.urgent_latency_vm_data_only_us; + out->state_array[i].urgent_latency_adjustment_fabric_clock_component_us = dc->dml.soc.urgent_latency_adjustment_fabric_clock_component_us; + out->state_array[i].urgent_latency_adjustment_fabric_clock_reference_mhz = dc->dml.soc.urgent_latency_adjustment_fabric_clock_reference_mhz; + } +} + +static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st *out, unsigned int location, const struct dc_stream_state *in) +{ + dml_uint_t hblank_start, vblank_start; + + out->HActive[location] = in->timing.h_addressable + in->timing.h_border_left + in->timing.h_border_right; + out->VActive[location] = in->timing.v_addressable + in->timing.v_border_bottom + in->timing.v_border_top; + out->RefreshRate[location] = ((in->timing.pix_clk_100hz * 100) / in->timing.h_total) / in->timing.v_total; + out->VFrontPorch[location] = in->timing.v_front_porch; + out->PixelClock[location] = in->timing.pix_clk_100hz / 10000.00; + if (in->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + out->PixelClock[location] *= 2; + out->HTotal[location] = in->timing.h_total; + out->VTotal[location] = in->timing.v_total; + out->Interlace[location] = in->timing.flags.INTERLACE; + hblank_start = in->timing.h_total - in->timing.h_front_porch; + out->HBlankEnd[location] = hblank_start + - in->timing.h_addressable + - in->timing.h_border_left + - in->timing.h_border_right; + vblank_start = in->timing.v_total - in->timing.v_front_porch; + out->VBlankEnd[location] = vblank_start + - in->timing.v_addressable + - in->timing.v_border_top + - in->timing.v_border_bottom; + out->DRRDisplay[location] = false; +} + +static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, + const struct dc_stream_state *in, const struct pipe_ctx *pipe, struct dml2_context *dml2) +{ + unsigned int output_bpc; + + out->DSCEnable[location] = (enum dml_dsc_enable)in->timing.flags.DSC; + out->OutputLinkDPLanes[location] = 4; // As per code in dcn20_resource.c + out->DSCInputBitPerComponent[location] = 12; // As per code in dcn20_resource.c + out->DSCSlices[location] = in->timing.dsc_cfg.num_slices_h; + + switch (in->signal) { + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_DISPLAY_PORT: + out->OutputEncoder[location] = dml_dp; + if (location < MAX_HPO_DP2_ENCODERS && dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) + out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0; + break; + case SIGNAL_TYPE_EDP: + out->OutputEncoder[location] = dml_edp; + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + out->OutputEncoder[location] = dml_hdmi; + break; + default: + out->OutputEncoder[location] = dml_dp; + } + + switch (in->timing.display_color_depth) { + case COLOR_DEPTH_666: + output_bpc = 6; + break; + 
case COLOR_DEPTH_888: + output_bpc = 8; + break; + case COLOR_DEPTH_101010: + output_bpc = 10; + break; + case COLOR_DEPTH_121212: + output_bpc = 12; + break; + case COLOR_DEPTH_141414: + output_bpc = 14; + break; + case COLOR_DEPTH_161616: + output_bpc = 16; + break; + case COLOR_DEPTH_999: + output_bpc = 9; + break; + case COLOR_DEPTH_111111: + output_bpc = 11; + break; + default: + output_bpc = 8; + break; + } + + switch (in->timing.pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + out->OutputFormat[location] = dml_444; + out->OutputBpp[location] = (dml_float_t)output_bpc * 3; + break; + case PIXEL_ENCODING_YCBCR420: + out->OutputFormat[location] = dml_420; + out->OutputBpp[location] = (output_bpc * 3.0) / 2; + break; + case PIXEL_ENCODING_YCBCR422: + if (in->timing.flags.DSC && !in->timing.dsc_cfg.ycbcr422_simple) + out->OutputFormat[location] = dml_n422; + else + out->OutputFormat[location] = dml_s422; + out->OutputBpp[location] = (dml_float_t)output_bpc * 2; + break; + default: + out->OutputFormat[location] = dml_444; + out->OutputBpp[location] = (dml_float_t)output_bpc * 3; + break; + } + + if (in->timing.flags.DSC) { + out->OutputBpp[location] = in->timing.dsc_cfg.bits_per_pixel / 16.0; + } + + // This has been false throughout DCN32x development. If needed we can change this later on. + out->OutputMultistreamEn[location] = false; + + switch (in->signal) { + case SIGNAL_TYPE_NONE: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_LVDS: + case SIGNAL_TYPE_RGB: + case SIGNAL_TYPE_DISPLAY_PORT: + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_EDP: + case SIGNAL_TYPE_VIRTUAL: + default: + out->OutputLinkDPRate[location] = dml_dp_rate_na; + break; + } + + out->PixelClockBackEnd[location] = in->timing.pix_clk_100hz / 10000.00; + + out->AudioSampleLayout[location] = in->audio_info.modes->sample_size; + out->AudioSampleRate[location] = in->audio_info.modes->max_bit_rate; + + out->OutputDisabled[location] = true; +} + +static void populate_dummy_dml_surface_cfg(struct dml_surface_cfg_st *out, unsigned int location, const struct dc_stream_state *in) +{ + out->SurfaceWidthY[location] = in->timing.h_addressable; + out->SurfaceHeightY[location] = in->timing.v_addressable; + out->SurfaceWidthC[location] = in->timing.h_addressable; + out->SurfaceHeightC[location] = in->timing.v_addressable; + out->PitchY[location] = ((out->SurfaceWidthY[location] + 127) / 128) * 128; + out->PitchC[location] = 1; + out->DCCEnable[location] = false; + out->DCCMetaPitchY[location] = 0; + out->DCCMetaPitchC[location] = 0; + out->DCCRateLuma[location] = 1.0; + out->DCCRateChroma[location] = 1.0; + out->DCCFractionOfZeroSizeRequestsLuma[location] = 0; + out->DCCFractionOfZeroSizeRequestsChroma[location] = 0; + out->SurfaceTiling[location] = dml_sw_64kb_r_x; + out->SourcePixelFormat[location] = dml_444_32; +} + +static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_project, struct dml_surface_cfg_st *out, unsigned int location, const struct dc_plane_state *in) +{ + out->PitchY[location] = in->plane_size.surface_pitch; + out->SurfaceHeightY[location] = in->plane_size.surface_size.height; + out->SurfaceWidthY[location] = in->plane_size.surface_size.width; + out->SurfaceHeightC[location] = in->plane_size.chroma_size.height; + out->SurfaceWidthC[location] = in->plane_size.chroma_size.width; + out->PitchC[location] = in->plane_size.chroma_pitch; + out->DCCEnable[location] = in->dcc.enable; + 
out->DCCMetaPitchY[location] = in->dcc.meta_pitch; + out->DCCMetaPitchC[location] = in->dcc.meta_pitch_c; + out->DCCRateLuma[location] = 1.0; + out->DCCRateChroma[location] = 1.0; + out->DCCFractionOfZeroSizeRequestsLuma[location] = in->dcc.independent_64b_blks; + out->DCCFractionOfZeroSizeRequestsChroma[location] = in->dcc.independent_64b_blks_c; + + switch (dml2_project) { + default: + out->SurfaceTiling[location] = (enum dml_swizzle_mode)in->tiling_info.gfx9.swizzle; + break; + case dml_project_dcn401: + // Temporary use gfx11 swizzle in dml, until proper dml for DCN4x is integrated/implemented + switch (in->tiling_info.gfx_addr3.swizzle) { + case DC_ADDR3_SW_4KB_2D: + case DC_ADDR3_SW_64KB_2D: + case DC_ADDR3_SW_256KB_2D: + default: + out->SurfaceTiling[location] = dml_sw_64kb_r_x; + break; + case DC_ADDR3_SW_LINEAR: + out->SurfaceTiling[location] = dml_sw_linear; + break; + } + } + + switch (in->format) { + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + out->SourcePixelFormat[location] = dml_420_8; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + out->SourcePixelFormat[location] = dml_420_10; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + out->SourcePixelFormat[location] = dml_444_64; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + out->SourcePixelFormat[location] = dml_444_16; + break; + case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: + out->SourcePixelFormat[location] = dml_444_8; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + out->SourcePixelFormat[location] = dml_rgbe_alpha; + break; + default: + out->SourcePixelFormat[location] = dml_444_32; + break; + } +} + +static struct scaler_data *get_scaler_data_for_plane( + const struct dc_plane_state *in, + struct dc_state *context) +{ + int i; + struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; + + memset(temp_pipe, 0, sizeof(struct pipe_ctx)); + + for (i = 0; i < MAX_PIPES; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state == in && !pipe->prev_odm_pipe) { + temp_pipe->stream = pipe->stream; + temp_pipe->plane_state = pipe->plane_state; + temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; + temp_pipe->stream_res = pipe->stream_res; + resource_build_scaling_params(temp_pipe); + break; + } + } + + ASSERT(i < MAX_PIPES); + return &temp_pipe->plane_res.scl_data; +} + +static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_stream_state *in, + const struct soc_bounding_box_st *soc) +{ + dml_uint_t width, height; + + if (in->timing.h_addressable > 3840) + width = 3840; + else + width = in->timing.h_addressable; // 4K max + + if (in->timing.v_addressable > 2160) + height = 2160; + else + height = in->timing.v_addressable; // 4K max + + out->CursorBPP[location] = dml_cur_32bit; + out->CursorWidth[location] = 256; + + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; + + out->ViewportWidth[location] = width; + out->ViewportHeight[location] = height; + out->ViewportStationary[location] = false; + out->ViewportWidthChroma[location] = 0; + out->ViewportHeightChroma[location] = 0; + out->ViewportXStart[location] = 0; + out->ViewportXStartC[location] = 0; + out->ViewportYStart[location] = 
0;
+ out->ViewportYStartC[location] = 0;
+
+ out->ScalerEnabled[location] = false;
+ out->HRatio[location] = 1.0;
+ out->VRatio[location] = 1.0;
+ out->HRatioChroma[location] = 0;
+ out->VRatioChroma[location] = 0;
+ out->HTaps[location] = 1;
+ out->VTaps[location] = 1;
+ out->HTapsChroma[location] = 0;
+ out->VTapsChroma[location] = 0;
+ out->SourceScan[location] = dml_rotation_0;
+ out->ScalerRecoutWidth[location] = width;
+
+ out->LBBitPerPixel[location] = 57;
+
+ out->DynamicMetadataEnable[location] = false;
+
+ out->NumberOfCursors[location] = 1;
+ out->UseMALLForStaticScreen[location] = dml_use_mall_static_screen_disable;
+ out->UseMALLForPStateChange[location] = dml_use_mall_pstate_change_disable;
+
+ out->DETSizeOverride[location] = 256;
+
+ out->ScalerEnabled[location] = false;
+}
+
+static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location,
+ const struct dc_plane_state *in, struct dc_state *context,
+ const struct soc_bounding_box_st *soc)
+{
+ struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context);
+
+ out->CursorBPP[location] = dml_cur_32bit;
+ out->CursorWidth[location] = 256;
+
+ out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes;
+
+ out->ViewportWidth[location] = scaler_data->viewport.width;
+ out->ViewportHeight[location] = scaler_data->viewport.height;
+ out->ViewportWidthChroma[location] = scaler_data->viewport_c.width;
+ out->ViewportHeightChroma[location] = scaler_data->viewport_c.height;
+ out->ViewportXStart[location] = scaler_data->viewport.x;
+ out->ViewportYStart[location] = scaler_data->viewport.y;
+ out->ViewportXStartC[location] = scaler_data->viewport_c.x;
+ out->ViewportYStartC[location] = scaler_data->viewport_c.y;
+ out->ViewportStationary[location] = false;
+
+ out->ScalerEnabled[location] = scaler_data->ratios.horz.value != dc_fixpt_one.value ||
+ scaler_data->ratios.horz_c.value != dc_fixpt_one.value ||
+ scaler_data->ratios.vert.value != dc_fixpt_one.value ||
+ scaler_data->ratios.vert_c.value != dc_fixpt_one.value;
+
+ /* Current driver code base uses LBBitPerPixel as 57. There is a discrepancy
+ * from the HW/DML teams about this value. Initialize LBBitPerPixel with the
+ * value currently used in Navi3x.
+ */ + + out->LBBitPerPixel[location] = 57; + + if (out->ScalerEnabled[location] == false) { + out->HRatio[location] = 1; + out->HRatioChroma[location] = 1; + out->VRatio[location] = 1; + out->VRatioChroma[location] = 1; + } else { + /* Follow the original dml_wrapper.c code direction to fix scaling issues */ + out->HRatio[location] = (dml_float_t)scaler_data->ratios.horz.value / (1ULL << 32); + out->HRatioChroma[location] = (dml_float_t)scaler_data->ratios.horz_c.value / (1ULL << 32); + out->VRatio[location] = (dml_float_t)scaler_data->ratios.vert.value / (1ULL << 32); + out->VRatioChroma[location] = (dml_float_t)scaler_data->ratios.vert_c.value / (1ULL << 32); + } + + if (!scaler_data->taps.h_taps) { + out->HTaps[location] = 1; + out->HTapsChroma[location] = 1; + } else { + out->HTaps[location] = scaler_data->taps.h_taps; + out->HTapsChroma[location] = scaler_data->taps.h_taps_c; + } + if (!scaler_data->taps.v_taps) { + out->VTaps[location] = 1; + out->VTapsChroma[location] = 1; + } else { + out->VTaps[location] = scaler_data->taps.v_taps; + out->VTapsChroma[location] = scaler_data->taps.v_taps_c; + } + + out->SourceScan[location] = (enum dml_rotation_angle)in->rotation; + out->ScalerRecoutWidth[location] = in->dst_rect.width; + + out->DynamicMetadataEnable[location] = false; + out->DynamicMetadataLinesBeforeActiveRequired[location] = 0; + out->DynamicMetadataTransmittedBytes[location] = 0; + + out->NumberOfCursors[location] = 1; +} + +static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, + const struct dc_stream_state *stream, const struct dml_display_cfg_st *dml_dispcfg) +{ + int i = 0; + int location = -1; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] && dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i] == stream->stream_id) { + location = i; + break; + } + } + + return location; +} + +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *context, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) +{ + int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; + + if (!plane_id) + return false; + + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (j == plane_index))) { + *plane_id = (i << 16) | j; + return true; + } + } + } + } + + return false; +} + +static unsigned int map_plane_to_dml_display_cfg(const struct dml2_context *dml2, const struct dc_plane_state *plane, + const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id, int plane_index) +{ + unsigned int plane_id; + int i = 0; + int location = -1; + + if (!get_plane_id(context->bw_ctx.dml2, context, plane, stream_id, plane_index, &plane_id)) { + ASSERT(false); + return -1; + } + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] && dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i] == plane_id) { + location = i; + break; + } + } + + return location; +} + +static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, struct dc_state *state) +{ + unsigned int i; + unsigned int pipe_index = 0; + unsigned int plane_index = 0; + struct 
dml2_dml_to_dc_pipe_mapping *dml_to_dc_pipe_mapping = &dml2->v20.scratch.dml_to_dc_pipe_mapping; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[i] = false; + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[i] = 0; + } + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (!pipe || !pipe->stream || !pipe->plane_state) + continue; + + while (pipe) { + pipe_index = pipe->pipe_idx; + + if (pipe->stream && dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] == false) { + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[pipe_index] = plane_index; + plane_index++; + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] = true; + } + + pipe = pipe->bottom_pipe; + } + + plane_index = 0; + } +} + +static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out, + unsigned int location, const struct dc_stream_state *in) +{ + if (in->num_wb_info > 0) { + for (int i = 0; i < __DML_NUM_DMB__; i++) { + const struct dc_writeback_info *wb_info = &in->writeback_info[i]; + /*current dml support 1 dwb per stream, limitation*/ + if (wb_info->wb_enabled) { + out->WritebackEnable[location] = wb_info->wb_enabled; + out->ActiveWritebacksPerSurface[location] = wb_info->dwb_params.cnv_params.src_width; + out->WritebackDestinationWidth[location] = wb_info->dwb_params.dest_width; + out->WritebackDestinationHeight[location] = wb_info->dwb_params.dest_height; + + out->WritebackSourceWidth[location] = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_width : + wb_info->dwb_params.cnv_params.src_width; + + out->WritebackSourceHeight[location] = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_height : + wb_info->dwb_params.cnv_params.src_height; + /*current design does not have chroma scaling, need to follow up*/ + out->WritebackHTaps[location] = wb_info->dwb_params.scaler_taps.h_taps > 0 ? + wb_info->dwb_params.scaler_taps.h_taps : 1; + out->WritebackVTaps[location] = wb_info->dwb_params.scaler_taps.v_taps > 0 ? + wb_info->dwb_params.scaler_taps.v_taps : 1; + + out->WritebackHRatio[location] = wb_info->dwb_params.cnv_params.crop_en ? + (double)wb_info->dwb_params.cnv_params.crop_width / + (double)wb_info->dwb_params.dest_width : + (double)wb_info->dwb_params.cnv_params.src_width / + (double)wb_info->dwb_params.dest_width; + out->WritebackVRatio[location] = wb_info->dwb_params.cnv_params.crop_en ? 
+ (double)wb_info->dwb_params.cnv_params.crop_height / + (double)wb_info->dwb_params.dest_height : + (double)wb_info->dwb_params.cnv_params.src_height / + (double)wb_info->dwb_params.dest_height; + } + } + } +} + +static void dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(struct dml2_context *dml2, struct dc_state *context) +{ + int i; + struct pipe_ctx *current_pipe_context; + + /* Scratch gets reset to zero in dml, but link encoder instance can be zero, so reset to -1 */ + for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { + dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[i] = -1; + } + + /* If an HPO stream encoder is allocated to a pipe, get the instance of it's allocated HPO Link encoder */ + for (i = 0; i < MAX_PIPES; i++) { + current_pipe_context = &context->res_ctx.pipe_ctx[i]; + if (current_pipe_context->stream && + current_pipe_context->stream_res.hpo_dp_stream_enc && + current_pipe_context->link_res.hpo_dp_link_enc && + dc_is_dp_signal(current_pipe_context->stream->signal)) { + dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[current_pipe_context->stream_res.hpo_dp_stream_enc->inst] = + current_pipe_context->link_res.hpo_dp_link_enc->inst; + } + } +} + +void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) +{ + int i = 0, j = 0, k = 0; + int disp_cfg_stream_location, disp_cfg_plane_location; + enum mall_stream_type stream_mall_type; + struct pipe_ctx *current_pipe_context; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false; + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] = false; + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] = false; + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] = false; + } + + //Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file + dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable; + dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels; + dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable; + dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels; + if (dml2->v20.dml_core_ctx.ip.hostvm_enable) + dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; + + dml2_populate_pipe_to_plane_index_mapping(dml2, context); + dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(dml2, context); + + for (i = 0; i < context->stream_count; i++) { + current_pipe_context = NULL; + for (k = 0; k < MAX_PIPES; k++) { + /* find one pipe allocated to this stream for the purpose of getting + info about the link later */ + if (context->streams[i] == context->res_ctx.pipe_ctx[k].stream) { + current_pipe_context = &context->res_ctx.pipe_ctx[k]; + break; + } + } + disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg); + stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]); + + if (disp_cfg_stream_location < 0) + disp_cfg_stream_location = dml_dispcfg->num_timings++; + + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); + + populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); + 
populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2); + /*Call site for populate_dml_writeback_cfg_from_stream_state*/ + populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback, + disp_cfg_stream_location, context->streams[i]); + + switch (context->streams[i]->debug.force_odm_combine_segments) { + case 2: + dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_2to1; + break; + case 4: + dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_4to1; + break; + default: + break; + } + + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_stream_location] = context->streams[i]->stream_id; + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true; + + if (context->stream_status[i].plane_count == 0) { + disp_cfg_plane_location = dml_dispcfg->num_surfaces++; + + populate_dummy_dml_surface_cfg(&dml_dispcfg->surface, disp_cfg_plane_location, context->streams[i]); + populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location, + context->streams[i], &dml2->v20.dml_core_ctx.soc); + + dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location; + + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; + } else { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + disp_cfg_plane_location = map_plane_to_dml_display_cfg(dml2, + context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id, j); + + if (disp_cfg_plane_location < 0) + disp_cfg_plane_location = dml_dispcfg->num_surfaces++; + + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); + + populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); + populate_dml_plane_cfg_from_plane_state( + &dml_dispcfg->plane, disp_cfg_plane_location, + context->stream_status[i].plane_states[j], context, + &dml2->v20.dml_core_ctx.soc); + + if (stream_mall_type == SUBVP_MAIN) { + dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; + dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize; + } else if (stream_mall_type == SUBVP_PHANTOM) { + dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe; + dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_disable; + dml2->v20.dml_core_ctx.policy.ImmediateFlipRequirement[disp_cfg_plane_location] = dml_immediate_flip_not_required; + } else { + dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_disable; + dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize; + } + + dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location; + + if (get_plane_id(dml2, context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, j, + &dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; + + if (j >= 1) { + 
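+ /* For the second and subsequent planes on a stream, replicate the stream's
+ * timing/output configuration and ODM/MALL policy into this plane's slot
+ * and count it as an additional timing entry.
+ */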
populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2); + switch (context->streams[i]->debug.force_odm_combine_segments) { + case 2: + dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; + break; + case 4: + dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_4to1; + break; + default: + break; + } + + if (stream_mall_type == SUBVP_MAIN) + dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; + else if (stream_mall_type == SUBVP_PHANTOM) + dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe; + + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_plane_location] = context->streams[i]->stream_id; + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_plane_location] = true; + + dml_dispcfg->num_timings++; + } + } + } + } +} + +void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, + struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, + struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, + struct pipe_ctx *out) +{ + memset(&out->rq_regs, 0, sizeof(out->rq_regs)); + out->rq_regs.rq_regs_l.chunk_size = rq_regs->rq_regs_l.chunk_size; + out->rq_regs.rq_regs_l.min_chunk_size = rq_regs->rq_regs_l.min_chunk_size; + out->rq_regs.rq_regs_l.meta_chunk_size = rq_regs->rq_regs_l.meta_chunk_size; + out->rq_regs.rq_regs_l.min_meta_chunk_size = rq_regs->rq_regs_l.min_meta_chunk_size; + out->rq_regs.rq_regs_l.dpte_group_size = rq_regs->rq_regs_l.dpte_group_size; + out->rq_regs.rq_regs_l.mpte_group_size = rq_regs->rq_regs_l.mpte_group_size; + out->rq_regs.rq_regs_l.swath_height = rq_regs->rq_regs_l.swath_height; + out->rq_regs.rq_regs_l.pte_row_height_linear = rq_regs->rq_regs_l.pte_row_height_linear; + + out->rq_regs.rq_regs_c.chunk_size = rq_regs->rq_regs_c.chunk_size; + out->rq_regs.rq_regs_c.min_chunk_size = rq_regs->rq_regs_c.min_chunk_size; + out->rq_regs.rq_regs_c.meta_chunk_size = rq_regs->rq_regs_c.meta_chunk_size; + out->rq_regs.rq_regs_c.min_meta_chunk_size = rq_regs->rq_regs_c.min_meta_chunk_size; + out->rq_regs.rq_regs_c.dpte_group_size = rq_regs->rq_regs_c.dpte_group_size; + out->rq_regs.rq_regs_c.mpte_group_size = rq_regs->rq_regs_c.mpte_group_size; + out->rq_regs.rq_regs_c.swath_height = rq_regs->rq_regs_c.swath_height; + out->rq_regs.rq_regs_c.pte_row_height_linear = rq_regs->rq_regs_c.pte_row_height_linear; + + out->rq_regs.drq_expansion_mode = rq_regs->drq_expansion_mode; + out->rq_regs.prq_expansion_mode = rq_regs->prq_expansion_mode; + out->rq_regs.mrq_expansion_mode = rq_regs->mrq_expansion_mode; + out->rq_regs.crq_expansion_mode = rq_regs->crq_expansion_mode; + out->rq_regs.plane1_base_address = rq_regs->plane1_base_address; + + memset(&out->dlg_regs, 0, sizeof(out->dlg_regs)); + out->dlg_regs.refcyc_h_blank_end = disp_dlg_regs->refcyc_h_blank_end; + out->dlg_regs.dlg_vblank_end = disp_dlg_regs->dlg_vblank_end; + out->dlg_regs.min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; + out->dlg_regs.refcyc_per_htotal = disp_dlg_regs->refcyc_per_htotal; + out->dlg_regs.refcyc_x_after_scaler = disp_dlg_regs->refcyc_x_after_scaler; + out->dlg_regs.dst_y_after_scaler = disp_dlg_regs->dst_y_after_scaler; + 
out->dlg_regs.dst_y_prefetch = disp_dlg_regs->dst_y_prefetch; + out->dlg_regs.dst_y_per_vm_vblank = disp_dlg_regs->dst_y_per_vm_vblank; + out->dlg_regs.dst_y_per_row_vblank = disp_dlg_regs->dst_y_per_row_vblank; + out->dlg_regs.dst_y_per_vm_flip = disp_dlg_regs->dst_y_per_vm_flip; + out->dlg_regs.dst_y_per_row_flip = disp_dlg_regs->dst_y_per_row_flip; + out->dlg_regs.ref_freq_to_pix_freq = disp_dlg_regs->ref_freq_to_pix_freq; + out->dlg_regs.vratio_prefetch = disp_dlg_regs->vratio_prefetch; + out->dlg_regs.vratio_prefetch_c = disp_dlg_regs->vratio_prefetch_c; + out->dlg_regs.refcyc_per_pte_group_vblank_l = disp_dlg_regs->refcyc_per_pte_group_vblank_l; + out->dlg_regs.refcyc_per_pte_group_vblank_c = disp_dlg_regs->refcyc_per_pte_group_vblank_c; + out->dlg_regs.refcyc_per_meta_chunk_vblank_l = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; + out->dlg_regs.refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_c; + out->dlg_regs.refcyc_per_pte_group_flip_l = disp_dlg_regs->refcyc_per_pte_group_flip_l; + out->dlg_regs.refcyc_per_pte_group_flip_c = disp_dlg_regs->refcyc_per_pte_group_flip_c; + out->dlg_regs.refcyc_per_meta_chunk_flip_l = disp_dlg_regs->refcyc_per_meta_chunk_flip_l; + out->dlg_regs.refcyc_per_meta_chunk_flip_c = disp_dlg_regs->refcyc_per_meta_chunk_flip_c; + out->dlg_regs.dst_y_per_pte_row_nom_l = disp_dlg_regs->dst_y_per_pte_row_nom_l; + out->dlg_regs.dst_y_per_pte_row_nom_c = disp_dlg_regs->dst_y_per_pte_row_nom_c; + out->dlg_regs.refcyc_per_pte_group_nom_l = disp_dlg_regs->refcyc_per_pte_group_nom_l; + out->dlg_regs.refcyc_per_pte_group_nom_c = disp_dlg_regs->refcyc_per_pte_group_nom_c; + out->dlg_regs.dst_y_per_meta_row_nom_l = disp_dlg_regs->dst_y_per_meta_row_nom_l; + out->dlg_regs.dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_c; + out->dlg_regs.refcyc_per_meta_chunk_nom_l = disp_dlg_regs->refcyc_per_meta_chunk_nom_l; + out->dlg_regs.refcyc_per_meta_chunk_nom_c = disp_dlg_regs->refcyc_per_meta_chunk_nom_c; + out->dlg_regs.refcyc_per_line_delivery_pre_l = disp_dlg_regs->refcyc_per_line_delivery_pre_l; + out->dlg_regs.refcyc_per_line_delivery_pre_c = disp_dlg_regs->refcyc_per_line_delivery_pre_c; + out->dlg_regs.refcyc_per_line_delivery_l = disp_dlg_regs->refcyc_per_line_delivery_l; + out->dlg_regs.refcyc_per_line_delivery_c = disp_dlg_regs->refcyc_per_line_delivery_c; + out->dlg_regs.refcyc_per_vm_group_vblank = disp_dlg_regs->refcyc_per_vm_group_vblank; + out->dlg_regs.refcyc_per_vm_group_flip = disp_dlg_regs->refcyc_per_vm_group_flip; + out->dlg_regs.refcyc_per_vm_req_vblank = disp_dlg_regs->refcyc_per_vm_req_vblank; + out->dlg_regs.refcyc_per_vm_req_flip = disp_dlg_regs->refcyc_per_vm_req_flip; + out->dlg_regs.dst_y_offset_cur0 = disp_dlg_regs->dst_y_offset_cur0; + out->dlg_regs.chunk_hdl_adjust_cur0 = disp_dlg_regs->chunk_hdl_adjust_cur0; + out->dlg_regs.dst_y_offset_cur1 = disp_dlg_regs->dst_y_offset_cur1; + out->dlg_regs.chunk_hdl_adjust_cur1 = disp_dlg_regs->chunk_hdl_adjust_cur1; + out->dlg_regs.vready_after_vcount0 = disp_dlg_regs->vready_after_vcount0; + out->dlg_regs.dst_y_delta_drq_limit = disp_dlg_regs->dst_y_delta_drq_limit; + out->dlg_regs.refcyc_per_vm_dmdata = disp_dlg_regs->refcyc_per_vm_dmdata; + out->dlg_regs.dmdata_dl_delta = disp_dlg_regs->dmdata_dl_delta; + + memset(&out->ttu_regs, 0, sizeof(out->ttu_regs)); + out->ttu_regs.qos_level_low_wm = disp_ttu_regs->qos_level_low_wm; + out->ttu_regs.qos_level_high_wm = disp_ttu_regs->qos_level_high_wm; + out->ttu_regs.min_ttu_vblank = disp_ttu_regs->min_ttu_vblank; + 
out->ttu_regs.qos_level_flip = disp_ttu_regs->qos_level_flip; + out->ttu_regs.refcyc_per_req_delivery_l = disp_ttu_regs->refcyc_per_req_delivery_l; + out->ttu_regs.refcyc_per_req_delivery_c = disp_ttu_regs->refcyc_per_req_delivery_c; + out->ttu_regs.refcyc_per_req_delivery_cur0 = disp_ttu_regs->refcyc_per_req_delivery_cur0; + out->ttu_regs.refcyc_per_req_delivery_cur1 = disp_ttu_regs->refcyc_per_req_delivery_cur1; + out->ttu_regs.refcyc_per_req_delivery_pre_l = disp_ttu_regs->refcyc_per_req_delivery_pre_l; + out->ttu_regs.refcyc_per_req_delivery_pre_c = disp_ttu_regs->refcyc_per_req_delivery_pre_c; + out->ttu_regs.refcyc_per_req_delivery_pre_cur0 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur0; + out->ttu_regs.refcyc_per_req_delivery_pre_cur1 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur1; + out->ttu_regs.qos_level_fixed_l = disp_ttu_regs->qos_level_fixed_l; + out->ttu_regs.qos_level_fixed_c = disp_ttu_regs->qos_level_fixed_c; + out->ttu_regs.qos_level_fixed_cur0 = disp_ttu_regs->qos_level_fixed_cur0; + out->ttu_regs.qos_level_fixed_cur1 = disp_ttu_regs->qos_level_fixed_cur1; + out->ttu_regs.qos_ramp_disable_l = disp_ttu_regs->qos_ramp_disable_l; + out->ttu_regs.qos_ramp_disable_c = disp_ttu_regs->qos_ramp_disable_c; + out->ttu_regs.qos_ramp_disable_cur0 = disp_ttu_regs->qos_ramp_disable_cur0; + out->ttu_regs.qos_ramp_disable_cur1 = disp_ttu_regs->qos_ramp_disable_cur1; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h new file mode 100644 index 000000000000..d764773938f4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DML2_TRANSLATION_HELPER_H__ +#define __DML2_TRANSLATION_HELPER_H__ + +void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out); +void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out); +void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, + const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out); +void dml2_translate_ip_params(const struct dc *in_dc, struct ip_params_st *out); +void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box_st *out); +void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); +void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); +void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); + +#endif //__DML2_TRANSLATION_HELPER_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c new file mode 100644 index 000000000000..9a33158b63bf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c @@ -0,0 +1,560 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +//#include "dml2_utils.h" +#include "display_mode_core.h" +#include "dml_display_rq_dlg_calc.h" +#include "dml2_internal_types.h" +#include "dml2_translation_helper.h" +#include "dml2_utils.h" + +void dml2_util_copy_dml_timing(struct dml_timing_cfg_st *dml_timing_array, unsigned int dst_index, unsigned int src_index) +{ + dml_timing_array->HTotal[dst_index] = dml_timing_array->HTotal[src_index]; + dml_timing_array->VTotal[dst_index] = dml_timing_array->VTotal[src_index]; + dml_timing_array->HBlankEnd[dst_index] = dml_timing_array->HBlankEnd[src_index]; + dml_timing_array->VBlankEnd[dst_index] = dml_timing_array->VBlankEnd[src_index]; + dml_timing_array->RefreshRate[dst_index] = dml_timing_array->RefreshRate[src_index]; + dml_timing_array->VFrontPorch[dst_index] = dml_timing_array->VFrontPorch[src_index]; + dml_timing_array->PixelClock[dst_index] = dml_timing_array->PixelClock[src_index]; + dml_timing_array->HActive[dst_index] = dml_timing_array->HActive[src_index]; + dml_timing_array->VActive[dst_index] = dml_timing_array->VActive[src_index]; + dml_timing_array->Interlace[dst_index] = dml_timing_array->Interlace[src_index]; + dml_timing_array->DRRDisplay[dst_index] = dml_timing_array->DRRDisplay[src_index]; + dml_timing_array->VBlankNom[dst_index] = dml_timing_array->VBlankNom[src_index]; +} + +void dml2_util_copy_dml_plane(struct dml_plane_cfg_st *dml_plane_array, unsigned int dst_index, unsigned int src_index) +{ + dml_plane_array->GPUVMMinPageSizeKBytes[dst_index] = dml_plane_array->GPUVMMinPageSizeKBytes[src_index]; + dml_plane_array->ForceOneRowForFrame[dst_index] = dml_plane_array->ForceOneRowForFrame[src_index]; + dml_plane_array->PTEBufferModeOverrideEn[dst_index] = dml_plane_array->PTEBufferModeOverrideEn[src_index]; + dml_plane_array->PTEBufferMode[dst_index] = dml_plane_array->PTEBufferMode[src_index]; + dml_plane_array->ViewportWidth[dst_index] = dml_plane_array->ViewportWidth[src_index]; + dml_plane_array->ViewportHeight[dst_index] = dml_plane_array->ViewportHeight[src_index]; + dml_plane_array->ViewportWidthChroma[dst_index] = dml_plane_array->ViewportWidthChroma[src_index]; + dml_plane_array->ViewportHeightChroma[dst_index] = dml_plane_array->ViewportHeightChroma[src_index]; + dml_plane_array->ViewportXStart[dst_index] = dml_plane_array->ViewportXStart[src_index]; + dml_plane_array->ViewportXStartC[dst_index] = dml_plane_array->ViewportXStartC[src_index]; + dml_plane_array->ViewportYStart[dst_index] = dml_plane_array->ViewportYStart[src_index]; + dml_plane_array->ViewportYStartC[dst_index] = dml_plane_array->ViewportYStartC[src_index]; + dml_plane_array->ViewportStationary[dst_index] = dml_plane_array->ViewportStationary[src_index]; + + dml_plane_array->ScalerEnabled[dst_index] = dml_plane_array->ScalerEnabled[src_index]; + dml_plane_array->HRatio[dst_index] = dml_plane_array->HRatio[src_index]; + dml_plane_array->VRatio[dst_index] = dml_plane_array->VRatio[src_index]; + dml_plane_array->HRatioChroma[dst_index] = dml_plane_array->HRatioChroma[src_index]; + dml_plane_array->VRatioChroma[dst_index] = dml_plane_array->VRatioChroma[src_index]; + dml_plane_array->HTaps[dst_index] = dml_plane_array->HTaps[src_index]; + dml_plane_array->VTaps[dst_index] = dml_plane_array->VTaps[src_index]; + dml_plane_array->HTapsChroma[dst_index] = dml_plane_array->HTapsChroma[src_index]; + dml_plane_array->VTapsChroma[dst_index] = dml_plane_array->VTapsChroma[src_index]; + dml_plane_array->LBBitPerPixel[dst_index] = dml_plane_array->LBBitPerPixel[src_index]; + + 
dml_plane_array->SourceScan[dst_index] = dml_plane_array->SourceScan[src_index]; + dml_plane_array->ScalerRecoutWidth[dst_index] = dml_plane_array->ScalerRecoutWidth[src_index]; + + dml_plane_array->DynamicMetadataEnable[dst_index] = dml_plane_array->DynamicMetadataEnable[src_index]; + dml_plane_array->DynamicMetadataLinesBeforeActiveRequired[dst_index] = dml_plane_array->DynamicMetadataLinesBeforeActiveRequired[src_index]; + dml_plane_array->DynamicMetadataTransmittedBytes[dst_index] = dml_plane_array->DynamicMetadataTransmittedBytes[src_index]; + dml_plane_array->DETSizeOverride[dst_index] = dml_plane_array->DETSizeOverride[src_index]; + + dml_plane_array->NumberOfCursors[dst_index] = dml_plane_array->NumberOfCursors[src_index]; + dml_plane_array->CursorWidth[dst_index] = dml_plane_array->CursorWidth[src_index]; + dml_plane_array->CursorBPP[dst_index] = dml_plane_array->CursorBPP[src_index]; + + dml_plane_array->UseMALLForStaticScreen[dst_index] = dml_plane_array->UseMALLForStaticScreen[src_index]; + dml_plane_array->UseMALLForPStateChange[dst_index] = dml_plane_array->UseMALLForPStateChange[src_index]; + + dml_plane_array->BlendingAndTiming[dst_index] = dml_plane_array->BlendingAndTiming[src_index]; +} + +void dml2_util_copy_dml_surface(struct dml_surface_cfg_st *dml_surface_array, unsigned int dst_index, unsigned int src_index) +{ + dml_surface_array->SurfaceTiling[dst_index] = dml_surface_array->SurfaceTiling[src_index]; + dml_surface_array->SourcePixelFormat[dst_index] = dml_surface_array->SourcePixelFormat[src_index]; + dml_surface_array->PitchY[dst_index] = dml_surface_array->PitchY[src_index]; + dml_surface_array->SurfaceWidthY[dst_index] = dml_surface_array->SurfaceWidthY[src_index]; + dml_surface_array->SurfaceHeightY[dst_index] = dml_surface_array->SurfaceHeightY[src_index]; + dml_surface_array->PitchC[dst_index] = dml_surface_array->PitchC[src_index]; + dml_surface_array->SurfaceWidthC[dst_index] = dml_surface_array->SurfaceWidthC[src_index]; + dml_surface_array->SurfaceHeightC[dst_index] = dml_surface_array->SurfaceHeightC[src_index]; + + dml_surface_array->DCCEnable[dst_index] = dml_surface_array->DCCEnable[src_index]; + dml_surface_array->DCCMetaPitchY[dst_index] = dml_surface_array->DCCMetaPitchY[src_index]; + dml_surface_array->DCCMetaPitchC[dst_index] = dml_surface_array->DCCMetaPitchC[src_index]; + + dml_surface_array->DCCRateLuma[dst_index] = dml_surface_array->DCCRateLuma[src_index]; + dml_surface_array->DCCRateChroma[dst_index] = dml_surface_array->DCCRateChroma[src_index]; + dml_surface_array->DCCFractionOfZeroSizeRequestsLuma[dst_index] = dml_surface_array->DCCFractionOfZeroSizeRequestsLuma[src_index]; + dml_surface_array->DCCFractionOfZeroSizeRequestsChroma[dst_index] = dml_surface_array->DCCFractionOfZeroSizeRequestsChroma[src_index]; +} + +void dml2_util_copy_dml_output(struct dml_output_cfg_st *dml_output_array, unsigned int dst_index, unsigned int src_index) +{ + dml_output_array->DSCInputBitPerComponent[dst_index] = dml_output_array->DSCInputBitPerComponent[src_index]; + dml_output_array->OutputFormat[dst_index] = dml_output_array->OutputFormat[src_index]; + dml_output_array->OutputEncoder[dst_index] = dml_output_array->OutputEncoder[src_index]; + dml_output_array->OutputMultistreamId[dst_index] = dml_output_array->OutputMultistreamId[src_index]; + dml_output_array->OutputMultistreamEn[dst_index] = dml_output_array->OutputMultistreamEn[src_index]; + dml_output_array->OutputBpp[dst_index] = dml_output_array->OutputBpp[src_index]; + 
dml_output_array->PixelClockBackEnd[dst_index] = dml_output_array->PixelClockBackEnd[src_index]; + dml_output_array->DSCEnable[dst_index] = dml_output_array->DSCEnable[src_index]; + dml_output_array->OutputLinkDPLanes[dst_index] = dml_output_array->OutputLinkDPLanes[src_index]; + dml_output_array->OutputLinkDPRate[dst_index] = dml_output_array->OutputLinkDPRate[src_index]; + dml_output_array->ForcedOutputLinkBPP[dst_index] = dml_output_array->ForcedOutputLinkBPP[src_index]; + dml_output_array->AudioSampleRate[dst_index] = dml_output_array->AudioSampleRate[src_index]; + dml_output_array->AudioSampleLayout[dst_index] = dml_output_array->AudioSampleLayout[src_index]; +} + +unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, enum dml_output_encoder_class encoder, bool dsc_enabled) +{ + switch (encoder) { + case dml_dp: + case dml_edp: + return 2; + case dml_dp2p0: + if (dsc_enabled || force_odm_4to1) + return 4; + else + return 2; + case dml_hdmi: + return 1; + case dml_hdmifrl: + if (force_odm_4to1) + return 4; + else + return 2; + default: + return 1; + } +} + +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) +{ + if (pipe_ctx == NULL || pipe_ctx->stream == NULL) + return false; + + /* If this assert is hit then we have a link encoder dynamic management issue */ + ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); + + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->link_res.hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal)); +} + +bool is_dtbclk_required(const struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i])) + return true; + } + return false; +} + +void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_state *context) +{ + context->bw_ctx.bw.dcn.clk.dispclk_khz = out_clks->dispclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = out_clks->dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dramclk_khz = out_clks->uclk_mts / 16; + context->bw_ctx.bw.dcn.clk.fclk_khz = out_clks->fclk_khz; + context->bw_ctx.bw.dcn.clk.phyclk_khz = out_clks->phyclk_khz; + context->bw_ctx.bw.dcn.clk.socclk_khz = out_clks->socclk_khz; + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = out_clks->ref_dtbclk_khz; + context->bw_ctx.bw.dcn.clk.p_state_change_support = out_clks->p_state_supported; +} + +int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] && ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] == stream_id) + return i; + } + + return -1; +} + +static int find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] && ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] == plane_id) + return i; + } + + return -1; +} + +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) +{ + unsigned int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; + + if (!plane_id) 
+ return false; + + for (i = 0; i < state->stream_count; i++) { + if (state->streams[i]->stream_id == stream_id) { + for (j = 0; j < state->stream_status[i].plane_count; j++) { + if (state->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (j == plane_index))) { + *plane_id = (i << 16) | j; + return true; + } + } + } + } + + return false; +} + +static void populate_pipe_ctx_dlg_params_from_dml(struct pipe_ctx *pipe_ctx, struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx) +{ + unsigned int hactive, vactive, hblank_start, vblank_start, hblank_end, vblank_end; + struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; + + hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right; + vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top; + hblank_start = pipe_ctx->stream->timing.h_total - pipe_ctx->stream->timing.h_front_porch; + vblank_start = pipe_ctx->stream->timing.v_total - pipe_ctx->stream->timing.v_front_porch; + + hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right; + vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom; + + pipe_ctx->pipe_dlg_param.vstartup_start = dml_get_vstartup_calculated(mode_lib, pipe_idx); + pipe_ctx->pipe_dlg_param.vupdate_offset = dml_get_vupdate_offset(mode_lib, pipe_idx); + pipe_ctx->pipe_dlg_param.vupdate_width = dml_get_vupdate_width(mode_lib, pipe_idx); + pipe_ctx->pipe_dlg_param.vready_offset = dml_get_vready_offset(mode_lib, pipe_idx); + + pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst; + + pipe_ctx->pipe_dlg_param.hactive = hactive; + pipe_ctx->pipe_dlg_param.vactive = vactive; + pipe_ctx->pipe_dlg_param.htotal = pipe_ctx->stream->timing.h_total; + pipe_ctx->pipe_dlg_param.vtotal = pipe_ctx->stream->timing.v_total; + pipe_ctx->pipe_dlg_param.hblank_end = hblank_end; + pipe_ctx->pipe_dlg_param.vblank_end = vblank_end; + pipe_ctx->pipe_dlg_param.hblank_start = hblank_start; + pipe_ctx->pipe_dlg_param.vblank_start = vblank_start; + pipe_ctx->pipe_dlg_param.vfront_porch = pipe_ctx->stream->timing.v_front_porch; + pipe_ctx->pipe_dlg_param.pixel_rate_mhz = pipe_ctx->stream->timing.pix_clk_100hz / 10000.00; + pipe_ctx->pipe_dlg_param.refresh_rate = ((timing->pix_clk_100hz * 100) / timing->h_total) / timing->v_total; + pipe_ctx->pipe_dlg_param.vtotal_max = pipe_ctx->stream->adjust.v_total_max; + pipe_ctx->pipe_dlg_param.vtotal_min = pipe_ctx->stream->adjust.v_total_min; + pipe_ctx->pipe_dlg_param.recout_height = pipe_ctx->plane_res.scl_data.recout.height; + pipe_ctx->pipe_dlg_param.recout_width = pipe_ctx->plane_res.scl_data.recout.width; + pipe_ctx->pipe_dlg_param.full_recout_height = pipe_ctx->plane_res.scl_data.recout.height; + pipe_ctx->pipe_dlg_param.full_recout_width = pipe_ctx->plane_res.scl_data.recout.width; +} + +void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt) +{ + unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id; + enum mall_stream_type pipe_mall_type; + struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch; + + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000; + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + + if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported) + 
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; + else + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; + + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + + context->bw_ctx.bw.dcn.compbuf_size_kb = in_ctx->v20.dml_core_ctx.ip.config_return_buffer_size_in_kbytes; + + for (dc_pipe_ctx_index = 0; dc_pipe_ctx_index < pipe_cnt; dc_pipe_ctx_index++) { + if (!context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream) + continue; + /* The DML2 and the DC logic of determining pipe indices are different from each other so + * there is a need to know which DML pipe index maps to which DC pipe. The code below + * finds a dml_pipe_index from the plane id if a plane is valid. If a plane is not valid then + * it finds a dml_pipe_index from the stream id. */ + if (get_plane_id(in_ctx, context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state, + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id, + in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[context->res_ctx.pipe_ctx[dc_pipe_ctx_index].pipe_idx], &plane_id)) { + dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id); + } else { + dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id); + } + + if (dml_pipe_idx == 0xFFFFFFFF) + continue; + ASSERT(in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[dml_pipe_idx]); + ASSERT(in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_pipe_idx] == context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id); + + /* Use the dml_pipe_index here for the getters to fetch the correct values and dc_pipe_index in the pipe_ctx to populate them + * at the right locations. + */ + populate_pipe_ctx_dlg_params_from_dml(&context->res_ctx.pipe_ctx[dc_pipe_ctx_index], &context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx); + + pipe_mall_type = dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[dc_pipe_ctx_index]); + if (pipe_mall_type == SUBVP_PHANTOM) { + // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = 0; + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false; + } else { + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = dml_get_det_buffer_size_kbytes(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx); + // Unbounded requesting should not ever be used when more than 1 pipe is enabled. 
+ context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = in_ctx->v20.dml_core_ctx.ms.UnboundedRequestEnabledThisState; + } + + context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb; + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz = dml_get_dppclk_calculated(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx) * 1000; + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz) + context->bw_ctx.bw.dcn.clk.dppclk_khz = context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz; + + dml_rq_dlg_get_rq_reg(&s->rq_regs, &in_ctx->v20.dml_core_ctx, dml_pipe_idx); + dml_rq_dlg_get_dlg_reg(&s->disp_dlg_regs, &s->disp_ttu_regs, &in_ctx->v20.dml_core_ctx, dml_pipe_idx); + dml2_update_pipe_ctx_dchub_regs(&s->rq_regs, &s->disp_dlg_regs, &s->disp_ttu_regs, &out_new_hw_state->pipe_ctx[dc_pipe_ctx_index]); + + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes = dml_get_surface_size_for_mall(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx); + + /* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */ + /* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */ + if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream && context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state && + (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe == NULL || + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state != context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe->plane_state) && + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].prev_odm_pipe == NULL) { + /* SS: all active surfaces stored in MALL */ + if (pipe_mall_type != SUBVP_PHANTOM) { + context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes; + } else { + /* SUBVP: phantom surfaces only stored in MALL */ + context->bw_ctx.bw.dcn.mall_subvp_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes; + } + } + } + + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = in_ctx->v20.dml_core_ctx.states.state_array[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx].dppclk_mhz + * 1000; + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = in_ctx->v20.dml_core_ctx.states.state_array[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx].dispclk_mhz + * 1000; + + if (dc->config.forced_clocks || dc->debug.max_disp_clk) { + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz ; + } +} + +void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx) +{ + watermark->urgent_ns = dml_get_wm_urgent(dml_core_ctx) * 1000; + watermark->cstate_pstate.cstate_enter_plus_exit_ns = dml_get_wm_stutter_enter_exit(dml_core_ctx) * 1000; + watermark->cstate_pstate.cstate_exit_ns = dml_get_wm_stutter_exit(dml_core_ctx) * 1000; + watermark->cstate_pstate.pstate_change_ns = dml_get_wm_dram_clock_change(dml_core_ctx) * 1000; + watermark->pte_meta_urgent_ns = dml_get_wm_memory_trip(dml_core_ctx) * 1000; + watermark->frac_urg_bw_nom = dml_get_fraction_of_urgent_bandwidth(dml_core_ctx) * 1000; + 
watermark->frac_urg_bw_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(dml_core_ctx) * 1000; + watermark->urgent_latency_ns = dml_get_urgent_latency(dml_core_ctx) * 1000; + watermark->cstate_pstate.fclk_pstate_change_ns = dml_get_wm_fclk_change(dml_core_ctx) * 1000; + watermark->usr_retraining_ns = dml_get_wm_usr_retraining(dml_core_ctx) * 1000; + watermark->cstate_pstate.cstate_enter_plus_exit_z8_ns = dml_get_wm_z8_stutter_enter_exit(dml_core_ctx) * 1000; + watermark->cstate_pstate.cstate_exit_z8_ns = dml_get_wm_z8_stutter(dml_core_ctx) * 1000; +} + +unsigned int dml2_calc_max_scaled_time( + unsigned int time_per_pixel, + enum mmhubbub_wbif_mode mode, + unsigned int urgent_watermark) +{ + unsigned int time_per_byte = 0; + unsigned int total_free_entry = 0xb40; + unsigned int buf_lh_capability; + unsigned int max_scaled_time; + + if (mode == PACKED_444) /* packed mode 32 bpp */ + time_per_byte = time_per_pixel/4; + else if (mode == PACKED_444_FP16) /* packed mode 64 bpp */ + time_per_byte = time_per_pixel/8; + + if (time_per_byte == 0) + time_per_byte = 1; + + buf_lh_capability = (total_free_entry*time_per_byte*32) >> 6; /* time_per_byte is in u6.6*/ + max_scaled_time = buf_lh_capability - urgent_watermark; + return max_scaled_time; +} + +void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx) +{ + int i, j = 0; + struct mcif_arb_params *wb_arb_params = NULL; + struct dcn_bw_writeback *bw_writeback = NULL; + enum mmhubbub_wbif_mode wbif_mode = PACKED_444_FP16; /*for now*/ + + if (context->stream_count != 0) { + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->num_wb_info != 0) + j++; + } + } + if (j == 0) /*no dwb */ + return; + for (i = 0; i < __DML_NUM_DMB__; i++) { + bw_writeback = &context->bw_ctx.bw.dcn.bw_writeback; + wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[i]; + + for (j = 0 ; j < 4; j++) { + /*current dml only has one set of watermark, need to follow up*/ + bw_writeback->mcif_wb_arb[i].cli_watermark[j] = + dml_get_wm_writeback_urgent(dml_core_ctx) * 1000; + bw_writeback->mcif_wb_arb[i].pstate_watermark[j] = + dml_get_wm_writeback_dram_clock_change(dml_core_ctx) * 1000; + } + if (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk != 0) { + /* time_per_pixel should be in u6.6 format */ + bw_writeback->mcif_wb_arb[i].time_per_pixel = + (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; + } + bw_writeback->mcif_wb_arb[i].slice_lines = 32; + bw_writeback->mcif_wb_arb[i].arbitration_slice = 2; + bw_writeback->mcif_wb_arb[i].max_scaled_time = + dml2_calc_max_scaled_time(wb_arb_params->time_per_pixel, + wbif_mode, wb_arb_params->cli_watermark[0]); + /*not required any more*/ + bw_writeback->mcif_wb_arb[i].dram_speed_change_duration = + dml_get_wm_writeback_dram_clock_change(dml_core_ctx) * 1000; + + } +} +void dml2_initialize_det_scratch(struct dml2_context *in_ctx) +{ + int i; + + for (i = 0; i < MAX_PLANES; i++) { + in_ctx->det_helper_scratch.dpps_per_surface[i] = 1; + } +} + +static unsigned int find_planes_per_stream_and_stream_count(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg, int *num_of_planes_per_stream) +{ + unsigned int plane_index, stream_index = 0, num_of_streams; + + for (plane_index = 0; plane_index < dml_dispcfg->num_surfaces; plane_index++) { + /* Number of planes per stream */ + num_of_planes_per_stream[stream_index] += 1; + + if (plane_index + 1 < dml_dispcfg->num_surfaces && dml_dispcfg->plane.BlendingAndTiming[plane_index] != 
dml_dispcfg->plane.BlendingAndTiming[plane_index + 1]) + stream_index++; + } + + num_of_streams = stream_index + 1; + + return num_of_streams; +} + +void dml2_apply_det_buffer_allocation_policy(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg) +{ + unsigned int num_of_streams = 0, plane_index = 0, max_det_size, stream_index = 0; + int num_of_planes_per_stream[__DML_NUM_PLANES__] = { 0 }; + + max_det_size = in_ctx->config.det_segment_size * in_ctx->config.max_segments_per_hubp; + + num_of_streams = find_planes_per_stream_and_stream_count(in_ctx, dml_dispcfg, num_of_planes_per_stream); + + for (plane_index = 0; plane_index < dml_dispcfg->num_surfaces; plane_index++) { + + if (in_ctx->config.override_det_buffer_size_kbytes) + dml_dispcfg->plane.DETSizeOverride[plane_index] = max_det_size / in_ctx->config.dcn_pipe_count; + else { + dml_dispcfg->plane.DETSizeOverride[plane_index] = ((max_det_size / num_of_streams) / num_of_planes_per_stream[stream_index] / in_ctx->det_helper_scratch.dpps_per_surface[plane_index]); + + /* If the override size is not divisible by det_segment_size then round off to nearest number divisible by det_segment_size as + * this is a requirement. + */ + if (dml_dispcfg->plane.DETSizeOverride[plane_index] % in_ctx->config.det_segment_size != 0) { + dml_dispcfg->plane.DETSizeOverride[plane_index] = dml_dispcfg->plane.DETSizeOverride[plane_index] & ~0x3F; + } + + if (plane_index + 1 < dml_dispcfg->num_surfaces && dml_dispcfg->plane.BlendingAndTiming[plane_index] != dml_dispcfg->plane.BlendingAndTiming[plane_index + 1]) + stream_index++; + } + } +} + +bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc_state *display_state, struct dml2_helper_det_policy_scratch *det_scratch) +{ + unsigned int i = 0, dml_pipe_idx = 0, plane_id = 0; + unsigned int max_det_size, total_det_allocated = 0; + bool need_recalculation = false; + + max_det_size = in_ctx->config.det_segment_size * in_ctx->config.max_segments_per_hubp; + + for (i = 0; i < MAX_PIPES; i++) { + if (!display_state->res_ctx.pipe_ctx[i].stream) + continue; + if (get_plane_id(in_ctx, display_state, display_state->res_ctx.pipe_ctx[i].plane_state, + display_state->res_ctx.pipe_ctx[i].stream->stream_id, + in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[display_state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id)) + dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id); + else + dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, display_state->res_ctx.pipe_ctx[i].stream->stream_id); + + if (dml_pipe_idx == 0xFFFFFFFF) + continue; + total_det_allocated += dml_get_det_buffer_size_kbytes(&in_ctx->v20.dml_core_ctx, dml_pipe_idx); + if (total_det_allocated > max_det_size) { + need_recalculation = true; + } + } + + /* Store the DPPPerSurface for correctly determining the number of planes in the next call. 
*/ + for (i = 0; i < MAX_PLANES; i++) { + det_scratch->dpps_per_surface[i] = in_ctx->v20.scratch.cur_display_config.hw.DPPPerSurface[i]; + } + + return need_recalculation; +} + +bool dml2_is_stereo_timing(const struct dc_stream_state *stream) +{ + bool is_stereo = false; + + if ((stream->view_format == + VIEW_3D_FORMAT_SIDE_BY_SIDE || + stream->view_format == + VIEW_3D_FORMAT_TOP_AND_BOTTOM) && + (stream->timing.timing_3d_format == + TIMING_3D_FORMAT_TOP_AND_BOTTOM || + stream->timing.timing_3d_format == + TIMING_3D_FORMAT_SIDE_BY_SIDE)) + is_stereo = true; + + return is_stereo; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h new file mode 100644 index 000000000000..04fcfe637119 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#ifndef _DML2_UTILS_H_
+#define _DML2_UTILS_H_
+
+#include "os_types.h"
+#include "dml2_dc_types.h"
+
+struct dc;
+struct dml_timing_cfg_st;
+struct dml2_dcn_clocks;
+struct dc_state;
+
+void dml2_util_copy_dml_timing(struct dml_timing_cfg_st *dml_timing_array, unsigned int dst_index, unsigned int src_index);
+void dml2_util_copy_dml_plane(struct dml_plane_cfg_st *dml_plane_array, unsigned int dst_index, unsigned int src_index);
+void dml2_util_copy_dml_surface(struct dml_surface_cfg_st *dml_surface_array, unsigned int dst_index, unsigned int src_index);
+void dml2_util_copy_dml_output(struct dml_output_cfg_st *dml_output_array, unsigned int dst_index, unsigned int src_index);
+unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, enum dml_output_encoder_class encoder, bool dsc_enabled);
+void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_state *context);
+void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx);
+void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx);
+int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
+bool is_dtbclk_required(const struct dc *dc, struct dc_state *context);
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream);
+unsigned int dml2_calc_max_scaled_time(
+		unsigned int time_per_pixel,
+		enum mmhubbub_wbif_mode mode,
+		unsigned int urgent_watermark);
+
+/*
+ * dml2_dc_construct_pipes - This function will determine if we need additional pipes based
+ * on the DML calculated outputs for MPC and ODM, and allocate them as necessary. This function
+ * could be called in dml_validate_build_resource after dml_mode_programming, e.g.:
+ * {
+ *	...
+ *	map_hw_resources(&s->cur_display_config, &s->mode_support_info);
+ *	result = dml_mode_programming(&in_ctx->dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true);
+ *	dml2_dc_construct_pipes(in_display_state, s->mode_support_info, out_hw_context);
+ *	...
+ * }
+ *
+ * @context: To obtain res_ctx and read other information like stream ID etc.
+ * @dml_mode_support_st : To get the ODM, MPC outputs as determined by the DML.
+ * @out_hw_context : Handle to the new hardware context.
+ *
+ *
+ * Return: None.
+ */
+void dml2_dc_construct_pipes(struct dc_state *context, struct dml_mode_support_info_st *dml_mode_support_st,
+		struct resource_context *out_hw_context);
+
+/*
+ * dml2_predict_pipe_split - This function is the dml2 version of predict split pipe. It predicts
+ * whether a pipe split is required and returns the result as a bool.
+ * @context : dc_state.
+ * @pipe : old_index is the index of the pipe as derived from pipe_idx.
+ * @index : index of the pipe
+ *
+ *
+ * Return: Returns the result as a boolean.
+ */
+bool dml2_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index);
+
+/*
+ * dml2_build_mapped_resource - This function is the dml2 version of build_mapped_resource.
+ * In case of ODM, we need to build pipe hardware params again as done in dcn20_build_mapped_resource.
+ * @dc : struct dc
+ * @context : struct dc_state.
+ * @stream : stream whose corresponding pipe params need to be modified.
+ *
+ *
+ * Return: Returns DC_OK if successful.
+ */
+enum dc_status dml2_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream);
+
+/*
+ * dml2_extract_rq_regs - This function will extract the information needed for struct _vcs_dpi_display_rq_regs_st
+ * and populate it.
+ * @context: To obtain and populate the res_ctx->pipe_ctx->rq_regs with DML outputs.
+ * @support : This structure has the DML intermediate outputs required to populate rq_regs.
+ *
+ *
+ * Return: None.
+ */
+
+/*
+ * dml2_calculate_rq_and_dlg_params - This function will call into the DML2 functions needed
+ * to populate the rq, ttu and dlg param structures.
+ * @dc : struct dc
+ * @context : dc_state provides a handle to selectively populate pipe_ctx
+ * @out_new_hw_state: To obtain and populate the rq, dlg and ttu regs in
+ *                    out_new_hw_state->pipe_ctx with DML outputs.
+ * @in_ctx : This structure has the pointer to display_mode_lib_st.
+ * @pipe_cnt : DML functions to obtain RQ, TTU and DLG params need a pipe_index.
+ *             This helps provide pipe_index in the pipe_cnt loop.
+ *
+ *
+ * Return: None.
+ */
+void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt);
+
+/*
+ * dml2_apply_det_buffer_allocation_policy - This function will determine the DET buffer size
+ * allocated to each plane.
+ * @dml2 : Handle for dml2 context
+ * @dml_dispcfg : dml_dispcfg is the DML2 struct representing the current display config
+ * Return : None.
+ */
+void dml2_apply_det_buffer_allocation_policy(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg);
+
+/*
+ * dml2_verify_det_buffer_configuration - This function will verify whether the allocated DET buffer exceeds
+ * the total available DET size and outputs a boolean to indicate if recalculation is needed.
+ * @dml2 : Handle for dml2 context
+ * @dml_dispcfg : dml_dispcfg is the DML2 struct representing the current display config
+ * @struct dml2_helper_det_policy_scratch : Pointer to DET helper scratch
+ * Return : returns true if recalculation is required, false otherwise.
+ */
+bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc_state *display_state, struct dml2_helper_det_policy_scratch *det_scratch);
+
+/*
+ * dml2_initialize_det_scratch - This function will initialize the DET scratch space as per requirements.
+ * @dml2 : Handle for dml2 context
+ * Return : None
+ */
+void dml2_initialize_det_scratch(struct dml2_context *in_ctx);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c
new file mode 100644
index 000000000000..9deb03a18ccc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c
@@ -0,0 +1,704 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "display_mode_core.h" +#include "dml2_internal_types.h" +#include "dml2_utils.h" +#include "dml2_policy.h" +#include "dml2_translation_helper.h" +#include "dml2_mall_phantom.h" +#include "dml2_dc_resource_mgmt.h" +#include "dml21_wrapper.h" + +static void initialize_dml2_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out) +{ + if (dml2->config.use_native_soc_bb_construction) + dml2_init_ip_params(dml2, in_dc, out); + else + dml2_translate_ip_params(in_dc, out); +} + +static void initialize_dml2_soc_bbox(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out) +{ + if (dml2->config.use_native_soc_bb_construction) + dml2_init_socbb_params(dml2, in_dc, out); + else + dml2_translate_socbb_params(in_dc, out); +} + +static void initialize_dml2_soc_states(struct dml2_context *dml2, + const struct dc *in_dc, const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out) +{ + if (dml2->config.use_native_soc_bb_construction) + dml2_init_soc_states(dml2, in_dc, in_bbox, out); + else + dml2_translate_soc_states(in_dc, out, in_dc->dml.soc.num_states); +} + +static void map_hw_resources(struct dml2_context *dml2, + struct dml_display_cfg_st *in_out_display_cfg, struct dml_mode_support_info_st *mode_support_info) +{ + unsigned int num_pipes = 0; + int i, j; + + for (i = 0; i < __DML_NUM_PLANES__; i++) { + in_out_display_cfg->hw.ODMMode[i] = mode_support_info->ODMMode[i]; + in_out_display_cfg->hw.DPPPerSurface[i] = mode_support_info->DPPPerSurface[i]; + in_out_display_cfg->hw.DSCEnabled[i] = mode_support_info->DSCEnabled[i]; + in_out_display_cfg->hw.NumberOfDSCSlices[i] = mode_support_info->NumberOfDSCSlices[i]; + in_out_display_cfg->hw.DLGRefClkFreqMHz = 24; + if (dml2->v20.dml_core_ctx.project != dml_project_dcn35 && + dml2->v20.dml_core_ctx.project != dml_project_dcn36 && + dml2->v20.dml_core_ctx.project != dml_project_dcn351) { + /*dGPU default as 50Mhz*/ + in_out_display_cfg->hw.DLGRefClkFreqMHz = 50; + } + for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) { + if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) { + dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n", + __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__); + break; + } + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true; + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[num_pipes] = true; + num_pipes++; + } + } +} + +static unsigned int pack_and_call_dml_mode_support_ex(struct dml2_context *dml2, + const struct dml_display_cfg_st *display_cfg, + struct dml_mode_support_info_st *evaluation_info, + enum dc_validate_mode validate_mode) +{ + struct 
dml2_wrapper_scratch *s = &dml2->v20.scratch; + + s->mode_support_params.mode_lib = &dml2->v20.dml_core_ctx; + s->mode_support_params.in_display_cfg = display_cfg; + if (validate_mode == DC_VALIDATE_MODE_ONLY) + s->mode_support_params.in_start_state_idx = dml2->v20.dml_core_ctx.states.num_states - 1; + else + s->mode_support_params.in_start_state_idx = 0; + s->mode_support_params.out_evaluation_info = evaluation_info; + + memset(evaluation_info, 0, sizeof(struct dml_mode_support_info_st)); + s->mode_support_params.out_lowest_state_idx = 0; + + return dml_mode_support_ex(&s->mode_support_params); +} + +static bool optimize_configuration(struct dml2_context *dml2, struct dml2_wrapper_optimize_configuration_params *p) +{ + int unused_dpps = p->ip_params->max_num_dpp; + int i; + int odms_needed; + int largest_blend_and_timing = 0; + bool optimization_done = false; + + for (i = 0; i < (int) p->cur_display_config->num_timings; i++) { + if (p->cur_display_config->plane.BlendingAndTiming[i] > largest_blend_and_timing) + largest_blend_and_timing = p->cur_display_config->plane.BlendingAndTiming[i]; + } + + if (p->new_policy != p->cur_policy) + *p->new_policy = *p->cur_policy; + + if (p->new_display_config != p->cur_display_config) + *p->new_display_config = *p->cur_display_config; + + + // Optimize Clocks + if (!optimization_done) { + if (largest_blend_and_timing == 0 && p->cur_policy->ODMUse[0] == dml_odm_use_policy_combine_as_needed && dml2->config.minimize_dispclk_using_odm) { + odms_needed = dml2_util_get_maximum_odm_combine_for_output(dml2->config.optimize_odm_4to1, + p->cur_display_config->output.OutputEncoder[0], p->cur_mode_support_info->DSCEnabled[0]) - 1; + + if (odms_needed <= unused_dpps) { + if (odms_needed == 1) { + p->new_policy->ODMUse[0] = dml_odm_use_policy_combine_2to1; + optimization_done = true; + } else if (odms_needed == 3) { + p->new_policy->ODMUse[0] = dml_odm_use_policy_combine_4to1; + optimization_done = true; + } else + optimization_done = false; + } + } + } + + return optimization_done; +} + +static int calculate_lowest_supported_state_for_temp_read(struct dml2_context *dml2, struct dc_state *display_state, + enum dc_validate_mode validate_mode) +{ + struct dml2_calculate_lowest_supported_state_for_temp_read_scratch *s = &dml2->v20.scratch.dml2_calculate_lowest_supported_state_for_temp_read_scratch; + struct dml2_wrapper_scratch *s_global = &dml2->v20.scratch; + + unsigned int dml_result = 0; + int result = -1, i, j; + + build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); + + /* Zero out before each call before proceeding */ + memset(s, 0, sizeof(struct dml2_calculate_lowest_supported_state_for_temp_read_scratch)); + memset(&s_global->mode_support_params, 0, sizeof(struct dml_mode_support_ex_params_st)); + memset(&s_global->dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); + + for (i = 0; i < dml2->config.dcn_pipe_count; i++) { + /* Calling resource_build_scaling_params will populate the pipe params + * with the necessary information needed for correct DML calculations + * This is also done in DML1 driver code path and hence display_state + * cannot be const. 
+ */ + struct pipe_ctx *pipe = &display_state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!dml2->config.callbacks.build_scaling_params(pipe)) { + ASSERT(false); + return false; + } + } + } + + map_dc_state_into_dml_display_cfg(dml2, display_state, &s->cur_display_config); + + for (i = 0; i < dml2->v20.dml_core_ctx.states.num_states; i++) { + s->uclk_change_latencies[i] = dml2->v20.dml_core_ctx.states.state_array[i].dram_clock_change_latency_us; + } + + for (i = 0; i < 4; i++) { + for (j = 0; j < dml2->v20.dml_core_ctx.states.num_states; j++) { + dml2->v20.dml_core_ctx.states.state_array[j].dram_clock_change_latency_us = s_global->dummy_pstate_table[i].dummy_pstate_latency_us; + } + + dml_result = pack_and_call_dml_mode_support_ex(dml2, &s->cur_display_config, &s->evaluation_info, + validate_mode); + + if (dml_result && s->evaluation_info.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive) { + map_hw_resources(dml2, &s->cur_display_config, &s->evaluation_info); + dml_result = dml_mode_programming(&dml2->v20.dml_core_ctx, s_global->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); + + ASSERT(dml_result); + + dml2_extract_watermark_set(&dml2->v20.g6_temp_read_watermark_set, &dml2->v20.dml_core_ctx); + dml2->v20.g6_temp_read_watermark_set.cstate_pstate.fclk_pstate_change_ns = dml2->v20.g6_temp_read_watermark_set.cstate_pstate.pstate_change_ns; + + result = s_global->mode_support_params.out_lowest_state_idx; + + while (dml2->v20.dml_core_ctx.states.state_array[result].dram_speed_mts < s_global->dummy_pstate_table[i].dram_speed_mts) + result++; + + break; + } + } + + for (i = 0; i < dml2->v20.dml_core_ctx.states.num_states; i++) { + dml2->v20.dml_core_ctx.states.state_array[i].dram_clock_change_latency_us = s->uclk_change_latencies[i]; + } + + return result; +} + +static void copy_dummy_pstate_table(struct dummy_pstate_entry *dest, struct dummy_pstate_entry *src, unsigned int num_entries) +{ + for (int i = 0; i < num_entries; i++) { + dest[i] = src[i]; + } +} + +static bool are_timings_requiring_odm_doing_blending(const struct dml_display_cfg_st *display_cfg, + const struct dml_mode_support_info_st *evaluation_info) +{ + unsigned int planes_per_timing[__DML_NUM_PLANES__] = {0}; + int i; + + for (i = 0; i < display_cfg->num_surfaces; i++) + planes_per_timing[display_cfg->plane.BlendingAndTiming[i]]++; + + for (i = 0; i < __DML_NUM_PLANES__; i++) { + if (planes_per_timing[i] > 1 && evaluation_info->ODMMode[i] != dml_odm_mode_bypass) + return true; + } + + return false; +} + +static bool does_configuration_meet_sw_policies(struct dml2_context *ctx, const struct dml_display_cfg_st *display_cfg, + const struct dml_mode_support_info_st *evaluation_info) +{ + bool pass = true; + + if (!ctx->config.enable_windowed_mpo_odm) { + if (are_timings_requiring_odm_doing_blending(display_cfg, evaluation_info)) + pass = false; + } + + return pass; +} + +static bool dml_mode_support_wrapper(struct dml2_context *dml2, + struct dc_state *display_state, + enum dc_validate_mode validate_mode) +{ + struct dml2_wrapper_scratch *s = &dml2->v20.scratch; + unsigned int result = 0, i; + unsigned int optimized_result = true; + + build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); + + /* Zero out before each call before proceeding */ + memset(&s->cur_display_config, 0, sizeof(struct dml_display_cfg_st)); + memset(&s->mode_support_params, 0, sizeof(struct dml_mode_support_ex_params_st)); + memset(&s->dml_to_dc_pipe_mapping, 0, 
sizeof(struct dml2_dml_to_dc_pipe_mapping)); + memset(&s->optimize_configuration_params, 0, sizeof(struct dml2_wrapper_optimize_configuration_params)); + + for (i = 0; i < dml2->config.dcn_pipe_count; i++) { + /* Calling resource_build_scaling_params will populate the pipe params + * with the necessary information needed for correct DML calculations + * This is also done in DML1 driver code path and hence display_state + * cannot be const. + */ + struct pipe_ctx *pipe = &display_state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!dml2->config.callbacks.build_scaling_params(pipe)) { + ASSERT(false); + return false; + } + } + } + + map_dc_state_into_dml_display_cfg(dml2, display_state, &s->cur_display_config); + if (!dml2->config.skip_hw_state_mapping) + dml2_apply_det_buffer_allocation_policy(dml2, &s->cur_display_config); + + result = pack_and_call_dml_mode_support_ex(dml2, + &s->cur_display_config, + &s->mode_support_info, + validate_mode); + + if (result) + result = does_configuration_meet_sw_policies(dml2, &s->cur_display_config, &s->mode_support_info); + + // Try to optimize + if (result) { + s->cur_policy = dml2->v20.dml_core_ctx.policy; + s->optimize_configuration_params.dml_core_ctx = &dml2->v20.dml_core_ctx; + s->optimize_configuration_params.config = &dml2->config; + s->optimize_configuration_params.ip_params = &dml2->v20.dml_core_ctx.ip; + s->optimize_configuration_params.cur_display_config = &s->cur_display_config; + s->optimize_configuration_params.cur_mode_support_info = &s->mode_support_info; + s->optimize_configuration_params.cur_policy = &s->cur_policy; + s->optimize_configuration_params.new_display_config = &s->new_display_config; + s->optimize_configuration_params.new_policy = &s->new_policy; + + while (optimized_result && optimize_configuration(dml2, &s->optimize_configuration_params)) { + dml2->v20.dml_core_ctx.policy = s->new_policy; + optimized_result = pack_and_call_dml_mode_support_ex(dml2, + &s->new_display_config, + &s->mode_support_info, + validate_mode); + + if (optimized_result) + optimized_result = does_configuration_meet_sw_policies(dml2, &s->new_display_config, &s->mode_support_info); + + // If the new optimized state is supported, then set current = new + if (optimized_result) { + s->cur_display_config = s->new_display_config; + s->cur_policy = s->new_policy; + } else { + // Else, restore policy to current + dml2->v20.dml_core_ctx.policy = s->cur_policy; + } + } + + // Optimize ended with a failed config, so we need to restore DML state to last passing + if (!optimized_result) { + result = pack_and_call_dml_mode_support_ex(dml2, + &s->cur_display_config, + &s->mode_support_info, + validate_mode); + } + } + + if (result) + map_hw_resources(dml2, &s->cur_display_config, &s->mode_support_info); + + return result; +} + +static bool call_dml_mode_support_and_programming(struct dc_state *context, enum dc_validate_mode validate_mode) +{ + unsigned int result = 0; + unsigned int min_state = 0; + int min_state_for_g6_temp_read = 0; + + + if (!context) + return false; + + struct dml2_context *dml2 = context->bw_ctx.dml2; + struct dml2_wrapper_scratch *s = &dml2->v20.scratch; + + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context, + validate_mode); + + ASSERT(min_state_for_g6_temp_read >= 0); + } + + result = dml_mode_support_wrapper(dml2, context, validate_mode); + + /* Upon trying to set certain frequencies in FRL, min_state_for_g6_temp_read is reported as
-1. This leads to an invalid value of min_state causing crashes later on. + * Use the default logic for min_state only when min_state_for_g6_temp_read is a valid value. In other cases, use the value calculated by the DML directly. + */ + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + if (min_state_for_g6_temp_read >= 0) + min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx; + else + min_state = s->mode_support_params.out_lowest_state_idx; + } + + if (result) { + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true); + } else { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); + } + } + return result; +} + +static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_state *context, + enum dc_validate_mode validate_mode) +{ + struct dml2_context *dml2 = context->bw_ctx.dml2; + struct dml2_wrapper_scratch *s = &dml2->v20.scratch; + struct dml2_dcn_clocks out_clks; + unsigned int result = 0; + bool need_recalculation = false; + uint32_t cstate_enter_plus_exit_z8_ns; + + if (context->stream_count == 0) { + unsigned int lowest_state_idx = 0; + + out_clks.p_state_supported = true; + out_clks.dispclk_khz = 0; /* No requirement, and lowest index will generally be maximum dispclk. */ + out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000; + out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000; + out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts; + out_clks.phyclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].phyclk_mhz * 1000; + out_clks.socclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].socclk_mhz * 1000; + out_clks.ref_dtbclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dtbclk_mhz * 1000; + context->bw_ctx.bw.dcn.clk.dtbclk_en = false; + dml2_copy_clocks_to_dc_state(&out_clks, context); + return true; + } + + /* Zero out before each call before proceeding */ + memset(&dml2->v20.scratch, 0, sizeof(struct dml2_wrapper_scratch)); + memset(&dml2->v20.dml_core_ctx.policy, 0, sizeof(struct dml_mode_eval_policy_st)); + memset(&dml2->v20.dml_core_ctx.ms, 0, sizeof(struct mode_support_st)); + memset(&dml2->v20.dml_core_ctx.mp, 0, sizeof(struct mode_program_st)); + + /* Initialize DET scratch */ + dml2_initialize_det_scratch(dml2); + + copy_dummy_pstate_table(s->dummy_pstate_table, in_dc->clk_mgr->bw_params->dummy_pstate_table, 4); + + result = call_dml_mode_support_and_programming(context, validate_mode); + /* Call map dc pipes to map the pipes based on the DML output. For correctly determining if recalculation + * is required or not, the resource context needs to correctly reflect the number of active pipes. We would + * only know the correct number of active pipes after dml2_map_dc_pipes is called. + */ + if (result && !dml2->config.skip_hw_state_mapping) + dml2_map_dc_pipes(dml2, context, &s->cur_display_config, &s->dml_to_dc_pipe_mapping, in_dc->current_state); + + /* Verify and update DET Buffer configuration if needed.
dml2_verify_det_buffer_configuration will check if DET Buffer + * size needs to be updated. If yes it will update the DETOverride variable and set need_recalculation flag to true. + * Based on that flag, run mode support again. Verification needs to be run after dml_mode_programming because the getters + * return correct det buffer values only after dml_mode_programming is called. + */ + if (result && !dml2->config.skip_hw_state_mapping) { + need_recalculation = dml2_verify_det_buffer_configuration(dml2, context, &dml2->det_helper_scratch); + if (need_recalculation) { + /* Engage the DML again if recalculation is required. */ + call_dml_mode_support_and_programming(context, validate_mode); + if (!dml2->config.skip_hw_state_mapping) { + dml2_map_dc_pipes(dml2, context, &s->cur_display_config, &s->dml_to_dc_pipe_mapping, in_dc->current_state); + } + need_recalculation = dml2_verify_det_buffer_configuration(dml2, context, &dml2->det_helper_scratch); + ASSERT(need_recalculation == false); + } + } + + if (result) { + unsigned int lowest_state_idx = s->mode_support_params.out_lowest_state_idx; + out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.mp.Dispclk_calculated * 1000; + out_clks.p_state_supported = s->mode_support_info.DRAMClockChangeSupport[0] != dml_dram_clock_change_unsupported; + if (in_dc->config.use_default_clock_table && + (lowest_state_idx < dml2->v20.dml_core_ctx.states.num_states - 1)) { + lowest_state_idx = dml2->v20.dml_core_ctx.states.num_states - 1; + out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dispclk_mhz * 1000; + } + + out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000; + out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000; + out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts; + out_clks.phyclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].phyclk_mhz * 1000; + out_clks.socclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].socclk_mhz * 1000; + out_clks.ref_dtbclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dtbclk_mhz * 1000; + context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(in_dc, context); + + if (!dml2->config.skip_hw_state_mapping) { + /* Call dml2_calculate_rq_and_dlg_params */ + dml2_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml2, in_dc->res_pool->pipe_count); + } + + dml2_copy_clocks_to_dc_state(&out_clks, context); + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.a, &dml2->v20.dml_core_ctx); + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx); + if (context->streams[0]->sink->link->dc->caps.is_apu) + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.dml_core_ctx); + else + memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx); + dml2_extract_writeback_wm(context, &dml2->v20.dml_core_ctx); + //copy for deciding zstate use + context->bw_ctx.dml.vba.StutterPeriod = context->bw_ctx.dml2->v20.dml_core_ctx.mp.StutterPeriod; + + cstate_enter_plus_exit_z8_ns = context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns; + + if 
(context->bw_ctx.dml.vba.StutterPeriod < in_dc->debug.minimum_z8_residency_time && + cstate_enter_plus_exit_z8_ns < in_dc->debug.minimum_z8_residency_time * 1000) + cstate_enter_plus_exit_z8_ns = in_dc->debug.minimum_z8_residency_time * 1000; + + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = cstate_enter_plus_exit_z8_ns; + } + + return result; +} + +static bool dml2_validate_only(struct dc_state *context, enum dc_validate_mode validate_mode) +{ + struct dml2_context *dml2; + unsigned int result = 0; + + if (!context || context->stream_count == 0) + return true; + + dml2 = context->bw_ctx.dml2; + + /* Zero out before each call before proceeding */ + memset(&dml2->v20.scratch, 0, sizeof(struct dml2_wrapper_scratch)); + memset(&dml2->v20.dml_core_ctx.policy, 0, sizeof(struct dml_mode_eval_policy_st)); + memset(&dml2->v20.dml_core_ctx.ms, 0, sizeof(struct mode_support_st)); + memset(&dml2->v20.dml_core_ctx.mp, 0, sizeof(struct mode_program_st)); + + build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); + + map_dc_state_into_dml_display_cfg(dml2, context, &dml2->v20.scratch.cur_display_config); + if (!dml2->config.skip_hw_state_mapping) + dml2_apply_det_buffer_allocation_policy(dml2, &dml2->v20.scratch.cur_display_config); + + result = pack_and_call_dml_mode_support_ex(dml2, + &dml2->v20.scratch.cur_display_config, + &dml2->v20.scratch.mode_support_info, + validate_mode); + + if (result) + result = does_configuration_meet_sw_policies(dml2, &dml2->v20.scratch.cur_display_config, &dml2->v20.scratch.mode_support_info); + + return result == 1; +} + +static void dml2_apply_debug_options(const struct dc *dc, struct dml2_context *dml2) +{ + if (dc->debug.override_odm_optimization) { + dml2->config.minimize_dispclk_using_odm = dc->debug.minimize_dispclk_using_odm; + } +} + +bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2, + enum dc_validate_mode validate_mode) +{ + bool out = false; + + if (!dml2) + return false; + dml2_apply_debug_options(in_dc, dml2); + + /* DML2.1 validation path */ + if (dml2->architecture == dml2_architecture_21) { + out = dml21_validate(in_dc, context, dml2, validate_mode); + return out; + } + + DC_FP_START(); + + /* Use dml_validate_only for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX path */ + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) + out = dml2_validate_only(context, validate_mode); + else + out = dml2_validate_and_build_resource(in_dc, context, validate_mode); + + DC_FP_END(); + + return out; +} + +static inline struct dml2_context *dml2_allocate_memory(void) +{ + return (struct dml2_context *) vzalloc(sizeof(struct dml2_context)); +} + +static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) +{ + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) { + dml21_reinit(in_dc, *dml2, config); + return; + } + + // Store config options + (*dml2)->config = *config; + + switch (in_dc->ctx->dce_version) { + case DCN_VERSION_3_5: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn35; + break; + case DCN_VERSION_3_51: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn351; + break; + case DCN_VERSION_3_6: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn36; + break; + case DCN_VERSION_3_2: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn32; + break; + case DCN_VERSION_3_21: + (*dml2)->v20.dml_core_ctx.project = 
dml_project_dcn321; + break; + case DCN_VERSION_4_01: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn401; + break; + default: + (*dml2)->v20.dml_core_ctx.project = dml_project_default; + break; + } + + DC_FP_START(); + + initialize_dml2_ip_params(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.ip); + + initialize_dml2_soc_bbox(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc); + + initialize_dml2_soc_states(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc, &(*dml2)->v20.dml_core_ctx.states); + + DC_FP_END(); +} + +bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) +{ + // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) + return dml21_create(in_dc, dml2, config); + + // Allocate Mode Lib Ctx + *dml2 = dml2_allocate_memory(); + + if (!(*dml2)) + return false; + + dml2_init(in_dc, config, dml2); + + return true; +} + +void dml2_destroy(struct dml2_context *dml2) +{ + if (!dml2) + return; + + if (dml2->architecture == dml2_architecture_21) + dml21_destroy(dml2); + vfree(dml2); +} + +void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, + unsigned int *fclk_change_support, unsigned int *dram_clk_change_support) +{ + *fclk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0]; + *dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0]; +} + +void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2) +{ + if (dml2->architecture == dml2_architecture_21) + dml21_prepare_mcache_programming(in_dc, context, dml2); +} + +void dml2_copy(struct dml2_context *dst_dml2, + struct dml2_context *src_dml2) +{ + if (src_dml2->architecture == dml2_architecture_21) { + dml21_copy(dst_dml2, src_dml2); + return; + } + /* copy Mode Lib Ctx */ + memcpy(dst_dml2, src_dml2, sizeof(struct dml2_context)); +} + +bool dml2_create_copy(struct dml2_context **dst_dml2, + struct dml2_context *src_dml2) +{ + if (src_dml2->architecture == dml2_architecture_21) + return dml21_create_copy(dst_dml2, src_dml2); + /* Allocate Mode Lib Ctx */ + *dst_dml2 = dml2_allocate_memory(); + + if (!(*dst_dml2)) + return false; + + /* copy Mode Lib Ctx */ + dml2_copy(*dst_dml2, src_dml2); + + return true; +} + +void dml2_reinit(const struct dc *in_dc, + const struct dml2_configuration_options *config, + struct dml2_context **dml2) +{ + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) { + dml21_reinit(in_dc, *dml2, config); + return; + } + + dml2_init(in_dc, config, dml2); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h new file mode 100644 index 000000000000..c384e141cebc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h @@ -0,0 +1,309 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DML2_WRAPPER_H_ +#define _DML2_WRAPPER_H_ + +#include "os_types.h" + +#define DML2_MAX_NUM_DPM_LVL 30 + +struct dml2_context; +struct display_mode_lib_st; +struct dc; +struct pipe_ctx; +struct dc_plane_state; +struct dc_sink; +struct dc_stream_state; +struct resource_context; +struct display_stream_compressor; +struct dc_mcache_params; + +// Configuration of the MALL on the SoC +struct dml2_soc_mall_info { + // Cache line size of 0 means MALL is not enabled/present + unsigned int cache_line_size_bytes; + unsigned int cache_num_ways; + unsigned int max_cab_allocation_bytes; + + unsigned int mblk_width_pixels; + unsigned int mblk_size_bytes; + unsigned int mblk_height_4bpe_pixels; + unsigned int mblk_height_8bpe_pixels; +}; + +// Output of DML2 for clock requirements +struct dml2_dcn_clocks { + unsigned int dispclk_khz; + unsigned int dcfclk_khz; + unsigned int fclk_khz; + unsigned int uclk_mts; + unsigned int phyclk_khz; + unsigned int socclk_khz; + unsigned int ref_dtbclk_khz; + bool p_state_supported; + unsigned int cab_num_ways_required; + unsigned int dcfclk_khz_ds; +}; + +struct dml2_dc_callbacks { + struct dc *dc; + bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx); + void (*build_test_pattern_params)(struct resource_context *res_ctx, struct pipe_ctx *otg_master); + bool (*can_support_mclk_switch_using_fw_based_vblank_stretch)(struct dc *dc, struct dc_state *context); + bool (*acquire_secondary_pipe_for_mpc_odm)(const struct dc *dc, struct dc_state *state, struct pipe_ctx *pri_pipe, struct pipe_ctx *sec_pipe, bool odm); + bool (*update_pipes_for_stream_with_slice_count)( + struct dc_state *new_ctx, + const struct dc_state *cur_ctx, + const struct resource_pool *pool, + const struct dc_stream_state *stream, + int new_slice_count); + bool (*update_pipes_for_plane_with_slice_count)( + struct dc_state *new_ctx, + const struct dc_state *cur_ctx, + const struct resource_pool *pool, + const struct dc_plane_state *plane, + int slice_count); + int (*get_odm_slice_index)(const struct pipe_ctx *opp_head); + int (*get_odm_slice_count)(const struct pipe_ctx *opp_head); + int (*get_mpc_slice_index)(const struct pipe_ctx *dpp_pipe); + int (*get_mpc_slice_count)(const struct pipe_ctx *dpp_pipe); + struct pipe_ctx *(*get_opp_head)(const struct pipe_ctx *pipe_ctx); + struct pipe_ctx *(*get_otg_master_for_stream)( + struct resource_context *res_ctx, + const struct dc_stream_state 
*stream); + int (*get_opp_heads_for_otg_master)(const struct pipe_ctx *otg_master, + struct resource_context *res_ctx, + struct pipe_ctx *opp_heads[MAX_PIPES]); + int (*get_dpp_pipes_for_plane)(const struct dc_plane_state *plane, + struct resource_context *res_ctx, + struct pipe_ctx *dpp_pipes[MAX_PIPES]); + struct dc_stream_status *(*get_stream_status)( + struct dc_state *state, + const struct dc_stream_state *stream); + struct dc_stream_state *(*get_stream_from_id)(const struct dc_state *state, unsigned int id); + unsigned int (*get_max_flickerless_instant_vtotal_increase)( + struct dc_stream_state *stream, + bool is_gaming); + bool (*allocate_mcache)(struct dc_state *context, const struct dc_mcache_params *mcache_params); +}; + +struct dml2_dc_svp_callbacks { + struct dc *dc; + bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx); + struct dc_stream_state* (*create_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *main_stream); + struct dc_plane_state* (*create_phantom_plane)(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *main_plane); + enum dc_status (*add_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + struct dc_stream_state *main_stream); + bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); + bool (*remove_phantom_plane)(const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *context); + enum dc_status (*remove_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + void (*release_phantom_plane)(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *plane); + void (*release_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + void (*release_dsc)(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc); + enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state, const struct pipe_ctx *pipe_ctx); + enum mall_stream_type (*get_stream_subvp_type)(const struct dc_state *state, const struct dc_stream_state *stream); + struct dc_stream_state *(*get_paired_subvp_stream)(const struct dc_state *state, const struct dc_stream_state *stream); + bool (*remove_phantom_streams_and_planes)( + const struct dc *dc, + struct dc_state *state); + void (*release_phantom_streams_and_planes)( + const struct dc *dc, + struct dc_state *state); + unsigned int (*calculate_mall_ways_from_bytes)( + const struct dc *dc, + unsigned int total_size_in_mall_bytes); +}; + +struct dml2_clks_table_entry { + unsigned int dcfclk_mhz; + unsigned int fclk_mhz; + unsigned int memclk_mhz; + unsigned int socclk_mhz; + unsigned int dtbclk_mhz; + unsigned int dispclk_mhz; + unsigned int dppclk_mhz; + unsigned int dram_speed_mts; /*which is based on wck_ratio*/ +}; + +struct dml2_clks_num_entries { + unsigned int num_dcfclk_levels; + unsigned int num_fclk_levels; + unsigned int num_memclk_levels; + unsigned int num_socclk_levels; + unsigned int num_dtbclk_levels; + unsigned int num_dispclk_levels; + unsigned int num_dppclk_levels; +}; + +struct dml2_clks_limit_table { + struct dml2_clks_table_entry clk_entries[DML2_MAX_NUM_DPM_LVL]; + struct dml2_clks_num_entries num_entries_per_clk; + unsigned int num_states; +}; + +// Various overrides, per ASIC or per SKU specific, or for debugging 
purpose when/if available +struct dml2_soc_bbox_overrides { + double xtalclk_mhz; + double dchub_refclk_mhz; + double dprefclk_mhz; + double disp_pll_vco_speed_mhz; + double urgent_latency_us; + double sr_exit_latency_us; + double sr_enter_plus_exit_latency_us; + double sr_exit_z8_time_us; + double sr_enter_plus_exit_z8_time_us; + double dram_clock_change_latency_us; + double fclk_change_latency_us; + unsigned int dram_num_chan; + unsigned int dram_chanel_width_bytes; + struct dml2_clks_limit_table clks_table; +}; + +enum dml2_force_pstate_methods { + dml2_force_pstate_method_auto = 0, + dml2_force_pstate_method_vactive, + dml2_force_pstate_method_vblank, + dml2_force_pstate_method_drr, + dml2_force_pstate_method_subvp, +}; + +struct dml2_configuration_options { + int dcn_pipe_count; + bool use_native_pstate_optimization; + bool enable_windowed_mpo_odm; + bool use_native_soc_bb_construction; + bool skip_hw_state_mapping; + bool optimize_odm_4to1; + bool minimize_dispclk_using_odm; + bool override_det_buffer_size_kbytes; + struct dml2_dc_callbacks callbacks; + struct { + bool force_disable_subvp; + bool force_enable_subvp; + unsigned int subvp_fw_processing_delay_us; + unsigned int subvp_pstate_allow_width_us; + unsigned int subvp_prefetch_end_to_mall_start_us; + unsigned int subvp_swath_height_margin_lines; + struct dml2_dc_svp_callbacks callbacks; + } svp_pstate; + struct dml2_soc_mall_info mall_cfg; + struct dml2_soc_bbox_overrides bbox_overrides; + unsigned int max_segments_per_hubp; + unsigned int det_segment_size; + /* Only for debugging purposes when initializing SOCBB params via tool for DML21. */ + struct socbb_ip_params_external *external_socbb_ip_params; + struct { + bool force_pstate_method_enable; + enum dml2_force_pstate_methods force_pstate_method_values[MAX_PIPES]; + } pmo; + bool map_dc_pipes_with_callbacks; + + bool use_clock_dc_limits; + bool gpuvm_enable; + bool force_tdlut_enable; + void *bb_from_dmub; +}; + +/* + * dml2_create - Creates dml2_context. + * @in_dc: dc. + * @config: dml2 configuration options. + * @dml2: Created dml2 context. + * + * Create and destroy of DML2 is done as part of dc_state creation + * and dc_state_free. DML2 IP, SOC and STATES are initialized at + * creation time. + * + * Return: True if dml2 is successfully created, false otherwise. + */ +bool dml2_create(const struct dc *in_dc, + const struct dml2_configuration_options *config, + struct dml2_context **dml2); + +void dml2_destroy(struct dml2_context *dml2); +void dml2_copy(struct dml2_context *dst_dml2, + struct dml2_context *src_dml2); +bool dml2_create_copy(struct dml2_context **dst_dml2, + struct dml2_context *src_dml2); +void dml2_reinit(const struct dc *in_dc, + const struct dml2_configuration_options *config, + struct dml2_context **dml2); + +/* + * dml2_validate - Determines if a display configuration is supported or not. + * @in_dc: dc. + * @context: dc_state to be validated. + * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX will not populate context.res_ctx. + * + * DML1.0 compatible interface for validation. + * + * Based on fast_validate option internally would call: + * + * -dml2_validate_and_build_resource - for non fast_validate option + * Calculates if dc_state can be supported on the SOC, and attempts to + * optimize the power management feature supports versus minimum clocks. + * If supported, also builds out_new_hw_state to represent the hw programming + * for the new dc state. 
+ * + * -dml2_validate_only - for fast_validate option + * Calculates if dc_state can be supported on the SOC (i.e. at maximum + * clocks) with all mandatory power features enabled. + + * Context: Two threads may not invoke this function concurrently unless they reference + * separate dc_states for validation. + * Return: True if mode is supported, false otherwise. + */ +bool dml2_validate(const struct dc *in_dc, + struct dc_state *context, + struct dml2_context *dml2, + enum dc_validate_mode validate_mode); + +/* + * dml2_extract_dram_and_fclk_change_support - Extracts the FCLK and UCLK change support info. + * @dml2: input dml2 context pointer. + * @fclk_change_support: output pointer holding the fclk change support info (vactive, vblank, unsupported). + * @dram_clk_change_support: output pointer holding the uclk change support info (vactive, vblank, unsupported). + */ +void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, + unsigned int *fclk_change_support, unsigned int *dram_clk_change_support); +void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2); +#endif //_DML2_WRAPPER_H_ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h new file mode 100644 index 000000000000..17f0972b1af7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML_ASSERT_H__ +#define __DML_ASSERT_H__ + +#include "os_types.h" + +#endif //__DML_ASSERT_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h new file mode 100644 index 000000000000..d459f93cf40b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +/* This header intentionally does not include an #ifdef guard as it only contains includes for other headers. */ + +/* + * Standard Types + */ +#include "os_types.h" +#include "cmntypes.h" + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c new file mode 100644 index 000000000000..00d22e542469 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c @@ -0,0 +1,573 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE.
+ * + */ + +#include "dml_display_rq_dlg_calc.h" +#include "display_mode_core.h" +#include "display_mode_util.h" + +static dml_bool_t is_dual_plane(enum dml_source_format_class source_format) +{ + dml_bool_t ret_val = 0; + + if ((source_format == dml_420_12) || (source_format == dml_420_8) || (source_format == dml_420_10) || (source_format == dml_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +void dml_rq_dlg_get_rq_reg(dml_display_rq_regs_st *rq_regs, + struct display_mode_lib_st *mode_lib, + const dml_uint_t pipe_idx) +{ + dml_uint_t plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + enum dml_source_format_class source_format = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[plane_idx]; + enum dml_swizzle_mode sw_mode = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[plane_idx]; + dml_bool_t dual_plane = is_dual_plane((enum dml_source_format_class)(source_format)); + + uint32 pixel_chunk_bytes = 0; + uint32 min_pixel_chunk_bytes = 0; + uint32 meta_chunk_bytes = 0; + uint32 min_meta_chunk_bytes = 0; + uint32 dpte_group_bytes = 0; + uint32 mpte_group_bytes = 0; + + uint32 p1_pixel_chunk_bytes = 0; + uint32 p1_min_pixel_chunk_bytes = 0; + uint32 p1_meta_chunk_bytes = 0; + uint32 p1_min_meta_chunk_bytes = 0; + uint32 p1_dpte_group_bytes = 0; + uint32 p1_mpte_group_bytes = 0; + + dml_uint_t detile_buf_size_in_bytes; + dml_uint_t detile_buf_plane1_addr = 0; + + dml_float_t stored_swath_l_bytes; + dml_float_t stored_swath_c_bytes; + dml_bool_t is_phantom_pipe; + + dml_uint_t pte_row_height_linear; + + dml_print("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); + + memset(rq_regs, 0, sizeof(*rq_regs)); + + pixel_chunk_bytes = (dml_uint_t)(dml_get_pixel_chunk_size_in_kbyte(mode_lib) * 1024); + min_pixel_chunk_bytes = (dml_uint_t)(dml_get_min_pixel_chunk_size_in_byte(mode_lib)); + + if (pixel_chunk_bytes == 64 * 1024) + min_pixel_chunk_bytes = 0; + + meta_chunk_bytes = (dml_uint_t)(dml_get_meta_chunk_size_in_kbyte(mode_lib) * 1024); + min_meta_chunk_bytes = (dml_uint_t)(dml_get_min_meta_chunk_size_in_byte(mode_lib)); + + dpte_group_bytes = (dml_uint_t)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); + mpte_group_bytes = (dml_uint_t)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); + + p1_pixel_chunk_bytes = pixel_chunk_bytes; + p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; + p1_meta_chunk_bytes = meta_chunk_bytes; + p1_min_meta_chunk_bytes = min_meta_chunk_bytes; + p1_dpte_group_bytes = dpte_group_bytes; + p1_mpte_group_bytes = mpte_group_bytes; + + if (source_format == dml_rgbe_alpha) + p1_pixel_chunk_bytes = (dml_uint_t)(dml_get_alpha_pixel_chunk_size_in_kbyte(mode_lib) * 1024); + + rq_regs->rq_regs_l.chunk_size = (dml_uint_t)(dml_log2((dml_float_t) pixel_chunk_bytes) - 10); + rq_regs->rq_regs_c.chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_pixel_chunk_bytes) - 10); + + if (min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_l.min_chunk_size = 0; + else + rq_regs->rq_regs_l.min_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) min_pixel_chunk_bytes) - 8 + 1); + + if (p1_min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_c.min_chunk_size = 0; + else + rq_regs->rq_regs_c.min_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_min_pixel_chunk_bytes) - 8 + 1); + + rq_regs->rq_regs_l.meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) meta_chunk_bytes) - 10); + rq_regs->rq_regs_c.meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_meta_chunk_bytes) - 10); + + if (min_meta_chunk_bytes == 0) + rq_regs->rq_regs_l.min_meta_chunk_size = 0; + else 
+ rq_regs->rq_regs_l.min_meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) min_meta_chunk_bytes) - 6 + 1); + + if (min_meta_chunk_bytes == 0) + rq_regs->rq_regs_c.min_meta_chunk_size = 0; + else + rq_regs->rq_regs_c.min_meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_min_meta_chunk_bytes) - 6 + 1); + + rq_regs->rq_regs_l.dpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) dpte_group_bytes) - 6); + rq_regs->rq_regs_l.mpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) mpte_group_bytes) - 6); + rq_regs->rq_regs_c.dpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) p1_dpte_group_bytes) - 6); + rq_regs->rq_regs_c.mpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) p1_mpte_group_bytes) - 6); + + detile_buf_size_in_bytes = (dml_uint_t)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024); + + pte_row_height_linear = (dml_uint_t)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); + + if (sw_mode == dml_sw_linear) + ASSERT(pte_row_height_linear >= 8); + + rq_regs->rq_regs_l.pte_row_height_linear = (dml_uint_t)(dml_floor(dml_log2((dml_float_t) pte_row_height_linear), 1) - 3); + + if (dual_plane) { + dml_uint_t p1_pte_row_height_linear = (dml_uint_t)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); + if (sw_mode == dml_sw_linear) + ASSERT(p1_pte_row_height_linear >= 8); + + rq_regs->rq_regs_c.pte_row_height_linear = (dml_uint_t)(dml_floor(dml_log2((dml_float_t) p1_pte_row_height_linear), 1) - 3); + } + + rq_regs->rq_regs_l.swath_height = (dml_uint_t)(dml_log2((dml_float_t) dml_get_swath_height_l(mode_lib, pipe_idx))); + rq_regs->rq_regs_c.swath_height = (dml_uint_t)(dml_log2((dml_float_t) dml_get_swath_height_c(mode_lib, pipe_idx))); + + if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + + stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx); + stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx); + is_phantom_pipe = dml_get_is_phantom_pipe(mode_lib, pipe_idx); + + // Note: detile_buf_plane1_addr is in unit of 1KB + if (dual_plane) { + if (is_phantom_pipe) { + detile_buf_plane1_addr = (dml_uint_t)((1024.0*1024.0) / 2.0 / 1024.0); // half to chroma + } else { + if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { + detile_buf_plane1_addr = (dml_uint_t)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma +#ifdef __DML_VBA_DEBUG__ + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } else { + detile_buf_plane1_addr = (dml_uint_t)(dml_round_to_multiple((dml_uint_t)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma +#ifdef __DML_VBA_DEBUG__ + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + dml_print("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + dml_print("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, 
detile_buf_plane1_addr); + dml_print("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); +#endif + dml_print_rq_regs_st(rq_regs); + dml_print("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); +} + +// Note: currently taken in as is. +// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. + + +void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *disp_dlg_regs, + dml_display_ttu_regs_st *disp_ttu_regs, + struct display_mode_lib_st *mode_lib, + const dml_uint_t pipe_idx) +{ + dml_uint_t plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + enum dml_source_format_class source_format = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[plane_idx]; + struct dml_timing_cfg_st *timing = &mode_lib->ms.cache_display_cfg.timing; + struct dml_plane_cfg_st *plane = &mode_lib->ms.cache_display_cfg.plane; + struct dml_hw_resource_st *hw = &mode_lib->ms.cache_display_cfg.hw; + dml_bool_t dual_plane = is_dual_plane(source_format); + dml_uint_t num_cursors = plane->NumberOfCursors[plane_idx]; + enum dml_odm_mode odm_mode = hw->ODMMode[plane_idx]; + + dml_uint_t htotal = timing->HTotal[plane_idx]; + dml_uint_t hactive = timing->HActive[plane_idx]; + dml_uint_t hblank_end = timing->HBlankEnd[plane_idx]; + dml_uint_t vblank_end = timing->VBlankEnd[plane_idx]; + dml_bool_t interlaced = timing->Interlace[plane_idx]; + dml_float_t pclk_freq_in_mhz = (dml_float_t) timing->PixelClock[plane_idx]; + dml_float_t refclk_freq_in_mhz = (hw->DLGRefClkFreqMHz > 0) ? (dml_float_t) hw->DLGRefClkFreqMHz : mode_lib->soc.refclk_mhz; + dml_float_t ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; + + dml_uint_t vready_after_vcount0; + + dml_uint_t dst_x_after_scaler; + dml_uint_t dst_y_after_scaler; + + dml_float_t dst_y_prefetch; + dml_float_t dst_y_per_vm_vblank; + dml_float_t dst_y_per_row_vblank; + dml_float_t dst_y_per_vm_flip; + dml_float_t dst_y_per_row_flip; + + dml_float_t max_dst_y_per_vm_vblank = 32.0; //U5.2 + dml_float_t max_dst_y_per_row_vblank = 16.0; //U4.2 + + dml_float_t vratio_pre_l; + dml_float_t vratio_pre_c; + + dml_float_t refcyc_per_line_delivery_pre_l; + dml_float_t refcyc_per_line_delivery_l; + dml_float_t refcyc_per_line_delivery_pre_c = 0.; + dml_float_t refcyc_per_line_delivery_c = 0.; + dml_float_t refcyc_per_req_delivery_pre_l; + dml_float_t refcyc_per_req_delivery_l; + dml_float_t refcyc_per_req_delivery_pre_c = 0.; + dml_float_t refcyc_per_req_delivery_c = 0.; + dml_float_t refcyc_per_req_delivery_pre_cur0 = 0.; + dml_float_t refcyc_per_req_delivery_cur0 = 0.; + + dml_float_t dst_y_per_pte_row_nom_l; + dml_float_t dst_y_per_pte_row_nom_c; + dml_float_t dst_y_per_meta_row_nom_l; + dml_float_t dst_y_per_meta_row_nom_c; + dml_float_t refcyc_per_pte_group_nom_l; + dml_float_t refcyc_per_pte_group_nom_c; + dml_float_t refcyc_per_pte_group_vblank_l; + dml_float_t refcyc_per_pte_group_vblank_c; + dml_float_t refcyc_per_pte_group_flip_l; + dml_float_t refcyc_per_pte_group_flip_c; + dml_float_t refcyc_per_meta_chunk_nom_l; + dml_float_t refcyc_per_meta_chunk_nom_c; + dml_float_t refcyc_per_meta_chunk_vblank_l; + dml_float_t refcyc_per_meta_chunk_vblank_c; + dml_float_t refcyc_per_meta_chunk_flip_l; + dml_float_t refcyc_per_meta_chunk_flip_c; + + dml_float_t temp; + dml_float_t min_ttu_vblank; + dml_uint_t min_dst_y_next_start; + + dml_print("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); + dml_print("DML_DLG::%s: plane_idx = %d\n", __func__, plane_idx); + 
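/*
+ * Note on the packing used throughout this function: most of the DLG/TTU
+ * fields programmed below hold either refclk cycle counts or fixed-point
+ * values scaled by dml_pow(2, N). As an illustration (numbers not taken from
+ * any real mode), ref_freq_to_pix_freq is stored with 19 fractional bits, so
+ * a 24 MHz refclk with a 96 MHz pixel clock gives a ratio of 0.25 and a
+ * programmed value of 0.25 * 2^19 = 131072; the ASSERT on
+ * ref_freq_to_pix_freq < 4.0 below bounds the integer part of that ratio.
+ */ +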
dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: hw->DLGRefClkFreqMHz = %3.2f\n", __func__, hw->DLGRefClkFreqMHz); + dml_print("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.refclk_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); + + memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); + memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); + + ASSERT(refclk_freq_in_mhz != 0); + ASSERT(pclk_freq_in_mhz != 0); + ASSERT(ref_freq_to_pix_freq < 4.0); + + // Need to figure out which side of odm combine we're in + // Assume the pipe instance under the same plane is in order + + if (odm_mode == dml_odm_mode_bypass) { + disp_dlg_regs->refcyc_h_blank_end = (dml_uint_t)((dml_float_t) hblank_end * ref_freq_to_pix_freq); + } else if (odm_mode == dml_odm_mode_combine_2to1 || odm_mode == dml_odm_mode_combine_4to1) { + // find out how many pipe are in this plane + dml_uint_t num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg); + dml_uint_t first_pipe_idx_in_plane = __DML_NUM_PLANES__; + dml_uint_t pipe_idx_in_combine = 0; // pipe index within the plane + dml_uint_t odm_combine_factor = (odm_mode == dml_odm_mode_combine_2to1 ? 2 : 4); + + for (dml_uint_t i = 0; i < num_active_pipes; i++) { + if (dml_get_plane_idx(mode_lib, i) == plane_idx) { + if (i < first_pipe_idx_in_plane) { + first_pipe_idx_in_plane = i; + } + } + } + pipe_idx_in_combine = pipe_idx - first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) + + disp_dlg_regs->refcyc_h_blank_end = (dml_uint_t)(((dml_float_t) hblank_end + (dml_float_t) pipe_idx_in_combine * (dml_float_t) hactive / (dml_float_t) odm_combine_factor) * ref_freq_to_pix_freq); + dml_print("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); + dml_print("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, first_pipe_idx_in_plane); + dml_print("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, pipe_idx_in_combine); + dml_print("DML_DLG: %s: odm_combine_factor = %d\n", __func__, odm_combine_factor); + } + dml_print("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); + + ASSERT(disp_dlg_regs->refcyc_h_blank_end < (dml_uint_t)dml_pow(2, 13)); + + disp_dlg_regs->ref_freq_to_pix_freq = (dml_uint_t)(ref_freq_to_pix_freq * dml_pow(2, 19)); + temp = dml_pow(2, 8); + disp_dlg_regs->refcyc_per_htotal = (dml_uint_t)(ref_freq_to_pix_freq * (dml_float_t)htotal * temp); + disp_dlg_regs->dlg_vblank_end = interlaced ? 
(vblank_end / 2) : vblank_end; // 15 bits + + min_ttu_vblank = dml_get_min_ttu_vblank_in_us(mode_lib, pipe_idx); + min_dst_y_next_start = (dml_uint_t)(dml_get_min_dst_y_next_start(mode_lib, pipe_idx)); + + dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); + dml_print("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, min_dst_y_next_start); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); + + vready_after_vcount0 = (dml_uint_t)(dml_get_vready_at_or_after_vsync(mode_lib, pipe_idx)); + disp_dlg_regs->vready_after_vcount0 = vready_after_vcount0; + + dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); + + dst_x_after_scaler = (dml_uint_t)(dml_get_dst_x_after_scaler(mode_lib, pipe_idx)); + dst_y_after_scaler = (dml_uint_t)(dml_get_dst_y_after_scaler(mode_lib, pipe_idx)); + + dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler); + dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler); + + dst_y_prefetch = dml_get_dst_y_prefetch(mode_lib, pipe_idx); + dst_y_per_vm_vblank = dml_get_dst_y_per_vm_vblank(mode_lib, pipe_idx); + dst_y_per_row_vblank = dml_get_dst_y_per_row_vblank(mode_lib, pipe_idx); + dst_y_per_vm_flip = dml_get_dst_y_per_vm_flip(mode_lib, pipe_idx); + dst_y_per_row_flip = dml_get_dst_y_per_row_flip(mode_lib, pipe_idx); + + // magic! + if (htotal <= 75) { + max_dst_y_per_vm_vblank = 100.0; + max_dst_y_per_row_vblank = 100.0; + } + + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); + dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip); + dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip); + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); + + ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); + ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank); + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); + + vratio_pre_l = dml_get_vratio_prefetch_l(mode_lib, pipe_idx); + vratio_pre_c = dml_get_vratio_prefetch_c(mode_lib, pipe_idx); + + dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l); + dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c); + + // Active + refcyc_per_line_delivery_pre_l = dml_get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_line_delivery_l = dml_get_refcyc_per_line_delivery_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l); + + if (dual_plane) { + refcyc_per_line_delivery_pre_c = dml_get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_line_delivery_c = dml_get_refcyc_per_line_delivery_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, refcyc_per_line_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, refcyc_per_line_delivery_c); + } + + disp_dlg_regs->refcyc_per_vm_dmdata = (dml_uint_t)(dml_get_refcyc_per_vm_dmdata_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); + 
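/*
+ * The dml_get_*_in_us() getters used above and below return times in
+ * microseconds; multiplying by refclk_freq_in_mhz converts them into refclk
+ * cycle counts before they are written to the register fields. For example
+ * (illustrative numbers), a 2.5 us delivery time with a 48 MHz refclk maps
+ * to 2.5 * 48 = 120 refclk cycles.
+ */ +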
disp_dlg_regs->dmdata_dl_delta = (dml_uint_t)(dml_get_dmdata_dl_delta_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); + + refcyc_per_req_delivery_pre_l = dml_get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_req_delivery_l = dml_get_refcyc_per_req_delivery_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l); + + if (dual_plane) { + refcyc_per_req_delivery_pre_c = dml_get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_req_delivery_c = dml_get_refcyc_per_req_delivery_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, refcyc_per_req_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c); + } + + // TTU - Cursor + ASSERT(num_cursors <= 1); + if (num_cursors > 0) { + refcyc_per_req_delivery_pre_cur0 = dml_get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_req_delivery_cur0 = dml_get_refcyc_per_cursor_req_delivery_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_pre_cur0); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_cur0); + } + + // Assign to register structures + disp_dlg_regs->min_dst_y_next_start = (dml_uint_t)((dml_float_t) min_dst_y_next_start * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->min_dst_y_next_start < (dml_uint_t)dml_pow(2, 18)); + + disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = (dml_uint_t)((dml_float_t) dst_x_after_scaler * ref_freq_to_pix_freq); // in terms of refclk + disp_dlg_regs->dst_y_prefetch = (dml_uint_t)(dst_y_prefetch * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (dml_uint_t)(dst_y_per_vm_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (dml_uint_t)(dst_y_per_row_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (dml_uint_t)(dst_y_per_vm_flip * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (dml_uint_t)(dst_y_per_row_flip * dml_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (dml_uint_t)(vratio_pre_l * dml_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (dml_uint_t)(vratio_pre_c * dml_pow(2, 19)); + + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + + disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_group_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_get_refcyc_per_vm_group_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_req_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10)); + 
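/*
+ * refcyc_per_vm_req_vblank above and refcyc_per_vm_req_flip below keep 10
+ * fractional bits (the dml_pow(2, 10) scaling), as do the per-request
+ * delivery times written to disp_ttu_regs further down; for example
+ * (illustrative), 1.5 refclk cycles per request would be programmed as
+ * 1.5 * 2^10 = 1536. The refcyc_per_vm_* group and request values are
+ * clamped to 2^23 - 1 later in this function if they would otherwise
+ * overflow.
+ */ +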
disp_dlg_regs->refcyc_per_vm_req_flip = (dml_uint_t)(dml_get_refcyc_per_vm_req_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10)); + + dst_y_per_pte_row_nom_l = dml_get_dst_y_per_pte_row_nom_l(mode_lib, pipe_idx); + dst_y_per_pte_row_nom_c = dml_get_dst_y_per_pte_row_nom_c(mode_lib, pipe_idx); + dst_y_per_meta_row_nom_l = dml_get_dst_y_per_meta_row_nom_l(mode_lib, pipe_idx); + dst_y_per_meta_row_nom_c = dml_get_dst_y_per_meta_row_nom_c(mode_lib, pipe_idx); + + refcyc_per_pte_group_nom_l = dml_get_refcyc_per_pte_group_nom_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_nom_c = dml_get_refcyc_per_pte_group_nom_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_vblank_l = dml_get_refcyc_per_pte_group_vblank_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_vblank_c = dml_get_refcyc_per_pte_group_vblank_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_flip_l = dml_get_refcyc_per_pte_group_flip_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_flip_c = dml_get_refcyc_per_pte_group_flip_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + refcyc_per_meta_chunk_nom_l = dml_get_refcyc_per_meta_chunk_nom_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_nom_c = dml_get_refcyc_per_meta_chunk_nom_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_vblank_l = dml_get_refcyc_per_meta_chunk_vblank_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_vblank_c = dml_get_refcyc_per_meta_chunk_vblank_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_flip_l = dml_get_refcyc_per_meta_chunk_flip_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_flip_c = dml_get_refcyc_per_meta_chunk_flip_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (dml_uint_t)(dst_y_per_pte_row_nom_l * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_pte_row_nom_c = (dml_uint_t)(dst_y_per_pte_row_nom_c * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_meta_row_nom_l = (dml_uint_t)(dst_y_per_meta_row_nom_l * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_meta_row_nom_c = (dml_uint_t)(dst_y_per_meta_row_nom_c * dml_pow(2, 2)); + disp_dlg_regs->refcyc_per_pte_group_nom_l = (dml_uint_t)(refcyc_per_pte_group_nom_l); + disp_dlg_regs->refcyc_per_pte_group_nom_c = (dml_uint_t)(refcyc_per_pte_group_nom_c); + disp_dlg_regs->refcyc_per_pte_group_vblank_l = (dml_uint_t)(refcyc_per_pte_group_vblank_l); + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (dml_uint_t)(refcyc_per_pte_group_vblank_c); + disp_dlg_regs->refcyc_per_pte_group_flip_l = (dml_uint_t)(refcyc_per_pte_group_flip_l); + disp_dlg_regs->refcyc_per_pte_group_flip_c = (dml_uint_t)(refcyc_per_pte_group_flip_c); + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (dml_uint_t)(refcyc_per_meta_chunk_nom_l); + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (dml_uint_t)(refcyc_per_meta_chunk_nom_c); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (dml_uint_t)(refcyc_per_meta_chunk_vblank_l); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (dml_uint_t)(refcyc_per_meta_chunk_vblank_c); + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (dml_uint_t)(refcyc_per_meta_chunk_flip_l); + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (dml_uint_t)(refcyc_per_meta_chunk_flip_c); + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (dml_uint_t)dml_floor(refcyc_per_line_delivery_pre_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_l = 
(dml_uint_t)dml_floor(refcyc_per_line_delivery_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (dml_uint_t)dml_floor(refcyc_per_line_delivery_pre_c, 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (dml_uint_t)dml_floor(refcyc_per_line_delivery_c, 1); + + disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; + disp_dlg_regs->dst_y_offset_cur0 = 0; + disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; + disp_dlg_regs->dst_y_offset_cur1 = 0; + + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (dml_uint_t)(refcyc_per_req_delivery_pre_l * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (dml_uint_t)(refcyc_per_req_delivery_l * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (dml_uint_t)(refcyc_per_req_delivery_pre_c * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (dml_uint_t)(refcyc_per_req_delivery_c * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = (dml_uint_t)(refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur0 = (dml_uint_t)(refcyc_per_req_delivery_cur0 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = 0; + disp_ttu_regs->refcyc_per_req_delivery_cur1 = 0; + disp_ttu_regs->qos_level_low_wm = 0; + + disp_ttu_regs->qos_level_high_wm = (dml_uint_t)(4.0 * (dml_float_t)htotal * ref_freq_to_pix_freq); + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_level_fixed_cur0 = 8; + disp_ttu_regs->qos_ramp_disable_l = 0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->qos_ramp_disable_cur0 = 0; + disp_ttu_regs->min_ttu_vblank = (dml_uint_t)(min_ttu_vblank * refclk_freq_in_mhz); + + // CHECK for HW registers' range, assert or clamp + ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); + if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_group_flip >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_flip >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_flip = (dml_uint_t)(dml_pow(2, 23) - 1); + + + ASSERT(disp_dlg_regs->dst_y_after_scaler < (dml_uint_t)8); + ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (dml_uint_t)dml_pow(2, 17)); + if (dual_plane) { + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (dml_uint_t)dml_pow(2, 17)) { // FIXME what so special about chroma, can we just assert? 
+ dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u > register max U15.2 %u\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (dml_uint_t)dml_pow(2, 17) - 1); + } + } + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (dml_uint_t)dml_pow(2, 17)); + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_c < (dml_uint_t)dml_pow(2, 17)); + + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = (dml_uint_t)(dml_pow(2, 23) - 1); + if (dual_plane) { + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = (dml_uint_t)(dml_pow(2, 23) - 1); + } + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (dml_uint_t)dml_pow(2, 13)); + if (dual_plane) { + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (dml_uint_t)dml_pow(2, 13)); + } + + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (dml_uint_t)(dml_pow(2, 23) - 1); + if (dual_plane) { + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (dml_uint_t)(dml_pow(2, 23) - 1); + } + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_c < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_ttu_regs->qos_level_low_wm < (dml_uint_t) dml_pow(2, 14)); + ASSERT(disp_ttu_regs->qos_level_high_wm < (dml_uint_t) dml_pow(2, 14)); + ASSERT(disp_ttu_regs->min_ttu_vblank < (dml_uint_t) dml_pow(2, 24)); + + dml_print_ttu_regs_st(disp_ttu_regs); + dml_print_dlg_regs_st(disp_dlg_regs); + dml_print("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); +} + +void dml_rq_dlg_get_arb_params(struct display_mode_lib_st *mode_lib, dml_display_arb_params_st *arb_param) +{ + memset(arb_param, 0, sizeof(*arb_param)); + arb_param->max_req_outstanding = 256; + arb_param->min_req_outstanding = 256; // turn off the sat level feature if this set to max + arb_param->sat_level_us = 60; + arb_param->hvm_max_qos_commit_threshold = 0xf; + arb_param->hvm_min_req_outstand_commit_threshold = 0xa; + arb_param->compbuf_reserved_space_kbytes = 2 * 8; // assume max data chunk size of 8K +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h new file mode 100644 index 000000000000..bf491cf0582d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DML_DISPLAY_RQ_DLG_CALC_H__
+#define __DML_DISPLAY_RQ_DLG_CALC_H__
+
+#include "display_mode_core_structs.h"
+#include "display_mode_lib_defines.h"
+
+struct display_mode_lib_st;
+
+// Function: dml_rq_dlg_get_rq_reg
+// Main entry point for test to get the register values out of this DML class.
+// This function calls <get_rq_param> and <extract_rq_regs> functions to calculate
+// and then populate the rq_regs struct
+// Input:
+// Assume mode_program is already called
+// Output:
+// rq_regs - struct that holds all the RQ register field values.
+// See also: <display_rq_regs_st>
+
+void dml_rq_dlg_get_rq_reg(dml_display_rq_regs_st *rq_regs,
+	struct display_mode_lib_st *mode_lib,
+	const dml_uint_t pipe_idx);
+
+// Function: dml_rq_dlg_get_dlg_reg
+// Calculate and return the DLG and TTU register structs given the system setting
+// Output:
+// dlg_regs - output DLG register struct
+// ttu_regs - output DLG TTU register struct
+// Input:
+// Assume mode_program is already called
+// pipe_idx - index that identifies the e2e_pipe_param that corresponds to this dlg
+void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *dlg_regs,
+	dml_display_ttu_regs_st *ttu_regs,
+	struct display_mode_lib_st *mode_lib,
+	const dml_uint_t pipe_idx);
+
+// Function: dml_rq_dlg_get_arb_params
+void dml_rq_dlg_get_arb_params(struct display_mode_lib_st *mode_lib, dml_display_arb_params_st *arb_param);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h
new file mode 100644
index 000000000000..7fadbe6d7af4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML_LOGGING_H__
+#define __DML_LOGGING_H__
+
+#define dml_print(...) ((void)0)
+
+#endif //__DML_LOGGING_H__
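Editor's note (not part of the patch): the DLG/TTU programming above repeats one pattern many times. A DML output in microseconds is converted to refclk cycles, shifted by a fixed number of fractional bits (2 bits for the dst_y_* fields, 10 bits for the delivery and per-VM-request fields, 19 bits for the vratio fields), and then either asserted or saturated against the width of the destination register field (for example the ">= 2^23, clamp to 2^23 - 1" checks). Below is a minimal, self-contained C sketch of that pattern, written as ordinary userspace C rather than kernel code; pack_refcyc_field is a hypothetical helper name and the field widths in the usage comment are illustrative only.

#include <stdint.h>

/*
 * Convert a microsecond quantity into refclk cycles, apply frac_bits of
 * fixed-point precision, and saturate to a field_bits-wide register field,
 * mirroring the clamp-to-maximum checks in the code above.
 */
static uint32_t pack_refcyc_field(double time_us, double refclk_mhz,
				  unsigned int frac_bits, unsigned int field_bits)
{
	double cycles = time_us * refclk_mhz * (double)(1u << frac_bits);
	uint32_t max = (field_bits >= 32) ? UINT32_MAX : ((1u << field_bits) - 1u);

	if (cycles <= 0.0)
		return 0;
	if (cycles >= (double)max)
		return max;	/* saturate instead of overflowing the field */
	return (uint32_t)cycles;
}

/*
 * Example (values made up): a 23-bit field with no fractional bits, similar in
 * shape to the refcyc_per_vm_group_* registers programmed above.
 *
 *	uint32_t reg = pack_refcyc_field(12.5, 400.0, 0, 23);
 */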

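Editor's note (not part of the patch): the comments in dml_display_rq_dlg_calc.h above state that the RQ and DLG entry points assume the mode_program step has already been run on the display_mode_lib_st instance. A hypothetical caller, looping over the active pipes, might use the three entry points in the following order; num_pipes and the surrounding driver plumbing are assumptions made for illustration, not taken from the patch.

#include "dml_display_rq_dlg_calc.h"

static void program_pipe_regs_example(struct display_mode_lib_st *mode_lib,
				      dml_uint_t num_pipes)
{
	dml_display_arb_params_st arb_params;
	dml_uint_t pipe_idx;

	/* Arbitration parameters are global rather than per pipe. */
	dml_rq_dlg_get_arb_params(mode_lib, &arb_params);

	for (pipe_idx = 0; pipe_idx < num_pipes; pipe_idx++) {
		dml_display_rq_regs_st rq_regs;
		dml_display_dlg_regs_st dlg_regs;
		dml_display_ttu_regs_st ttu_regs;

		/* Assumes the mode_program step described in the header
		 * comments has already run for this mode_lib instance. */
		dml_rq_dlg_get_rq_reg(&rq_regs, mode_lib, pipe_idx);
		dml_rq_dlg_get_dlg_reg(&dlg_regs, &ttu_regs, mode_lib, pipe_idx);

		/* The driver would then write rq_regs, dlg_regs and ttu_regs
		 * out to the corresponding hardware registers. */
	}
}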