From b29c22b8dafd951664a491bf629e615d81513197 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 18 Sep 2025 17:52:04 +0530 Subject: drm/amdgpu: Fix vbios build number parsing logic It's not guaranteed that the build string and the atom header section are 32 bytes apart. Use the remaining bytes in the section as the copy limit. Fixes: d6fa80266178 ("drm/amdgpu: Add vbios build number interface") Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/atom.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index be5d67c2c7a1..7a063e44d429 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1502,7 +1502,7 @@ static void atom_get_vbios_build(struct atom_context *ctx) { unsigned char *atom_rom_hdr; unsigned char *str; - uint16_t base; + uint16_t base, len; base = CU16(ATOM_ROM_TABLE_PTR); atom_rom_hdr = CSTR(base); @@ -1515,8 +1515,9 @@ static void atom_get_vbios_build(struct atom_context *ctx) while (str < atom_rom_hdr && *str++) ; - if ((str + STRLEN_NORMAL) < atom_rom_hdr) - strscpy(ctx->build_num, str, STRLEN_NORMAL); + len = min(atom_rom_hdr - str, STRLEN_NORMAL); + if (len) + strscpy(ctx->build_num, str, len); } struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) -- cgit v1.2.3 From ae4d627e43ccecc403e7378811289b33de38e67d Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 19 Sep 2025 09:44:25 +0800 Subject: drm/amd/pm: place the smu 13.0.0 pptable header into the correct folder Place the smu 13.0.0 pptable header in the correct folder Signed-off-by: Yang Wang Reviewed-by: Mangesh Gadre Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h | 198 --------------------- .../gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h | 198 +++++++++++++++++++++ 2 files changed, 198 insertions(+), 198 deletions(-) delete mode 100644 
drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h create mode 100644 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h deleted file mode 100644 index 251ed011b3b0..000000000000 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef SMU_13_0_0_PPTABLE_H -#define SMU_13_0_0_PPTABLE_H - -#pragma pack(push, 1) - -#define SMU_13_0_0_TABLE_FORMAT_REVISION 15 - -//// POWERPLAYTABLE::ulPlatformCaps -#define SMU_13_0_0_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. -#define SMU_13_0_0_PP_PLATFORM_CAP_SBIOSPOWERSOURCE 0x2 // This cap indicates whether power source notificaiton is done by SBIOS instead of OS. 
-#define SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC 0x4 // This cap indicates whether DC mode notificaiton is done by GPIO pin directly. -#define SMU_13_0_0_PP_PLATFORM_CAP_BACO 0x8 // This cap indicates whether board supports the BACO circuitry. -#define SMU_13_0_0_PP_PLATFORM_CAP_MACO 0x10 // This cap indicates whether board supports the MACO circuitry. -#define SMU_13_0_0_PP_PLATFORM_CAP_SHADOWPSTATE 0x20 // This cap indicates whether board supports the Shadow Pstate. - -// SMU_13_0_0_PP_THERMALCONTROLLER - Thermal Controller Type -#define SMU_13_0_0_PP_THERMALCONTROLLER_NONE 0 -#define SMU_13_0_0_PP_THERMALCONTROLLER_NAVI21 28 - -#define SMU_13_0_0_PP_OVERDRIVE_VERSION 0x83 // OverDrive 8 Table Version 0.2 -#define SMU_13_0_0_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 - -enum SMU_13_0_0_ODFEATURE_CAP { - SMU_13_0_0_ODCAP_GFXCLK_LIMITS = 0, - SMU_13_0_0_ODCAP_UCLK_LIMITS, - SMU_13_0_0_ODCAP_POWER_LIMIT, - SMU_13_0_0_ODCAP_FAN_ACOUSTIC_LIMIT, - SMU_13_0_0_ODCAP_FAN_SPEED_MIN, - SMU_13_0_0_ODCAP_TEMPERATURE_FAN, - SMU_13_0_0_ODCAP_TEMPERATURE_SYSTEM, - SMU_13_0_0_ODCAP_MEMORY_TIMING_TUNE, - SMU_13_0_0_ODCAP_FAN_ZERO_RPM_CONTROL, - SMU_13_0_0_ODCAP_AUTO_UV_ENGINE, - SMU_13_0_0_ODCAP_AUTO_OC_ENGINE, - SMU_13_0_0_ODCAP_AUTO_OC_MEMORY, - SMU_13_0_0_ODCAP_FAN_CURVE, - SMU_13_0_0_ODCAP_AUTO_FAN_ACOUSTIC_LIMIT, - SMU_13_0_0_ODCAP_POWER_MODE, - SMU_13_0_0_ODCAP_PER_ZONE_GFX_VOLTAGE_OFFSET, - SMU_13_0_0_ODCAP_COUNT, -}; - -enum SMU_13_0_0_ODFEATURE_ID { - SMU_13_0_0_ODFEATURE_GFXCLK_LIMITS = 1 << SMU_13_0_0_ODCAP_GFXCLK_LIMITS, //GFXCLK Limit feature - SMU_13_0_0_ODFEATURE_UCLK_LIMITS = 1 << SMU_13_0_0_ODCAP_UCLK_LIMITS, //UCLK Limit feature - SMU_13_0_0_ODFEATURE_POWER_LIMIT = 1 << SMU_13_0_0_ODCAP_POWER_LIMIT, //Power Limit feature - SMU_13_0_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << SMU_13_0_0_ODCAP_FAN_ACOUSTIC_LIMIT, //Fan Acoustic RPM feature - SMU_13_0_0_ODFEATURE_FAN_SPEED_MIN = 1 << SMU_13_0_0_ODCAP_FAN_SPEED_MIN, //Minimum Fan Speed feature 
- SMU_13_0_0_ODFEATURE_TEMPERATURE_FAN = 1 << SMU_13_0_0_ODCAP_TEMPERATURE_FAN, //Fan Target Temperature Limit feature - SMU_13_0_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << SMU_13_0_0_ODCAP_TEMPERATURE_SYSTEM, //Operating Temperature Limit feature - SMU_13_0_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << SMU_13_0_0_ODCAP_MEMORY_TIMING_TUNE, //AC Timing Tuning feature - SMU_13_0_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << SMU_13_0_0_ODCAP_FAN_ZERO_RPM_CONTROL, //Zero RPM feature - SMU_13_0_0_ODFEATURE_AUTO_UV_ENGINE = 1 << SMU_13_0_0_ODCAP_AUTO_UV_ENGINE, //Auto Under Volt GFXCLK feature - SMU_13_0_0_ODFEATURE_AUTO_OC_ENGINE = 1 << SMU_13_0_0_ODCAP_AUTO_OC_ENGINE, //Auto Over Clock GFXCLK feature - SMU_13_0_0_ODFEATURE_AUTO_OC_MEMORY = 1 << SMU_13_0_0_ODCAP_AUTO_OC_MEMORY, //Auto Over Clock MCLK feature - SMU_13_0_0_ODFEATURE_FAN_CURVE = 1 << SMU_13_0_0_ODCAP_FAN_CURVE, //Fan Curve feature - SMU_13_0_0_ODFEATURE_AUTO_FAN_ACOUSTIC_LIMIT = 1 << SMU_13_0_0_ODCAP_AUTO_FAN_ACOUSTIC_LIMIT, //Auto Fan Acoustic RPM feature - SMU_13_0_0_ODFEATURE_POWER_MODE = 1 << SMU_13_0_0_ODCAP_POWER_MODE, //Optimized GPU Power Mode feature - SMU_13_0_0_ODFEATURE_PER_ZONE_GFX_VOLTAGE_OFFSET = 1 << SMU_13_0_0_ODCAP_PER_ZONE_GFX_VOLTAGE_OFFSET, //Perzone voltage offset feature - SMU_13_0_0_ODFEATURE_COUNT = 16, -}; - -#define SMU_13_0_0_MAX_ODFEATURE 32 //Maximum Number of OD Features - -enum SMU_13_0_0_ODSETTING_ID { - SMU_13_0_0_ODSETTING_GFXCLKFMAX = 0, - SMU_13_0_0_ODSETTING_GFXCLKFMIN, - SMU_13_0_0_ODSETTING_UCLKFMIN, - SMU_13_0_0_ODSETTING_UCLKFMAX, - SMU_13_0_0_ODSETTING_POWERPERCENTAGE, - SMU_13_0_0_ODSETTING_FANRPMMIN, - SMU_13_0_0_ODSETTING_FANRPMACOUSTICLIMIT, - SMU_13_0_0_ODSETTING_FANTARGETTEMPERATURE, - SMU_13_0_0_ODSETTING_OPERATINGTEMPMAX, - SMU_13_0_0_ODSETTING_ACTIMING, - SMU_13_0_0_ODSETTING_FAN_ZERO_RPM_CONTROL, - SMU_13_0_0_ODSETTING_AUTOUVENGINE, - SMU_13_0_0_ODSETTING_AUTOOCENGINE, - SMU_13_0_0_ODSETTING_AUTOOCMEMORY, - SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_1, - 
SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_1, - SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_2, - SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_2, - SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_3, - SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_3, - SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_4, - SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_4, - SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_5, - SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_5, - SMU_13_0_0_ODSETTING_AUTO_FAN_ACOUSTIC_LIMIT, - SMU_13_0_0_ODSETTING_POWER_MODE, - SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_1, - SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_2, - SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_3, - SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_4, - SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_5, - SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_6, - SMU_13_0_0_ODSETTING_COUNT, -}; -#define SMU_13_0_0_MAX_ODSETTING 64 //Maximum Number of ODSettings - -enum SMU_13_0_0_PWRMODE_SETTING { - SMU_13_0_0_PMSETTING_POWER_LIMIT_QUIET = 0, - SMU_13_0_0_PMSETTING_POWER_LIMIT_BALANCE, - SMU_13_0_0_PMSETTING_POWER_LIMIT_TURBO, - SMU_13_0_0_PMSETTING_POWER_LIMIT_RAGE, - SMU_13_0_0_PMSETTING_ACOUSTIC_TEMP_QUIET, - SMU_13_0_0_PMSETTING_ACOUSTIC_TEMP_BALANCE, - SMU_13_0_0_PMSETTING_ACOUSTIC_TEMP_TURBO, - SMU_13_0_0_PMSETTING_ACOUSTIC_TEMP_RAGE, - SMU_13_0_0_PMSETTING_ACOUSTIC_TARGET_RPM_QUIET, - SMU_13_0_0_PMSETTING_ACOUSTIC_TARGET_RPM_BALANCE, - SMU_13_0_0_PMSETTING_ACOUSTIC_TARGET_RPM_TURBO, - SMU_13_0_0_PMSETTING_ACOUSTIC_TARGET_RPM_RAGE, - SMU_13_0_0_PMSETTING_ACOUSTIC_LIMIT_RPM_QUIET, - SMU_13_0_0_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, - SMU_13_0_0_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, - SMU_13_0_0_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, -}; -#define SMU_13_0_0_MAX_PMSETTING 32 //Maximum Number of PowerMode Settings - -struct smu_13_0_0_overdrive_table { - uint8_t revision; //Revision = SMU_13_0_0_PP_OVERDRIVE_VERSION - uint8_t reserve[3]; //Zero filled field reserved for future use - uint32_t 
feature_count; //Total number of supported features - uint32_t setting_count; //Total number of supported settings - uint8_t cap[SMU_13_0_0_MAX_ODFEATURE]; //OD feature support flags - uint32_t max[SMU_13_0_0_MAX_ODSETTING]; //default maximum settings - uint32_t min[SMU_13_0_0_MAX_ODSETTING]; //default minimum settings - int16_t pm_setting[SMU_13_0_0_MAX_PMSETTING]; //Optimized power mode feature settings -}; - -enum SMU_13_0_0_PPCLOCK_ID { - SMU_13_0_0_PPCLOCK_GFXCLK = 0, - SMU_13_0_0_PPCLOCK_SOCCLK, - SMU_13_0_0_PPCLOCK_UCLK, - SMU_13_0_0_PPCLOCK_FCLK, - SMU_13_0_0_PPCLOCK_DCLK_0, - SMU_13_0_0_PPCLOCK_VCLK_0, - SMU_13_0_0_PPCLOCK_DCLK_1, - SMU_13_0_0_PPCLOCK_VCLK_1, - SMU_13_0_0_PPCLOCK_DCEFCLK, - SMU_13_0_0_PPCLOCK_DISPCLK, - SMU_13_0_0_PPCLOCK_PIXCLK, - SMU_13_0_0_PPCLOCK_PHYCLK, - SMU_13_0_0_PPCLOCK_DTBCLK, - SMU_13_0_0_PPCLOCK_COUNT, -}; -#define SMU_13_0_0_MAX_PPCLOCK 16 //Maximum Number of PP Clocks - -struct smu_13_0_0_powerplay_table { - struct atom_common_table_header header; //For SMU13, header.format_revision = 15, header.content_revision = 0 - uint8_t table_revision; //For SMU13, table_revision = 2 - uint8_t padding; - uint16_t table_size; //Driver portion table size. The offset to smc_pptable including header size - uint32_t golden_pp_id; //PPGen use only: PP Table ID on the Golden Data Base - uint32_t golden_revision; //PPGen use only: PP Table Revision on the Golden Data Base - uint16_t format_id; //PPGen use only: PPTable for different ASICs. For SMU13 this should be 0x80 - uint32_t platform_caps; //POWERPLAYABLE::ulPlatformCaps - - uint8_t thermal_controller_type; //one of SMU_13_0_0_PP_THERMALCONTROLLER - - uint16_t small_power_limit1; - uint16_t small_power_limit2; - uint16_t boost_power_limit; //For Gemini Board, when the slave adapter is in BACO mode, the master adapter will use this boost power limit instead of the default power limit to boost the power limit. 
- uint16_t software_shutdown_temp; - - uint32_t reserve[45]; - - struct smu_13_0_0_overdrive_table overdrive_table; - uint8_t padding1; - PPTable_t smc_pptable; //PPTable_t in driver_if.h -}; - -#pragma pack(pop) - -#endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h new file mode 100644 index 000000000000..251ed011b3b0 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h @@ -0,0 +1,198 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef SMU_13_0_0_PPTABLE_H +#define SMU_13_0_0_PPTABLE_H + +#pragma pack(push, 1) + +#define SMU_13_0_0_TABLE_FORMAT_REVISION 15 + +//// POWERPLAYTABLE::ulPlatformCaps +#define SMU_13_0_0_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. 
+#define SMU_13_0_0_PP_PLATFORM_CAP_SBIOSPOWERSOURCE 0x2 // This cap indicates whether power source notificaiton is done by SBIOS instead of OS. +#define SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC 0x4 // This cap indicates whether DC mode notificaiton is done by GPIO pin directly. +#define SMU_13_0_0_PP_PLATFORM_CAP_BACO 0x8 // This cap indicates whether board supports the BACO circuitry. +#define SMU_13_0_0_PP_PLATFORM_CAP_MACO 0x10 // This cap indicates whether board supports the MACO circuitry. +#define SMU_13_0_0_PP_PLATFORM_CAP_SHADOWPSTATE 0x20 // This cap indicates whether board supports the Shadow Pstate. + +// SMU_13_0_0_PP_THERMALCONTROLLER - Thermal Controller Type +#define SMU_13_0_0_PP_THERMALCONTROLLER_NONE 0 +#define SMU_13_0_0_PP_THERMALCONTROLLER_NAVI21 28 + +#define SMU_13_0_0_PP_OVERDRIVE_VERSION 0x83 // OverDrive 8 Table Version 0.2 +#define SMU_13_0_0_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 + +enum SMU_13_0_0_ODFEATURE_CAP { + SMU_13_0_0_ODCAP_GFXCLK_LIMITS = 0, + SMU_13_0_0_ODCAP_UCLK_LIMITS, + SMU_13_0_0_ODCAP_POWER_LIMIT, + SMU_13_0_0_ODCAP_FAN_ACOUSTIC_LIMIT, + SMU_13_0_0_ODCAP_FAN_SPEED_MIN, + SMU_13_0_0_ODCAP_TEMPERATURE_FAN, + SMU_13_0_0_ODCAP_TEMPERATURE_SYSTEM, + SMU_13_0_0_ODCAP_MEMORY_TIMING_TUNE, + SMU_13_0_0_ODCAP_FAN_ZERO_RPM_CONTROL, + SMU_13_0_0_ODCAP_AUTO_UV_ENGINE, + SMU_13_0_0_ODCAP_AUTO_OC_ENGINE, + SMU_13_0_0_ODCAP_AUTO_OC_MEMORY, + SMU_13_0_0_ODCAP_FAN_CURVE, + SMU_13_0_0_ODCAP_AUTO_FAN_ACOUSTIC_LIMIT, + SMU_13_0_0_ODCAP_POWER_MODE, + SMU_13_0_0_ODCAP_PER_ZONE_GFX_VOLTAGE_OFFSET, + SMU_13_0_0_ODCAP_COUNT, +}; + +enum SMU_13_0_0_ODFEATURE_ID { + SMU_13_0_0_ODFEATURE_GFXCLK_LIMITS = 1 << SMU_13_0_0_ODCAP_GFXCLK_LIMITS, //GFXCLK Limit feature + SMU_13_0_0_ODFEATURE_UCLK_LIMITS = 1 << SMU_13_0_0_ODCAP_UCLK_LIMITS, //UCLK Limit feature + SMU_13_0_0_ODFEATURE_POWER_LIMIT = 1 << SMU_13_0_0_ODCAP_POWER_LIMIT, //Power Limit feature + SMU_13_0_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << 
SMU_13_0_0_ODCAP_FAN_ACOUSTIC_LIMIT, //Fan Acoustic RPM feature + SMU_13_0_0_ODFEATURE_FAN_SPEED_MIN = 1 << SMU_13_0_0_ODCAP_FAN_SPEED_MIN, //Minimum Fan Speed feature + SMU_13_0_0_ODFEATURE_TEMPERATURE_FAN = 1 << SMU_13_0_0_ODCAP_TEMPERATURE_FAN, //Fan Target Temperature Limit feature + SMU_13_0_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << SMU_13_0_0_ODCAP_TEMPERATURE_SYSTEM, //Operating Temperature Limit feature + SMU_13_0_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << SMU_13_0_0_ODCAP_MEMORY_TIMING_TUNE, //AC Timing Tuning feature + SMU_13_0_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << SMU_13_0_0_ODCAP_FAN_ZERO_RPM_CONTROL, //Zero RPM feature + SMU_13_0_0_ODFEATURE_AUTO_UV_ENGINE = 1 << SMU_13_0_0_ODCAP_AUTO_UV_ENGINE, //Auto Under Volt GFXCLK feature + SMU_13_0_0_ODFEATURE_AUTO_OC_ENGINE = 1 << SMU_13_0_0_ODCAP_AUTO_OC_ENGINE, //Auto Over Clock GFXCLK feature + SMU_13_0_0_ODFEATURE_AUTO_OC_MEMORY = 1 << SMU_13_0_0_ODCAP_AUTO_OC_MEMORY, //Auto Over Clock MCLK feature + SMU_13_0_0_ODFEATURE_FAN_CURVE = 1 << SMU_13_0_0_ODCAP_FAN_CURVE, //Fan Curve feature + SMU_13_0_0_ODFEATURE_AUTO_FAN_ACOUSTIC_LIMIT = 1 << SMU_13_0_0_ODCAP_AUTO_FAN_ACOUSTIC_LIMIT, //Auto Fan Acoustic RPM feature + SMU_13_0_0_ODFEATURE_POWER_MODE = 1 << SMU_13_0_0_ODCAP_POWER_MODE, //Optimized GPU Power Mode feature + SMU_13_0_0_ODFEATURE_PER_ZONE_GFX_VOLTAGE_OFFSET = 1 << SMU_13_0_0_ODCAP_PER_ZONE_GFX_VOLTAGE_OFFSET, //Perzone voltage offset feature + SMU_13_0_0_ODFEATURE_COUNT = 16, +}; + +#define SMU_13_0_0_MAX_ODFEATURE 32 //Maximum Number of OD Features + +enum SMU_13_0_0_ODSETTING_ID { + SMU_13_0_0_ODSETTING_GFXCLKFMAX = 0, + SMU_13_0_0_ODSETTING_GFXCLKFMIN, + SMU_13_0_0_ODSETTING_UCLKFMIN, + SMU_13_0_0_ODSETTING_UCLKFMAX, + SMU_13_0_0_ODSETTING_POWERPERCENTAGE, + SMU_13_0_0_ODSETTING_FANRPMMIN, + SMU_13_0_0_ODSETTING_FANRPMACOUSTICLIMIT, + SMU_13_0_0_ODSETTING_FANTARGETTEMPERATURE, + SMU_13_0_0_ODSETTING_OPERATINGTEMPMAX, + SMU_13_0_0_ODSETTING_ACTIMING, + SMU_13_0_0_ODSETTING_FAN_ZERO_RPM_CONTROL, + 
SMU_13_0_0_ODSETTING_AUTOUVENGINE, + SMU_13_0_0_ODSETTING_AUTOOCENGINE, + SMU_13_0_0_ODSETTING_AUTOOCMEMORY, + SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_1, + SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_1, + SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_2, + SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_2, + SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_3, + SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_3, + SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_4, + SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_4, + SMU_13_0_0_ODSETTING_FAN_CURVE_TEMPERATURE_5, + SMU_13_0_0_ODSETTING_FAN_CURVE_SPEED_5, + SMU_13_0_0_ODSETTING_AUTO_FAN_ACOUSTIC_LIMIT, + SMU_13_0_0_ODSETTING_POWER_MODE, + SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_1, + SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_2, + SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_3, + SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_4, + SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_5, + SMU_13_0_0_ODSETTING_PER_ZONE_GFX_VOLTAGE_OFFSET_POINT_6, + SMU_13_0_0_ODSETTING_COUNT, +}; +#define SMU_13_0_0_MAX_ODSETTING 64 //Maximum Number of ODSettings + +enum SMU_13_0_0_PWRMODE_SETTING { + SMU_13_0_0_PMSETTING_POWER_LIMIT_QUIET = 0, + SMU_13_0_0_PMSETTING_POWER_LIMIT_BALANCE, + SMU_13_0_0_PMSETTING_POWER_LIMIT_TURBO, + SMU_13_0_0_PMSETTING_POWER_LIMIT_RAGE, + SMU_13_0_0_PMSETTING_ACOUSTIC_TEMP_QUIET, + SMU_13_0_0_PMSETTING_ACOUSTIC_TEMP_BALANCE, + SMU_13_0_0_PMSETTING_ACOUSTIC_TEMP_TURBO, + SMU_13_0_0_PMSETTING_ACOUSTIC_TEMP_RAGE, + SMU_13_0_0_PMSETTING_ACOUSTIC_TARGET_RPM_QUIET, + SMU_13_0_0_PMSETTING_ACOUSTIC_TARGET_RPM_BALANCE, + SMU_13_0_0_PMSETTING_ACOUSTIC_TARGET_RPM_TURBO, + SMU_13_0_0_PMSETTING_ACOUSTIC_TARGET_RPM_RAGE, + SMU_13_0_0_PMSETTING_ACOUSTIC_LIMIT_RPM_QUIET, + SMU_13_0_0_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, + SMU_13_0_0_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, + SMU_13_0_0_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, +}; +#define SMU_13_0_0_MAX_PMSETTING 32 //Maximum Number of PowerMode Settings + +struct 
smu_13_0_0_overdrive_table { + uint8_t revision; //Revision = SMU_13_0_0_PP_OVERDRIVE_VERSION + uint8_t reserve[3]; //Zero filled field reserved for future use + uint32_t feature_count; //Total number of supported features + uint32_t setting_count; //Total number of supported settings + uint8_t cap[SMU_13_0_0_MAX_ODFEATURE]; //OD feature support flags + uint32_t max[SMU_13_0_0_MAX_ODSETTING]; //default maximum settings + uint32_t min[SMU_13_0_0_MAX_ODSETTING]; //default minimum settings + int16_t pm_setting[SMU_13_0_0_MAX_PMSETTING]; //Optimized power mode feature settings +}; + +enum SMU_13_0_0_PPCLOCK_ID { + SMU_13_0_0_PPCLOCK_GFXCLK = 0, + SMU_13_0_0_PPCLOCK_SOCCLK, + SMU_13_0_0_PPCLOCK_UCLK, + SMU_13_0_0_PPCLOCK_FCLK, + SMU_13_0_0_PPCLOCK_DCLK_0, + SMU_13_0_0_PPCLOCK_VCLK_0, + SMU_13_0_0_PPCLOCK_DCLK_1, + SMU_13_0_0_PPCLOCK_VCLK_1, + SMU_13_0_0_PPCLOCK_DCEFCLK, + SMU_13_0_0_PPCLOCK_DISPCLK, + SMU_13_0_0_PPCLOCK_PIXCLK, + SMU_13_0_0_PPCLOCK_PHYCLK, + SMU_13_0_0_PPCLOCK_DTBCLK, + SMU_13_0_0_PPCLOCK_COUNT, +}; +#define SMU_13_0_0_MAX_PPCLOCK 16 //Maximum Number of PP Clocks + +struct smu_13_0_0_powerplay_table { + struct atom_common_table_header header; //For SMU13, header.format_revision = 15, header.content_revision = 0 + uint8_t table_revision; //For SMU13, table_revision = 2 + uint8_t padding; + uint16_t table_size; //Driver portion table size. The offset to smc_pptable including header size + uint32_t golden_pp_id; //PPGen use only: PP Table ID on the Golden Data Base + uint32_t golden_revision; //PPGen use only: PP Table Revision on the Golden Data Base + uint16_t format_id; //PPGen use only: PPTable for different ASICs. 
For SMU13 this should be 0x80 + uint32_t platform_caps; //POWERPLAYABLE::ulPlatformCaps + + uint8_t thermal_controller_type; //one of SMU_13_0_0_PP_THERMALCONTROLLER + + uint16_t small_power_limit1; + uint16_t small_power_limit2; + uint16_t boost_power_limit; //For Gemini Board, when the slave adapter is in BACO mode, the master adapter will use this boost power limit instead of the default power limit to boost the power limit. + uint16_t software_shutdown_temp; + + uint32_t reserve[45]; + + struct smu_13_0_0_overdrive_table overdrive_table; + uint8_t padding1; + PPTable_t smc_pptable; //PPTable_t in driver_if.h +}; + +#pragma pack(pop) + +#endif -- cgit v1.2.3 From c5b3cc417b0260abc74ed32f6baa626c9de917c0 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Sep 2025 20:12:43 +0530 Subject: drm/amdgpu: use hmm_pfns instead of array of pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to allocate a local array of pages to hold the pages returned by hmm; instead we can use the hmm_range structure itself to get to hmm_pfns and get the required pages directly. This avoids quite a lot of alloc/free calls. 
Signed-off-by: Sunil Khatri Suggested-by: Christian König Reviewed-by: Christian König Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 10 +++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 30 +++++------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 11 +-------- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 +++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 5 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- 9 files changed, 25 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c3b34a410375..7c54fe6b0f5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1089,7 +1089,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); + ret = amdgpu_ttm_tt_get_user_pages(bo, &range); if (ret) { if (ret == -EAGAIN) pr_debug("Failed to get user pages, try again\n"); @@ -1103,6 +1103,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, pr_err("%s: Failed to reserve BO\n", __func__); goto release_out; } + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range); + amdgpu_bo_placement_from_domain(bo, mem->domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) @@ -2565,8 +2568,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, } /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, - &mem->range); + ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range); if (ret) { pr_debug("Failed %d to get user pages\n", ret); @@ -2595,6 +2597,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, ret = 
0; } + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range); + mutex_lock(&process_info->notifier_lock); /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 555cd6d877c3..a716c9886c74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -38,7 +38,6 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo *bo; struct amdgpu_bo_va *bo_va; uint32_t priority; - struct page **user_pages; struct hmm_range *range; bool user_invalidated; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index defb511acc5a..744e6ff69814 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -885,24 +886,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo = e->bo; int i; - e->user_pages = kvcalloc(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL); - if (!e->user_pages) { - drm_err(adev_to_drm(p->adev), "kvmalloc_array failure\n"); - r = -ENOMEM; - goto out_free_user_pages; - } - - r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range); - if (r) { - kvfree(e->user_pages); - e->user_pages = NULL; + r = amdgpu_ttm_tt_get_user_pages(bo, &e->range); + if (r) goto out_free_user_pages; - } for (i = 0; i < bo->tbo.ttm->num_pages; i++) { - if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) { userpage_invalidated = true; break; } @@ -946,7 +935,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) && - e->user_invalidated && e->user_pages) { + e->user_invalidated) { amdgpu_bo_placement_from_domain(e->bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement, @@ -955,11 +944,8 @@ static 
int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out_free_user_pages; amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm, - e->user_pages); + e->range); } - - kvfree(e->user_pages); - e->user_pages = NULL; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -1001,11 +987,7 @@ out_free_user_pages: amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = e->bo; - if (!e->user_pages) - continue; amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); - kvfree(e->user_pages); - e->user_pages = NULL; e->range = NULL; } mutex_unlock(&p->bo_list->bo_list_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 1679c2c3d505..b7ebae289bea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -572,8 +572,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, - &range); + r = amdgpu_ttm_tt_get_user_pages(bo, &range); if (r) goto release_object; @@ -581,6 +580,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto user_pages_done; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range); + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index e36fede7f74c..2c6a6b858112 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -167,13 +167,12 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo) int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, - void *owner, struct page **pages, + void *owner, struct hmm_range **phmm_range) { struct hmm_range *hmm_range; unsigned long end; 
unsigned long timeout; - unsigned long i; unsigned long *pfns; int r = 0; @@ -222,14 +221,6 @@ retry: hmm_range->start = start; hmm_range->hmm_pfns = pfns; - /* - * Due to default_flags, all pages are HMM_PFN_VALID or - * hmm_range_fault() fails. FIXME: The pages cannot be touched outside - * the notifier_lock, and mmu_interval_read_retry() must be done first. - */ - for (i = 0; pages && i < npages; i++) - pages[i] = hmm_pfn_to_page(pfns[i]); - *phmm_range = hmm_range; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index e2edcd010ccc..953e1d06de20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -33,7 +33,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, - void *owner, struct page **pages, + void *owner, struct hmm_range **phmm_range); bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 71b6691edab4..aa9ee5dffa45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -708,7 +708,7 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct hmm_range **range) { struct ttm_tt *ttm = bo->tbo.ttm; @@ -745,7 +745,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages, - readonly, NULL, pages, range); + readonly, NULL, range); out_unlock: mmap_read_unlock(mm); if (r) @@ -797,12 +797,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, * that backs user memory 
and will ultimately be mapped into the device * address space. */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range) { unsigned long i; for (i = 0; i < ttm->num_pages; ++i) - ttm->pages[i] = pages ? pages[i] : NULL; + ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_pfns[i]) : NULL; } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 1c5c7836ce4f..0be2728aa872 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -191,7 +191,7 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct hmm_range **range); void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, struct hmm_range *range); @@ -199,7 +199,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, struct hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct page **pages, struct hmm_range **range) { return -EPERM; @@ -215,7 +214,7 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, } #endif -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range); int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo, uint64_t *user_addr); int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 68ba239b2e5d..273f42e3afdd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1738,7 +1738,7 @@ static int 
svm_range_validate_and_map(struct mm_struct *mm, WRITE_ONCE(p->svms.faulting_task, current); r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, - readonly, owner, NULL, + readonly, owner, &hmm_range); WRITE_ONCE(p->svms.faulting_task, NULL); if (r) -- cgit v1.2.3 From 1fb710793ce2619223adffaf981b1ff13cd48f17 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 18 Sep 2025 19:48:00 -0500 Subject: drm/amdgpu: Enable MES lr_compute_wa by default The MES set resources packet has an optional bit 'lr_compute_wa' which can be used for preventing MES hangs on long compute jobs. Set this bit by default. Co-developed-by: Yifan Zhang Signed-off-by: Yifan Zhang Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 5 +++++ drivers/gpu/drm/amd/include/mes_v11_api_def.h | 3 ++- drivers/gpu/drm/amd/include/mes_v12_api_def.h | 3 ++- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 3b91ea601add..e82188431f79 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -713,6 +713,12 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; + if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f) + mes_set_hw_res_pkt.enable_lr_compute_wa = 1; + else + dev_info_once(mes->adev->dev, + "MES FW version must be >= 0x7f to enable LR compute workaround.\n"); + if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 998893dff08e..aff06f06aeee 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -769,6 +769,11 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; + if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82) + mes_set_hw_res_pkt.enable_lr_compute_wa = 1; + else + dev_info_once(adev->dev, + "MES FW version must be >= 0x82 to enable LR compute workaround.\n"); /* * Keep oversubscribe timer for sdma . When we have unmapped doorbell diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index 15680c3f4970..ab1cfc92dbeb 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -238,7 +238,8 @@ union MESAPI_SET_HW_RESOURCES { uint32_t enable_mes_sch_stb_log : 1; uint32_t limit_single_process : 1; uint32_t is_strix_tmz_wa_enabled :1; - uint32_t reserved : 13; + uint32_t enable_lr_compute_wa : 1; + uint32_t reserved : 12; }; uint32_t uint32_t_all; }; diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index c04bd351b250..69611c7e30e3 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -287,7 +287,8 @@ union MESAPI_SET_HW_RESOURCES { uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; uint32_t enable_mes_fence_int: 1; - uint32_t reserved : 10; + uint32_t enable_lr_compute_wa : 1; + uint32_t reserved : 9; }; uint32_t uint32_all; }; -- cgit v1.2.3 From 854b9ab637d72bad8afe8f11cf6edd9aab0d1cf9 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Thu, 18 Sep 2025 12:34:48 -0400 Subject: drm/amdgpu: Update amdgpu_vcn5_fw_shared for vcn_5_0_1 Align vcn5_fw_shared structure with FW Signed-off-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: 
Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index bebfc2b34afe..dc8a17bcc3c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -501,7 +501,7 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_rb_setup rb_setup; struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; struct amdgpu_fw_shared_drm_key_wa drm_key_wa; - uint8_t pad3[9]; + uint8_t pad3[404]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 -- cgit v1.2.3 From c82b8f96eb972eb313e20ede5eb6617bc393c490 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Mon, 15 Sep 2025 11:22:52 -0400 Subject: drm/amd/display: Disable stutter when programming watermarks on dcn32 [WHY&HOW] Reprogramming watermarks with stutter allowed can cause instability on some ASICs. Disable it prior to raising watermarks (prepare bandwidth), then re-enable after lowering (optimize bandwidth). 
Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c | 37 +++++++++++++++++++--- .../amd/display/dc/resource/dcn32/dcn32_resource.c | 1 + .../amd/display/dc/resource/dcn32/dcn32_resource.h | 3 +- 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9d15ba591772..62d132bf3028 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1163,6 +1163,7 @@ struct dc_debug_options { unsigned int auxless_alpm_lfps_silence_ns; unsigned int auxless_alpm_lfps_t1t2_us; short auxless_alpm_lfps_t1t2_offset_us; + bool disable_stutter_for_wm_program; }; diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c index 92957398ac0a..3b71bfaca291 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c @@ -28,6 +28,7 @@ #include "dcn32_hubbub.h" #include "dm_services.h" #include "reg_helper.h" +#include "dal_asic_id.h" #define CTX \ @@ -72,6 +73,14 @@ static void dcn32_init_crb(struct hubbub *hubbub) REG_UPDATE(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, 0x47F); } +static void hubbub32_set_sdp_control(struct hubbub *hubbub, bool dc_control) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + REG_UPDATE(DCHUBBUB_SDPIF_CFG0, + SDPIF_PORT_CONTROL, dc_control); +} + void hubbub32_set_request_limit(struct hubbub *hubbub, int memory_channel_count, int words_per_channel) { struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); @@ -754,8 +763,17 @@ static bool hubbub32_program_watermarks( unsigned int refclk_mhz, bool safe_to_lower) { + struct dc *dc = hubbub->ctx->dc; bool wm_pending = false; + if (!safe_to_lower && 
dc->debug.disable_stutter_for_wm_program && + (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) || + ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) { + /* before raising watermarks, SDP control give to DF, stutter must be disabled */ + hubbub32_set_sdp_control(hubbub, false); + hubbub1_allow_self_refresh_control(hubbub, false); + } + if (hubbub32_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) wm_pending = true; @@ -786,10 +804,20 @@ static bool hubbub32_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower) { + /* after lowering watermarks, stutter setting is restored, SDP control given to DC */ + hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter); + + if (dc->debug.disable_stutter_for_wm_program && + (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) || + ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) { + hubbub32_set_sdp_control(hubbub, true); + } + } else if (dc->debug.disable_stutter) { + hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter); + } - hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); + hubbub32_force_usr_retraining_allow(hubbub, dc->debug.force_usr_allow); return wm_pending; } @@ -974,8 +1002,7 @@ void hubbub32_init(struct hubbub *hubbub) ignore the "df_pre_cstate_req" from the SDP port control. 
only the DCN will determine when to connect the SDP port */ - REG_UPDATE(DCHUBBUB_SDPIF_CFG0, - SDPIF_PORT_CONTROL, 1); + hubbub32_set_sdp_control(hubbub, true); /*Set SDP's max outstanding request to 512 must set the register back to 0 (max outstanding = 256) in zero frame buffer mode*/ REG_UPDATE(DCHUBBUB_SDPIF_CFG1, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 8f80ccb846d7..3965a7f1b64b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -739,6 +739,7 @@ static const struct dc_debug_options debug_defaults_drv = { .fpo_vactive_min_active_margin_us = 200, .fpo_vactive_max_blank_us = 1000, .enable_legacy_fast_update = false, + .disable_stutter_for_wm_program = true }; static struct dce_aux *dcn32_aux_engine_create( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 20d714596021..99f0432288b4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -1230,7 +1230,8 @@ unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc, SR(DCHUBBUB_ARB_MALL_CNTL), \ SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \ SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS), \ - SR(SDPIF_REQUEST_RATE_LIMIT) + SR(SDPIF_REQUEST_RATE_LIMIT), \ + SR(DCHUBBUB_SDPIF_CFG0) /* DCCG */ -- cgit v1.2.3 From 54980f3c63ed3e5cca3d251416581193c90eae76 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Mon, 15 Sep 2025 11:35:37 -0400 Subject: drm/amd/display: Add missing post flip calls [WHY&HOW] dc_post_update_surfaces_to_stream needs to be called after a full update completes in order to optimize clocks and watermarks for power. Add missing calls before idle entry is requested to ensure optimal power. 
Reviewed-by: Aurabindo Pillai Signed-off-by: Dillon Varone Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 8 ++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7012b2c692b3..09427e056055 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -417,8 +417,7 @@ static inline bool update_planes_and_stream_adapter(struct dc *dc, /* * Previous frame finished and HW is ready for optimization. */ - if (update_type == UPDATE_TYPE_FAST) - dc_post_update_surfaces_to_stream(dc); + dc_post_update_surfaces_to_stream(dc); return dc_update_planes_and_stream(dc, array_of_surface_update, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 466dccb355d7..1ec9d03ad747 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -218,8 +218,10 @@ static void amdgpu_dm_idle_worker(struct work_struct *work) break; } - if (idle_work->enable) + if (idle_work->enable) { + dc_post_update_surfaces_to_stream(idle_work->dm->dc); dc_allow_idle_optimizations(idle_work->dm->dc, true); + } mutex_unlock(&idle_work->dm->dc_lock); } idle_work->dm->idle_workqueue->running = false; @@ -273,8 +275,10 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->acrtc->dm_irq_params.allow_sr_entry); } - if (dm->active_vblank_irq_count == 0) + if (dm->active_vblank_irq_count == 0) { + dc_post_update_surfaces_to_stream(dm->dc); dc_allow_idle_optimizations(dm->dc, true); + } mutex_unlock(&dm->dc_lock); -- cgit v1.2.3 From c8bedab2d9a1a0daa49ac20f9928a943f7205582 Mon Sep 17 00:00:00 2001 From: 
Karthi Kandasamy Date: Wed, 3 Sep 2025 14:16:27 +0200 Subject: drm/amd/display: Add AVI infoframe copy in copy_stream_update_to_stream [WHY] Ensure AVI infoframe updates from stream updates are applied to the active stream so OS overrides are not lost. [HOW] Copy avi_infopacket to stream when valid flag is set. Follow existing infopacket copy pattern and perform a basic validity check before assignment. Reviewed-by: Aric Cyr Signed-off-by: Karthi Kandasamy Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 7 ++++++- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c4dd52ed377d..974d63af1c70 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3313,6 +3313,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->adaptive_sync_infopacket) stream->adaptive_sync_infopacket = *update->adaptive_sync_infopacket; + if (update->avi_infopacket) + stream->avi_infopacket = *update->avi_infopacket; + if (update->dither_option) stream->dither_option = *update->dither_option; @@ -3607,7 +3610,8 @@ static void commit_planes_do_stream_update(struct dc *dc, stream_update->vsp_infopacket || stream_update->hfvsif_infopacket || stream_update->adaptive_sync_infopacket || - stream_update->vtem_infopacket) { + stream_update->vtem_infopacket || + stream_update->avi_infopacket) { resource_build_info_frame(pipe_ctx); dc->hwss.update_info_frame(pipe_ctx); @@ -5079,6 +5083,7 @@ static bool full_update_required(struct dc *dc, stream_update->hfvsif_infopacket || stream_update->vtem_infopacket || stream_update->adaptive_sync_infopacket || + stream_update->avi_infopacket || stream_update->dpms_off || stream_update->allow_freesync || 
stream_update->vrr_active_variable || diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index cbca3c67f439..bc5dedf5f60c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -4410,8 +4410,14 @@ static void set_avi_info_frame( unsigned int fr_ind = pipe_ctx->stream->timing.fr_index; enum dc_timing_3d_format format; + if (stream->avi_infopacket.valid) { + *info_packet = stream->avi_infopacket; + return; + } + memset(&hdmi_info, 0, sizeof(union hdmi_info_packet)); + color_space = pipe_ctx->stream->output_color_space; if (color_space == COLOR_SPACE_UNKNOWN) color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 5fc6fea211de..76cf9fdedab0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -203,6 +203,7 @@ struct dc_stream_state { struct dc_info_packet hfvsif_infopacket; struct dc_info_packet vtem_infopacket; struct dc_info_packet adaptive_sync_infopacket; + struct dc_info_packet avi_infopacket; uint8_t dsc_packed_pps[128]; struct rect src; /* composition area */ struct rect dst; /* stream addressable area */ @@ -335,6 +336,8 @@ struct dc_stream_update { struct dc_info_packet *hfvsif_infopacket; struct dc_info_packet *vtem_infopacket; struct dc_info_packet *adaptive_sync_infopacket; + struct dc_info_packet *avi_infopacket; + bool *dpms_off; bool integer_scaling_update; bool *allow_freesync; -- cgit v1.2.3 From 74d70e309d155550c9bd8bce74812875df47fb7b Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Fri, 5 Sep 2025 11:38:21 +0800 Subject: drm/amd/display: Add monitor patch to read psr cap again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why & How] According to the vendor’s requirement, after each OUI write, the PSR cap 
must be read; otherwise, the vendor's panel will default to using PSRSU, even though its PSR cap indicates that it only supports PSR1. Reviewed-by: Wenjing Liu Signed-off-by: Paul Hsieh Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + .../amd/display/dc/link/protocols/link_edp_panel_control.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 619834a328a3..b5aa03a3e39c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1217,6 +1217,7 @@ struct dc_panel_config { bool rc_disable; bool rc_allow_static_screen; bool rc_allow_fullscreen_VPB; + bool read_psrcap_again; unsigned int replay_enable_option; } psr; /* ABM */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 8b7b87b21c2e..5e806edbb9f6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -703,6 +703,20 @@ bool edp_setup_psr(struct dc_link *link, if (!link) return false; + /* This is a workaround: some vendors require the source to + * read the PSR cap; otherwise, the vendor's PSR feature will + * fall back to its default behavior, causing a misconfiguration + * of this feature.
+ */ + if (link->panel_config.psr.read_psrcap_again) { + dm_helpers_dp_read_dpcd( + link->ctx, + link, + DP_PSR_SUPPORT, + &link->dpcd_caps.psr_info.psr_version, + sizeof(link->dpcd_caps.psr_info.psr_version)); + } + //Clear PSR cfg memset(&psr_configuration, 0, sizeof(psr_configuration)); dm_helpers_dp_write_dpcd( -- cgit v1.2.3 From 6cec25f5b5660602b1953038cf40968b2d71c403 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 4 Sep 2025 13:49:35 -0500 Subject: drm/amd/display: Handle interpolation for first data point [Why] If the first data point for a custom brightness curve is not 0% luminance then the first few luminance values will be ignored. [How] Check whether the signal is below the first data point and, if so, linearly interpolate between 0 and the first data point instead. Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 09427e056055..271ea1615178 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4828,6 +4828,16 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap if (!caps->data_points) return; + /* + * Handle the case where brightness is below the first data point + * Interpolate between (0,0) and (first_signal, first_lum) + */ + if (brightness < caps->luminance_data[0].input_signal) { + lum = DIV_ROUND_CLOSEST(caps->luminance_data[0].luminance * brightness, + caps->luminance_data[0].input_signal); + goto scale; + } + left = 0; right = caps->data_points - 1; while (left <= right) { -- cgit v1.2.3 From f082daf08f2ff313bdf9cf929a28f6d888117986 Mon Sep 17 00:00:00 2001 From: Lo-an Chen Date: Mon, 25 Aug 2025 18:16:24 +0800 Subject: drm/amd/display: Init dispclk from bootup clock for
DCN314 [Why] Driver does not pick up and save vbios's clocks during init clocks, the dispclk in clk_mgr will keep 0 until the first update clocks. In some cases, OS changes the timing in the second set mode (lower the pixel clock), causing the driver to lower the dispclk in prepare bandwidth, which is illegal and causes grey screen. [How] 1. Dump and save the vbios's clocks, and init the dispclk in dcn314_init_clocks. 2. Fix the condition in dcn314_update_clocks, regarding a 0kHz value. Reviewed-by: Charlene Liu Signed-off-by: Lo-an Chen Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 142 ++++++++++++++++++++- .../amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h | 5 + .../display/dc/resource/dcn314/dcn314_resource.c | 1 + 3 files changed, 143 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 790bbd8235b1..9e63fa72101c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -77,6 +77,7 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, #undef DC_LOGGER #define DC_LOGGER \ clk_mgr->base.base.ctx->logger + #define regCLK1_CLK_PLL_REQ 0x0237 #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 @@ -87,8 +88,70 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, #define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L #define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L +#define regCLK1_CLK0_DFS_CNTL 0x0269 +#define regCLK1_CLK0_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK1_DFS_CNTL 0x026c +#define regCLK1_CLK1_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK2_DFS_CNTL 0x026f +#define regCLK1_CLK2_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK3_DFS_CNTL 0x0272 +#define regCLK1_CLK3_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK4_DFS_CNTL 0x0275 
+#define regCLK1_CLK4_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK5_DFS_CNTL 0x0278 +#define regCLK1_CLK5_DFS_CNTL_BASE_IDX 0 + +#define regCLK1_CLK0_CURRENT_CNT 0x02fb +#define regCLK1_CLK0_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK1_CURRENT_CNT 0x02fc +#define regCLK1_CLK1_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK2_CURRENT_CNT 0x02fd +#define regCLK1_CLK2_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK3_CURRENT_CNT 0x02fe +#define regCLK1_CLK3_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK4_CURRENT_CNT 0x02ff +#define regCLK1_CLK4_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK5_CURRENT_CNT 0x0300 +#define regCLK1_CLK5_CURRENT_CNT_BASE_IDX 0 + +#define regCLK1_CLK0_BYPASS_CNTL 0x028a +#define regCLK1_CLK0_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK1_BYPASS_CNTL 0x0293 +#define regCLK1_CLK1_BYPASS_CNTL_BASE_IDX 0 #define regCLK1_CLK2_BYPASS_CNTL 0x029c #define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK3_BYPASS_CNTL 0x02a5 +#define regCLK1_CLK3_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK4_BYPASS_CNTL 0x02ae +#define regCLK1_CLK4_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK5_BYPASS_CNTL 0x02b7 +#define regCLK1_CLK5_BYPASS_CNTL_BASE_IDX 0 + +#define regCLK1_CLK0_DS_CNTL 0x0283 +#define regCLK1_CLK0_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK1_DS_CNTL 0x028c +#define regCLK1_CLK1_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK2_DS_CNTL 0x0295 +#define regCLK1_CLK2_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK3_DS_CNTL 0x029e +#define regCLK1_CLK3_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK4_DS_CNTL 0x02a7 +#define regCLK1_CLK4_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK5_DS_CNTL 0x02b0 +#define regCLK1_CLK5_DS_CNTL_BASE_IDX 0 + +#define regCLK1_CLK0_ALLOW_DS 0x0284 +#define regCLK1_CLK0_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK1_ALLOW_DS 0x028d +#define regCLK1_CLK1_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK2_ALLOW_DS 0x0296 +#define regCLK1_CLK2_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK3_ALLOW_DS 0x029f +#define regCLK1_CLK3_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK4_ALLOW_DS 0x02a8 +#define 
regCLK1_CLK4_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK5_ALLOW_DS 0x02b1 +#define regCLK1_CLK5_ALLOW_DS_BASE_IDX 0 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 @@ -185,6 +248,8 @@ void dcn314_init_clocks(struct clk_mgr *clk_mgr) { struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; + struct clk_mgr_dcn314 *clk_mgr_dcn314 = TO_CLK_MGR_DCN314(clk_mgr_int); + struct clk_log_info log_info = {0}; memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate @@ -200,6 +265,9 @@ void dcn314_init_clocks(struct clk_mgr *clk_mgr) dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz); else clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; + + dcn314_dump_clk_registers(&clk_mgr->boot_snapshot, &clk_mgr_dcn314->base.base, &log_info); + clk_mgr->clks.dispclk_khz = clk_mgr->boot_snapshot.dispclk * 1000; } void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, @@ -218,6 +286,8 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, if (dc->work_arounds.skip_clock_update) return; + display_count = dcn314_get_active_display_cnt_wa(dc, context); + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state @@ -236,7 +306,6 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, } /* check that we're not already in lower */ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { - display_count = dcn314_get_active_display_cnt_wa(dc, context); /* if we can go lower, go lower */ if (display_count == 0) { union display_idle_optimization_u idle_info = { 0 }; @@ -293,11 +362,19 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, update_dppclk = true; } - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { + if 
(should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { + int requested_dispclk_khz = new_clocks->dispclk_khz; + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); + /* Clamp the requested clock to PMFW based on their limit. */ + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) + requested_dispclk_khz = dc->debug.min_disp_clk_khz; + + dcn314_smu_set_dispclk(clk_mgr, requested_dispclk_khz); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; @@ -385,10 +462,65 @@ bool dcn314_are_clock_states_equal(struct dc_clocks *a, return true; } -static void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + +static void dcn314_dump_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + // read dtbclk + internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT); + internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL); + + // read dcfclk + internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT); + internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL); + + // read dcf deep sleep divider + internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL); + internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS); + + // read dppclk + internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT); + internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL); + + // read dprefclk + internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT); + internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL); + + // read dispclk + 
internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT); + internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL); +} + +void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) { - return; + + struct dcn35_clk_internal internal = {0}; + + dcn314_dump_clk_registers_internal(&internal, clk_mgr_base); + + regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10; + regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10; + regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS; + regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10; + regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10; + regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10; + regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; + + regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4) + regs_and_bypass->dppclk_bypass = 0; + regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4) + regs_and_bypass->dcfclk_bypass = 0; + regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4) + regs_and_bypass->dispclk_bypass = 0; + regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4) + regs_and_bypass->dprefclk_bypass = 0; + } static struct clk_bw_params dcn314_bw_params = { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h index 002c28e80720..0577eb527bc3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h +++ 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h @@ -65,4 +65,9 @@ void dcn314_clk_mgr_construct(struct dc_context *ctx, void dcn314_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int); + +void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info); + + #endif //__DCN314_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 663c49cce4aa..d4917a35b991 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -927,6 +927,7 @@ static const struct dc_debug_options debug_defaults_drv = { .enable_legacy_fast_update = true, .using_dml2 = false, .disable_dsc_power_gate = true, + .min_disp_clk_khz = 100000, }; static const struct dc_panel_config panel_config_defaults = { -- cgit v1.2.3 From 3451021a9e153e3f3067c90429c8317c126e159e Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Tue, 9 Sep 2025 14:15:05 -0400 Subject: drm/amd/display: Enable DTM v3 on dGPUs with DCN 3.1+ [Why&How] Right now, only selected APUs have enabled DTM v3, which allows to use newer firmware for content protection. 
We want to enable it on the dGPUs starting with DCN 3.2 Reviewed-by: Aurabindo Pillai Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 58e084f52526..19038f336155 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -768,14 +768,18 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct mod_hdcp_ddc_funcs *ddc_funcs = &config->ddc.funcs; config->psp.handle = &adev->psp; - if (dc->ctx->dce_version == DCN_VERSION_3_1 || + if (dc->ctx->dce_version == DCN_VERSION_3_1 || dc->ctx->dce_version == DCN_VERSION_3_14 || dc->ctx->dce_version == DCN_VERSION_3_15 || - dc->ctx->dce_version == DCN_VERSION_3_5 || + dc->ctx->dce_version == DCN_VERSION_3_16 || + dc->ctx->dce_version == DCN_VERSION_3_2 || + dc->ctx->dce_version == DCN_VERSION_3_21 || + dc->ctx->dce_version == DCN_VERSION_3_5 || dc->ctx->dce_version == DCN_VERSION_3_51 || - dc->ctx->dce_version == DCN_VERSION_3_6 || - dc->ctx->dce_version == DCN_VERSION_3_16) + dc->ctx->dce_version == DCN_VERSION_3_6 || + dc->ctx->dce_version == DCN_VERSION_4_01) config->psp.caps.dtm_v3_supported = 1; + config->ddc.handle = dc_get_link_at_index(dc, i); ddc_funcs->write_i2c = lp_write_i2c; -- cgit v1.2.3 From 0bf6b216d4783cb51f9af05a49d3cce4fc22dc24 Mon Sep 17 00:00:00 2001 From: Sridevi Arvindekar Date: Wed, 10 Sep 2025 11:04:07 -0400 Subject: drm/amd/display: Fix for test crash due to power gating [Why/How] Call power gating routine only if it is defined. 
Reviewed-by: Alvin Lee Signed-off-by: Sridevi Arvindekar Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 417f2679723e..95af58cbb92a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -3131,7 +3131,8 @@ void dcn20_fpga_init_hw(struct dc *dc) res_pool->dccg->funcs->dccg_init(res_pool->dccg); //Enable ability to power gate / don't force power on permanently - hws->funcs.enable_power_gating_plane(hws, true); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(hws, true); // Specific to FPGA dccg and registers REG_WRITE(RBBMIF_TIMEOUT_DIS, 0xFFFFFFFF); -- cgit v1.2.3 From 9e5d4a5e27c6dc4e1b4fc9d654d13de12b8ce156 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 9 Sep 2025 16:03:08 -0400 Subject: drm/amd/display: Use mpc.preblend flag to indicate preblend [Description] Modifications in per asic capability means mpc.preblend flag should be used to indicate preblend. Update relevant paths to use this flag. 
Fixes: 39923050615c ("drm/amd/display: Clear DPP 3DLUT Cap") Reviewed-by: Dillon Varone Signed-off-by: Alvin Lee Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index c7387af725d6..b0ef157f1333 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -821,7 +821,7 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); const struct drm_color_lut *shaper = NULL, *lut3d = NULL; uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE; - bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut; + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; /* shaper LUT is only available if 3D LUT color caps */ exp_size = has_3dlut ? 
MAX_COLOR_LUT_ENTRIES : 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 08f629c64df3..e027798ece03 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1633,7 +1633,7 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, drm_object_attach_property(&plane->base, dm->adev->mode_info.plane_ctm_property, 0); - if (dpp_color_caps.hw_3d_lut) { + if (dpp_color_caps.hw_3d_lut || dm->dc->caps.color.mpc.preblend) { drm_object_attach_property(&plane->base, mode_info.plane_shaper_lut_property, 0); drm_object_attach_property(&plane->base, -- cgit v1.2.3 From b65cf4baeb24bdb5fee747679ee88f1ade5c1d6c Mon Sep 17 00:00:00 2001 From: Allen Li Date: Fri, 5 Sep 2025 16:58:38 +0800 Subject: drm/amd/display: Add fast sync field in ultra sleep mode for DMUB [Why&How] We need to inform DMUB whether fast sync in ultra sleep mode is supported, so that it can disable desync error detection when it is not enabled. This helps prevent unexpected desync errors when transitioning out of ultra sleep mode. Add fast sync in ultra sleep mode field in replay copy setting command.
Reviewed-by: Robin Chen Reviewed-by: Nicholas Kazlauskas Signed-off-by: Allen Li Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c | 1 + drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 9e2a473a8852..f9542edff14b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -169,6 +169,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line; copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable; copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported; + copy_settings_data->replay_support_fast_resync_in_ultra_sleep_mode = link->replay_settings.config.replay_support_fast_resync_in_ultra_sleep_mode; copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index e65747f7f12f..6db19e49a05c 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -4142,10 +4142,14 @@ struct dmub_cmd_replay_copy_settings_data { * @hpo_link_enc_inst: HPO link encoder instance */ uint8_t hpo_link_enc_inst; + /** + * Determines if fast sync in ultra sleep mode is enabled/disabled. + */ + uint8_t replay_support_fast_resync_in_ultra_sleep_mode; /** * @pad: Align structure to 4 byte boundary. 
*/ - uint8_t pad[2]; + uint8_t pad[1]; }; -- cgit v1.2.3 From d19f570cdf94caa5fad0f1403466e01a7c430b8f Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 22 Aug 2025 13:23:18 -0400 Subject: drm/amd/display: Isolate dcn401 SMU functions [WHY&HOW] SMU interfaces are not backwards and forwards compatible, so they should be isolated per version. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 26 ++--- .../dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c | 118 +++++++++++++++++++++ .../dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h | 10 +- 3 files changed, 140 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 47461f249e83..306016c1f109 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -162,7 +162,7 @@ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e c unsigned int i; char *entry_i = (char *)entry_0; - uint32_t ret = dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF); + uint32_t ret = dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF); if (ret & (1 << 31)) /* fine-grained, only min and max */ @@ -174,7 +174,7 @@ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e c /* if the initial message failed, num_levels will be 0 */ for (i = 0; i < *num_levels && i < ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries); i++) { - *((unsigned int *)entry_i) = (dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF); + *((unsigned int *)entry_i) = (dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF); entry_i += sizeof(clk_mgr->base.bw_params->clk_table.entries[0]); } } @@ -231,20 +231,20 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) 
clk_mgr->smu_present = false; clk_mgr->dpm_present = false; - if (!clk_mgr_base->force_smu_not_present && dcn30_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver)) + if (!clk_mgr_base->force_smu_not_present && dcn401_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver)) clk_mgr->smu_present = true; if (!clk_mgr->smu_present) return; - dcn30_smu_check_driver_if_version(clk_mgr); - dcn30_smu_check_msg_header_version(clk_mgr); + dcn401_smu_check_driver_if_version(clk_mgr); + dcn401_smu_check_msg_header_version(clk_mgr); /* DCFCLK */ dcn401_init_single_clock(clk_mgr, PPCLK_DCFCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dcfclk_mhz, &num_entries_per_clk->num_dcfclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK); + clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK); if (num_entries_per_clk->num_dcfclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dcfclk_levels - 1].dcfclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = 0; @@ -253,7 +253,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_SOCCLK, &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz, &num_entries_per_clk->num_socclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK); + clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK); if (num_entries_per_clk->num_socclk_levels && clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_socclk_levels - 1].socclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = 0; @@ -263,7 +263,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_DTBCLK, 
&clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz, &num_entries_per_clk->num_dtbclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK); + clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK); if (num_entries_per_clk->num_dtbclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dtbclk_levels - 1].dtbclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = 0; @@ -273,7 +273,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_DISPCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dispclk_mhz, &num_entries_per_clk->num_dispclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK); + clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK); if (num_entries_per_clk->num_dispclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dispclk_levels - 1].dispclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = 0; @@ -1318,8 +1318,8 @@ static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base) table->Watermarks.WatermarkRow[i].WmSetting = i; table->Watermarks.WatermarkRow[i].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[i].pmfw_breakdown.wm_type; } - dcn30_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32); - dcn30_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF); + dcn401_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32); + dcn401_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF); dcn401_smu_transfer_wm_table_dram_2_smu(clk_mgr); } @@ -1390,7 +1390,7 @@ static void dcn401_get_memclk_states_from_smu(struct 
clk_mgr *clk_mgr_base) clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz; } - clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); + clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); if (num_entries_per_clk->num_memclk_levels && clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = 0; @@ -1399,7 +1399,7 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_FCLK, &clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz, &num_entries_per_clk->num_fclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK); + clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK); if (num_entries_per_clk->num_fclk_levels && clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_fclk_levels - 1].fclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = 0; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c index 21c35528f61f..347a0d66d653 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c @@ -139,6 +139,59 @@ static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mg return false; } +bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version) +{ + smu_print("SMU Get SMU version\n"); + + if (dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetSmuVersion, 0, version)) { + + 
smu_print("SMU version: %d\n", *version); + + return true; + } + + return false; +} + +/* Message output should match SMU11_DRIVER_IF_VERSION in smu11_driver_if.h */ +bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr) +{ + uint32_t response = 0; + + smu_print("SMU Check driver if version\n"); + + if (dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetDriverIfVersion, 0, &response)) { + + smu_print("SMU driver if version: %d\n", response); + + if (response == SMU14_DRIVER_IF_VERSION) + return true; + } + + return false; +} + +/* Message output should match DALSMC_VERSION in dalsmc.h */ +bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr) +{ + uint32_t response = 0; + + smu_print("SMU Check msg header version\n"); + + if (dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetMsgHeaderVersion, 0, &response)) { + + smu_print("SMU msg header version: %d\n", response); + + if (response == DALSMC_VERSION) + return true; + } + + return false; +} + void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support) { smu_print("FCLK P-state support value is : %d\n", support); @@ -163,6 +216,22 @@ void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsi smu_print("Numways for SubVP : %d\n", num_ways); } +void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high) +{ + smu_print("SMU Set DRAM addr high: %d\n", addr_high); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SetDalDramAddrHigh, addr_high, NULL); +} + +void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low) +{ + smu_print("SMU Set DRAM addr low: %d\n", addr_low); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SetDalDramAddrLow, addr_low, NULL); +} + void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr) { smu_print("SMU Transfer WM table DRAM 2 SMU\n"); @@ -348,3 +417,52 @@ unsigned int 
dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr return response; } + +/* + * Frequency in MHz returned in lower 16 bits for valid DPM level + * + * Call with dpm_level = 0xFF to query features, return value will be: + * Bits 7:0 - number of DPM levels + * Bit 28 - 1 = auto DPM on + * Bit 29 - 1 = sweep DPM on + * Bit 30 - 1 = forced DPM on + * Bit 31 - 0 = discrete, 1 = fine-grained + * + * With fine-grained DPM, only min and max frequencies will be reported + * + * Returns 0 on failure + */ +unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level) +{ + uint32_t response = 0; + + /* bits 23:16 for clock type, lower 8 bits for DPM level */ + uint32_t param = (clk << 16) | dpm_level; + + smu_print("SMU Get dpm freq by index: clk = %d, dpm_level = %d\n", clk, dpm_level); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetDpmFreqByIndex, param, &response); + + smu_print("SMU dpm freq: %d MHz\n", response); + + return response; +} + +/* Returns the max DPM frequency in DC mode in MHz, 0 on failure */ +unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk) +{ + uint32_t response = 0; + + /* bits 23:16 for clock type */ + uint32_t param = clk << 16; + + smu_print("SMU Get DC mode max DPM freq: clk = %d\n", clk); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetDcModeMaxDpmFreq, param, &response); + + smu_print("SMU DC mode max DMP freq: %d MHz\n", response); + + return response; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h index e02eb1294b37..4f5ac603e822 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h @@ -7,11 +7,17 @@ #include "os_types.h" #include "core_types.h" -#include "dcn32/dcn32_clk_mgr_smu_msg.h" 
+struct clk_mgr_internal; + +bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version); +bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr); +bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr); void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support); void dcn401_smu_send_uclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support); void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways); +void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high); +void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low); void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr); void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr); unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz); @@ -29,5 +35,7 @@ bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz); void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays); unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr); +unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk); +unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level); #endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */ -- cgit v1.2.3 From adb441ca64a2f3972eb2919e2e340c83a3212479 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 10 Sep 2025 10:55:48 -0400 Subject: drm/amd/display: Refactor SMU tracing [WHY&HOW] Add new tracing and performance measurements for SMU messaging. 
Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c | 4 ++-- .../drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c | 2 +- .../drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c | 5 +++-- .../amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c | 12 ++++++++---- drivers/gpu/drm/amd/display/dc/dm_services.h | 11 ++++++----- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c index 132de4071efd..8550d5e8b753 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c @@ -53,11 +53,11 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc func_name, line); } -void dm_trace_smu_msg(uint32_t msg_id, uint32_t param_in, struct dc_context *ctx) +void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx) { } -void dm_trace_smu_delay(uint32_t delay, struct dc_context *ctx) +void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx) { } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c index 3253115a153d..827bc2431d5d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c @@ -69,7 +69,7 @@ static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un /* handle DALSMC_Result_CmdRejectedBusy? 
*/ - TRACE_SMU_DELAY(delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); + TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); return reg; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c index cf2d35363e8b..5d80fdf63ffc 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c @@ -63,7 +63,8 @@ static uint32_t dcn32_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un udelay(delay_us); } while (max_retries--); - TRACE_SMU_DELAY(delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); + TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); + return reg; } @@ -120,7 +121,7 @@ static uint32_t dcn32_smu_wait_for_response_delay(struct clk_mgr_internal *clk_m *total_delay_us += delay_us; } while (max_retries--); - TRACE_SMU_DELAY(*total_delay_us, clk_mgr->base.ctx); + TRACE_SMU_MSG_DELAY(0, 0, *total_delay_us, clk_mgr->base.ctx); return reg; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c index 347a0d66d653..3a263840893e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c @@ -57,6 +57,8 @@ static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uin /* Wait for response register to be ready */ dcn401_smu_wait_for_response(clk_mgr, 10, 200000); + TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx); + /* Clear response register */ REG_WRITE(DAL_RESP_REG, 0); @@ -71,9 +73,11 @@ static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uin if (param_out) *param_out = REG_READ(DAL_ARG_REG); + 
TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx); return true; } + TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx); return false; } @@ -102,8 +106,6 @@ static uint32_t dcn401_smu_wait_for_response_delay(struct clk_mgr_internal *clk_ *total_delay_us += delay_us; } while (max_retries--); - TRACE_SMU_DELAY(*total_delay_us, clk_mgr->base.ctx); - return reg; } @@ -115,6 +117,8 @@ static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mg /* Wait for response register to be ready */ dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay1_us); + TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx); + /* Clear response register */ REG_WRITE(DAL_RESP_REG, 0); @@ -124,18 +128,18 @@ static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mg /* Trigger the message transaction by writing the message ID */ REG_WRITE(DAL_MSG_REG, msg_id); - TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx); - /* Wait for response */ if (dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay2_us) == DALSMC_Result_OK) { if (param_out) *param_out = REG_READ(DAL_ARG_REG); *total_delay_us = delay1_us + delay2_us; + TRACE_SMU_MSG_EXIT(true, param_out ? 
*param_out : 0, clk_mgr->base.ctx); return true; } *total_delay_us = delay1_us + 2000000; + TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx); return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 7b398d4f4439..fbbf9c757b3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -277,12 +277,13 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc /* * SMU message tracing */ -void dm_trace_smu_msg(uint32_t msg_id, uint32_t param_in, struct dc_context *ctx); -void dm_trace_smu_delay(uint32_t delay, struct dc_context *ctx); - -#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_msg(msg_id, param_in, ctx) -#define TRACE_SMU_DELAY(response_delay, ctx) dm_trace_smu_delay(response_delay, ctx) +void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx); +void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx); +#define TRACE_SMU_MSG_DELAY(msg_id, param_in, delay, ctx) dm_trace_smu_enter(msg_id, param_in, delay, ctx) +#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx) +#define TRACE_SMU_MSG_ENTER(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx) +#define TRACE_SMU_MSG_EXIT(success, response, ctx) dm_trace_smu_exit(success, response, ctx) /* * DMUB Interfaces -- cgit v1.2.3 From c480d074b3ff27a8d582505dfd53c9b091b63d57 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 10 Sep 2025 16:55:23 -0400 Subject: drm/amd/display: Set wm_pending when disable stutter w/a used [WHY&HOW] When stutter is disabled prior to watermark programming due to a w/a, wm_pending should be returned as true. 
Reviewed-by: Nicholas Kazlauskas Reviewed-by: Aurabindo Pillai Signed-off-by: Dillon Varone Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c index 3b71bfaca291..4d4ca6d77bbd 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c @@ -770,6 +770,7 @@ static bool hubbub32_program_watermarks( (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) || ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) { /* before raising watermarks, SDP control give to DF, stutter must be disabled */ + wm_pending = true; hubbub32_set_sdp_control(hubbub, false); hubbub1_allow_self_refresh_control(hubbub, false); } -- cgit v1.2.3 From 19d8f3192f78ce38e710da2df8bad2bc972a7a5a Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 11 Sep 2025 13:52:52 -0400 Subject: drm/amd/display: Remove wm_optimized_required [WHY&HOW] This is a redundant field that is identical to optimized_required, so just replace it.
Reviewed-by: Nicholas Kazlauskas Reviewed-by: Aurabindo Pillai Signed-off-by: Dillon Varone Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 +---- drivers/gpu/drm/amd/display/dc/dc.h | 1 - drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 8 ++++---- 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 974d63af1c70..5f2d5638c819 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -460,7 +460,7 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * avoid conflicting with firmware updates. */ if (dc->ctx->dce_version > DCE_VERSION_MAX) { - if ((dc->optimized_required || dc->wm_optimized_required) && + if (dc->optimized_required && (stream->adjust.v_total_max != adjust->v_total_max || stream->adjust.v_total_min != adjust->v_total_min)) { stream->adjust.timing_adjust_pending = true; @@ -2577,7 +2577,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) } dc->optimized_required = false; - dc->wm_optimized_required = false; } bool dc_set_generic_gpio_for_stereo(bool enable, @@ -3056,8 +3055,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream( } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { dc->optimized_required = true; } - - dc->optimized_required |= dc->wm_optimized_required; } return type; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 62d132bf3028..bd910b0a523a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1736,7 +1736,6 @@ struct dc { /* Require to optimize clocks and bandwidth for added/removed 
planes */ bool optimized_required; - bool wm_optimized_required; bool idle_optimizations_allowed; bool enable_c20_dtm_b0; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 74f5e05f9cb4..e9fe97f0c4ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3347,7 +3347,7 @@ void dcn10_prepare_bandwidth( context, false); - dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required = hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 95af58cbb92a..717d2c4daa88 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2390,10 +2390,10 @@ void dcn20_prepare_bandwidth( } /* program dchubbub watermarks: - * For assigning wm_optimized_required, use |= operator since we don't want + * For assigning optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); @@ -2406,10 +2406,10 @@ void dcn20_prepare_bandwidth( if (hubbub->funcs->program_compbuf_size) { if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); + dc->optimized_required |= (compbuf_size_kb != 
dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); } else { compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); + dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); } hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 1b0b772fc5dd..625653ce5556 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1383,22 +1383,22 @@ void dcn401_prepare_bandwidth(struct dc *dc, false); /* program dchubbub watermarks: - * For assigning wm_optimized_required, use |= operator since we don't want + * For assigning optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); /* update timeout thresholds */ if (hubbub->funcs->program_arbiter) { - dc->wm_optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); + dc->optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); } /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; - dc->wm_optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); + dc->optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size, false); } -- cgit v1.2.3 From 
35bcc9168f3ce6416cbf3f776758be0937f84cb3 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 11 Sep 2025 17:52:11 -0400 Subject: drm/amd/display: Insert dccg log for easy debug [why] Log for sequence tracking Reviewed-by: Ovidiu (Ovi) Bunea Reviewed-by: Yihan Zhu Signed-off-by: Charlene Liu Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 24 +++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 0ce9489ac6b7..de6d62401362 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -39,6 +39,7 @@ #define CTX \ dccg_dcn->base.ctx +#include "logger_types.h" #define DC_LOGGER \ dccg->ctx->logger @@ -1136,7 +1137,7 @@ static void dcn35_set_dppclk_enable(struct dccg *dccg, default: break; } - //DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable); + DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable); } @@ -1406,6 +1407,10 @@ static void dccg35_set_dtbclk_dto( * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the * programming is handled in program_pix_clk() regardless, so it can be removed from here. 
*/ + DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO enabled: pixclk_khz=%d, ref_dtbclk_khz=%d, req_dtbclk_khz=%d, phase=%d, modulo=%d\n", + __func__, params->otg_inst, params->pixclk_khz, + params->ref_dtbclk_khz, req_dtbclk_khz, phase, modulo); + } else { switch (params->otg_inst) { case 0: @@ -1431,6 +1436,8 @@ static void dccg35_set_dtbclk_dto( REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); + + DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO disabled\n", __func__, params->otg_inst); } } @@ -1475,6 +1482,8 @@ static void dccg35_set_dpstreamclk( BREAK_TO_DEBUGGER(); return; } + DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_EN = %d, DPSTREAMCLK_SRC_SEL = %d\n", + __func__, dp_hpo_inst, (src == REFCLK) ? 0 : 1, otg_inst); } @@ -1514,6 +1523,8 @@ static void dccg35_set_dpstreamclk_root_clock_gating( BREAK_TO_DEBUGGER(); return; } + DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_ROOT_GATE_DISABLE = %d\n", + __func__, dp_hpo_inst, enable ? 1 : 0); } @@ -1553,7 +1564,7 @@ static void dccg35_set_physymclk_root_clock_gating( BREAK_TO_DEBUGGER(); return; } - //DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE:\n", __func__, phy_inst, enable ? 0 : 1); + DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE: %d\n", __func__, phy_inst, enable ? 0 : 1); } @@ -1626,6 +1637,8 @@ static void dccg35_set_physymclk( BREAK_TO_DEBUGGER(); return; } + DC_LOG_DEBUG("%s: phy_inst(%d) PHYxSYMCLK_EN = %d, PHYxSYMCLK_SRC_SEL = %d\n", + __func__, phy_inst, force_enable ? 
1 : 0, clk_src); } static void dccg35_set_valid_pixel_rate( @@ -1673,6 +1686,7 @@ static void dccg35_dpp_root_clock_control( } dccg->dpp_clock_gated[dpp_inst] = !clock_on; + DC_LOG_DEBUG("%s: dpp_inst(%d) clock_on = %d\n", __func__, dpp_inst, clock_on); } static void dccg35_disable_symclk32_se( @@ -1731,6 +1745,7 @@ static void dccg35_disable_symclk32_se( BREAK_TO_DEBUGGER(); return; } + } static void dccg35_init_cb(struct dccg *dccg) @@ -1738,7 +1753,6 @@ static void dccg35_init_cb(struct dccg *dccg) (void)dccg; /* Any RCG should be done when driver enter low power mode*/ } - void dccg35_init(struct dccg *dccg) { int otg_inst; @@ -1753,6 +1767,8 @@ void dccg35_init(struct dccg *dccg) for (otg_inst = 0; otg_inst < 2; otg_inst++) { dccg31_disable_symclk32_le(dccg, otg_inst); dccg31_set_symclk32_le_root_clock_gating(dccg, otg_inst, false); + DC_LOG_DEBUG("%s: OTG%d SYMCLK32_LE disabled and root clock gating disabled\n", + __func__, otg_inst); } // if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) @@ -1765,6 +1781,8 @@ void dccg35_init(struct dccg *dccg) dccg35_set_dpstreamclk(dccg, REFCLK, otg_inst, otg_inst); dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false); + DC_LOG_DEBUG("%s: OTG%d DPSTREAMCLK disabled and root clock gating disabled\n", + __func__, otg_inst); } /* -- cgit v1.2.3 From f1fd8a9ac2aa5118f76baf28e6ca4d6962a485be Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 11 Sep 2025 19:20:45 -0400 Subject: drm/amd/display: Correct sw cache timing to ensure dispclk ramping [why] Current driver will cache the dispclk right after send cmd to pmfw, but actual clock not reached yet. Change to only cache the dispclk setting after HW reached to the real clock. Also give some range as it might be in bypass clock setting. 
Reviewed-by: Yihan Zhu Signed-off-by: Charlene Liu Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 30 +++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 86edf11b8c5a..6fc8a7491684 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -387,6 +387,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, bool update_dispclk = false; bool dpp_clock_lowered = false; int all_active_disps = 0; + int actual_dppclk = 0; if (dc->work_arounds.skip_clock_update) return; @@ -472,14 +473,13 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) dpp_clock_lowered = true; - clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; update_dppclk = true; } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { int requested_dispclk_khz = new_clocks->dispclk_khz; - + int actual_dispclk; dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); /* Clamp the requested clock to PMFW based on their limit. 
*/ @@ -487,7 +487,11 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, requested_dispclk_khz = dc->debug.min_disp_clk_khz; dcn35_smu_set_dispclk(clk_mgr, requested_dispclk_khz); - clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; + actual_dispclk = REG_READ(CLK1_CLK0_CURRENT_CNT); + + /*pmfw might set bypass clock which is higher than hardmin*/ + if (actual_dispclk >= new_clocks->dispclk_khz) + clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); @@ -505,13 +509,20 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (dpp_clock_lowered) { // increase per DPP DTO before lowering global dppclk dcn35_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); - dcn35_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); + dcn35_smu_set_dppclk(clk_mgr, new_clocks->dppclk_khz); } else { // increase global DPPCLK before lowering per DPP DTO if (update_dppclk || update_dispclk) - dcn35_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); + dcn35_smu_set_dppclk(clk_mgr, new_clocks->dppclk_khz); dcn35_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } + if (update_dppclk) { + actual_dppclk = REG_READ(CLK1_CLK1_CURRENT_CNT); + + /*pmfw might set bypass clock which is higher than hardmin*/ + if (actual_dppclk >= new_clocks->dppclk_khz) + clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; + } // notify PMFW of bandwidth per DPIA tunnel if (dc->debug.notify_dpia_hr_bw) @@ -551,7 +562,7 @@ static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) * since fractional part is only 16 bit in register definition but is 32 bit * in our fix point definiton, need to shift left by 16 to obtain correct value */ - pll_req.value |= fbmult_frac_val << 16; + pll_req.value |= (long long) fbmult_frac_val << 16; /* multiply by REFCLK period */ pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz); @@ -778,7 +789,8 @@ static void 
dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF; /* Modify previous watermark range to cover up to max */ - table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF; + if (num_valid_sets > 0) + table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF; } num_valid_sets++; } @@ -939,8 +951,8 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk is_valid_clock_value(min_dram_speed_mts)); /* dispclk and dppclk can be max at any voltage, same number of levels for both */ - if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && - clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) { + if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS) { + /*numDispclk is the same as numDPPclk*/ max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); max_dppclk = find_max_clk_value(clock_table->DppClocks, -- cgit v1.2.3 From 550038edef7f933591e10e32f94a5e9557b023b8 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 12 Sep 2025 12:37:30 -0400 Subject: drm/amd/display: Revert "correct sw cache timing to ensure dispclk ramping" [why] Need to consider the SSC-enabled case. This reverts commit f1fd8a9ac2aa5118f76baf28e6ca4d6962a485be. 
Reviewed-by: Ovidiu (Ovi) Bunea Reviewed-by: Chris Park Signed-off-by: Charlene Liu Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 30 +++++++--------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 6fc8a7491684..86edf11b8c5a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -387,7 +387,6 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, bool update_dispclk = false; bool dpp_clock_lowered = false; int all_active_disps = 0; - int actual_dppclk = 0; if (dc->work_arounds.skip_clock_update) return; @@ -473,13 +472,14 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) dpp_clock_lowered = true; + clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; update_dppclk = true; } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { int requested_dispclk_khz = new_clocks->dispclk_khz; - int actual_dispclk; + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); /* Clamp the requested clock to PMFW based on their limit. 
*/ @@ -487,11 +487,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, requested_dispclk_khz = dc->debug.min_disp_clk_khz; dcn35_smu_set_dispclk(clk_mgr, requested_dispclk_khz); - actual_dispclk = REG_READ(CLK1_CLK0_CURRENT_CNT); - - /*pmfw might set bypass clock which is higher than hardmin*/ - if (actual_dispclk >= new_clocks->dispclk_khz) - clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; + clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); @@ -509,20 +505,13 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (dpp_clock_lowered) { // increase per DPP DTO before lowering global dppclk dcn35_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); - dcn35_smu_set_dppclk(clk_mgr, new_clocks->dppclk_khz); + dcn35_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); } else { // increase global DPPCLK before lowering per DPP DTO if (update_dppclk || update_dispclk) - dcn35_smu_set_dppclk(clk_mgr, new_clocks->dppclk_khz); + dcn35_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); dcn35_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } - if (update_dppclk) { - actual_dppclk = REG_READ(CLK1_CLK1_CURRENT_CNT); - - /*pmfw might set bypass clock which is higher than hardmin*/ - if (actual_dppclk >= new_clocks->dppclk_khz) - clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; - } // notify PMFW of bandwidth per DPIA tunnel if (dc->debug.notify_dpia_hr_bw) @@ -562,7 +551,7 @@ static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) * since fractional part is only 16 bit in register definition but is 32 bit * in our fix point definiton, need to shift left by 16 to obtain correct value */ - pll_req.value |= (long long) fbmult_frac_val << 16; + pll_req.value |= fbmult_frac_val << 16; /* multiply by REFCLK period */ pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz); @@ -789,8 +778,7 @@ static void 
dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF; /* Modify previous watermark range to cover up to max */ - if (num_valid_sets > 0) - table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF; + table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF; } num_valid_sets++; } @@ -951,8 +939,8 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk is_valid_clock_value(min_dram_speed_mts)); /* dispclk and dppclk can be max at any voltage, same number of levels for both */ - if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS) { - /*numDispclk is the same as numDPPclk*/ + if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && + clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) { max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); max_dppclk = find_max_clk_value(clock_table->DppClocks, -- cgit v1.2.3 From d43cc4ea1f9d720ab4bf06806f79260bfe981508 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Fri, 12 Sep 2025 11:01:50 -0400 Subject: drm/amd/display: Init DCN35 clocks from pre-os HW values [Why] We did not initialize dc clocks with boot-time hw values during init. This led to incorrect clock values in dc, causing `dcn35_update_clocks` to make incorrect updates. [How] Correctly initialize DC with pre-os clk values from HW. s/dump/save/ as that accurately reflects the purpose of the functions. 
Fixes: 8774029f76b9 ("drm/amd/display: Add DCN35 CLK_MGR") Reviewed-by: Aurabindo Pillai Signed-off-by: Leo Li Signed-off-by: Fangzhi Zuo Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 121 ++++++++++++++++++++- 1 file changed, 119 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 86edf11b8c5a..b11383fba35f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -587,9 +587,118 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a, return true; } -static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, +static void dcn35_save_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + // read dtbclk + internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT); + internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL); + + // read dcfclk + internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT); + internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL); + + // read dcf deep sleep divider + internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL); + internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS); + + // read dppclk + internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT); + internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL); + + // read dprefclk + internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT); + internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL); + + // read dispclk + internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT); + internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL); +} + +static void 
dcn35_save_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, struct clk_mgr_dcn35 *clk_mgr) { + struct dcn35_clk_internal internal = {0}; + char *bypass_clks[5] = {"0x0 DFS", "0x1 REFCLK", "0x2 ERROR", "0x3 400 FCH", "0x4 600 FCH"}; + + dcn35_save_clk_registers_internal(&internal, &clk_mgr->base.base); + + regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10; + regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10; + regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS; + regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10; + regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10; + regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10; + regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; + + regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4) + regs_and_bypass->dppclk_bypass = 0; + regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4) + regs_and_bypass->dcfclk_bypass = 0; + regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4) + regs_and_bypass->dispclk_bypass = 0; + regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4) + regs_and_bypass->dprefclk_bypass = 0; + + if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { + DC_LOG_SMU("clk_type,clk_value,deepsleep_cntl,deepsleep_allow,bypass\n"); + + DC_LOG_SMU("dcfclk,%d,%d,%d,%s\n", + regs_and_bypass->dcfclk, + regs_and_bypass->dcf_deep_sleep_divider, + regs_and_bypass->dcf_deep_sleep_allow, + bypass_clks[(int) regs_and_bypass->dcfclk_bypass]); + + DC_LOG_SMU("dprefclk,%d,N/A,N/A,%s\n", + 
regs_and_bypass->dprefclk, + bypass_clks[(int) regs_and_bypass->dprefclk_bypass]); + + DC_LOG_SMU("dispclk,%d,N/A,N/A,%s\n", + regs_and_bypass->dispclk, + bypass_clks[(int) regs_and_bypass->dispclk_bypass]); + + // REGISTER VALUES + DC_LOG_SMU("reg_name,value,clk_type"); + + DC_LOG_SMU("CLK1_CLK3_CURRENT_CNT,%d,dcfclk", + internal.CLK1_CLK3_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK4_CURRENT_CNT,%d,dtbclk", + internal.CLK1_CLK4_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK3_DS_CNTL,%d,dcf_deep_sleep_divider", + internal.CLK1_CLK3_DS_CNTL); + + DC_LOG_SMU("CLK1_CLK3_ALLOW_DS,%d,dcf_deep_sleep_allow", + internal.CLK1_CLK3_ALLOW_DS); + + DC_LOG_SMU("CLK1_CLK2_CURRENT_CNT,%d,dprefclk", + internal.CLK1_CLK2_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK0_CURRENT_CNT,%d,dispclk", + internal.CLK1_CLK0_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK1_CURRENT_CNT,%d,dppclk", + internal.CLK1_CLK1_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK3_BYPASS_CNTL,%d,dcfclk_bypass", + internal.CLK1_CLK3_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK2_BYPASS_CNTL,%d,dprefclk_bypass", + internal.CLK1_CLK2_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK0_BYPASS_CNTL,%d,dispclk_bypass", + internal.CLK1_CLK0_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK1_BYPASS_CNTL,%d,dppclk_bypass", + internal.CLK1_CLK1_BYPASS_CNTL); + + } } static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) @@ -623,6 +732,7 @@ static void init_clk_states(struct clk_mgr *clk_mgr) void dcn35_init_clocks(struct clk_mgr *clk_mgr) { struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); + struct clk_mgr_dcn35 *clk_mgr_dcn35 = TO_CLK_MGR_DCN35(clk_mgr_int); init_clk_states(clk_mgr); @@ -633,6 +743,13 @@ void dcn35_init_clocks(struct clk_mgr *clk_mgr) else clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; + dcn35_save_clk_registers(&clk_mgr->boot_snapshot, clk_mgr_dcn35); + + clk_mgr->clks.ref_dtbclk_khz = clk_mgr->boot_snapshot.dtbclk * 10; + if (clk_mgr->boot_snapshot.dtbclk > 59000) { + /*dtbclk enabled based on */ + clk_mgr->clks.dtbclk_en 
= true; + } } static struct clk_bw_params dcn35_bw_params = { .vram_type = Ddr4MemType, @@ -1323,7 +1440,7 @@ void dcn35_clk_mgr_construct( dcn35_bw_params.wm_table = ddr5_wm_table; } /* Saved clocks configured at boot for debug purposes */ - dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); + dcn35_save_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base); clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; -- cgit v1.2.3 From dff184cdba518cef0c0eb6ac8ed115cb83cd26a8 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 12 Sep 2025 16:15:44 -0400 Subject: drm/amd/display: [FW Promotion] Release 0.1.28.0 Reviewed-by: Aurabindo Pillai Signed-off-by: Taimur Hassan Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 6db19e49a05c..92248224b713 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -4143,7 +4143,7 @@ struct dmub_cmd_replay_copy_settings_data { */ uint8_t hpo_link_enc_inst; /** - * Determines if fast sync in ultra sleep mode is enabled/disabled. + * Determines if fast resync in ultra sleep mode is enabled/disabled. */ uint8_t replay_support_fast_resync_in_ultra_sleep_mode; /** -- cgit v1.2.3 From cceb54222ea00cec4960ad346a918920c24e75c5 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 12 Sep 2025 18:23:48 -0500 Subject: drm/amd/display: Promote DC to 3.2.351 - Disable stutter when programming watermarks on dcn32 - Improve brightness calculations - Fix saving vbios clocks during init for DCN314 - Enable DTM 3 on DCN3.1+ dGPUs - Add new ultra sleep field in DMUB - Isolate DCN401 SMU functions - Refactor and add logging of SMU functions. 
- Add dccg logging - Fix DCN35 clocks initialization Reviewed-by: Aurabindo Pillai Signed-off-by: Taimur Hassan Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index bd910b0a523a..79de318c85e5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.350" +#define DC_VER "3.2.351" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC -- cgit v1.2.3 From 2f9c63883730a0bfecb086e6e59246933f936ca1 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 11 Sep 2025 14:21:19 -0300 Subject: drm/amd/display: update color on atomic commit time Use `atomic_commit_setup` to change the DC stream state. It's a preparation to remove from `atomic_check` changes in CRTC color components of DC stream state and prevent DC from committing TEST_ONLY changes. 
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/4444 Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 271ea1615178..c0a0efdf44de 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -233,6 +233,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev, static int amdgpu_dm_connector_get_modes(struct drm_connector *connector); +static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state); static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state); static int amdgpu_dm_atomic_check(struct drm_device *dev, @@ -3636,7 +3637,7 @@ static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = { static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { .atomic_commit_tail = amdgpu_dm_atomic_commit_tail, - .atomic_commit_setup = drm_dp_mst_atomic_setup_commit, + .atomic_commit_setup = amdgpu_dm_atomic_setup_commit, }; static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) @@ -10362,6 +10363,39 @@ static void amdgpu_dm_update_hdcp(struct drm_atomic_state *state) } } +static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; + int i, ret; + + ret = drm_dp_mst_atomic_setup_commit(state); + if (ret) + return ret; + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + /* + * Color management settings. 
We also update color properties + * when a modeset is needed, to ensure it gets reprogrammed. + */ + if (dm_new_crtc_state->base.active && dm_new_crtc_state->stream && + (dm_new_crtc_state->base.color_mgmt_changed || + dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || + drm_atomic_crtc_needs_modeset(new_crtc_state))) { + ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); + if (ret) { + drm_dbg_atomic(state->dev, "Failed to update color state\n"); + return ret; + } + } + } + + return 0; +} + /** * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation. * @state: The atomic state to commit -- cgit v1.2.3 From 86a54e45fd91953e2b0e1aa15932112f135e4821 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Date: Thu, 18 Sep 2025 12:42:00 +0530 Subject: drm/amdgpu: Use kmalloc_array() instead of kmalloc() Documentation/process/deprecated.rst recommends against the use of kmalloc with dynamic size calculations due to the risk of overflow and smaller allocation being made than the caller was expecting. Replace kmalloc() with kmalloc_array() in amdgpu_amdkfd_gfx_v10.c, amdgpu_amdkfd_gfx_v10_3.c, amdgpu_amdkfd_gfx_v11.c and amdgpu_amdkfd_gfx_v12.c to make the intended allocation size clearer and avoid potential overflow issues. 
Suggested-by: Felix Kuehling Signed-off-by: Rahul Kumar Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 04ef0ca10541..0239114fb6c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -352,7 +352,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -449,7 +449,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 6d08bc2781a3..f2278a0937ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -338,7 +338,7 @@ static int hqd_dump_v10_3(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -435,7 +435,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+12) - *dump 
= kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index e0e6a6a49d90..aaccf0b9947d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -323,7 +323,7 @@ static int hqd_dump_v11(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -420,7 +420,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (7+11+1+12+12) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c index 6f0dc23c901b..e0ceab400b2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c @@ -115,7 +115,7 @@ static int hqd_dump_v12(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -146,7 +146,7 @@ static int hqd_sdma_dump_v12(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (last_reg - first_reg + 1) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; -- cgit v1.2.3 From 2330437da0994321020777c605a2a8cb0ecb7001 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 24 Mar 2025 15:10:44 +0800 
Subject: drm/amd/ras: Add rascore status definition Add rascore status definition. V5: Merge the previous empty files. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/Makefile | 0 drivers/gpu/drm/amd/ras/rascore/ras_core_status.h | 37 +++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/Makefile create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_core_status.h diff --git a/drivers/gpu/drm/amd/ras/rascore/Makefile b/drivers/gpu/drm/amd/ras/rascore/Makefile new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h b/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h new file mode 100644 index 000000000000..144fbe4ceb9a --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RAS_CORE_STATUS_H__ +#define __RAS_CORE_STATUS_H__ + +#define RAS_CORE_OK 0 +#define RAS_CORE_NOT_SUPPORTED 248 +#define RAS_CORE_FAIL_ERROR_QUERY 249 +#define RAS_CORE_FAIL_ERROR_INJECTION 250 +#define RAS_CORE_FAIL_FATAL_RECOVERY 251 +#define RAS_CORE_FAIL_POISON_CONSUMPTION 252 +#define RAS_CORE_FAIL_POISON_CREATION 253 +#define RAS_CORE_FAIL_NO_VALID_BANKS 254 +#define RAS_CORE_GPU_IN_MODE1_RESET 255 +#endif -- cgit v1.2.3 From 51cb93aa0c4a9bb126b76f6e9fd640d88de25cee Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 11 Sep 2025 14:21:20 -0300 Subject: drm/amd/display: change dc stream color settings only in atomic commit Don't update DC stream color components during atomic check. The driver will continue validating the new CRTC color state but will not change DC stream color components. The DC stream color state will only be programmed at commit time in the `atomic_setup_commit` stage. It fixes gamma LUT loss reported by KDE users when changing brightness quickly or changing Display settings (such as overscan) with nightlight on and HDR. As KWin can do a test commit with color settings different from those that should be applied in a non-test-only commit, if the driver changes DC stream color state in atomic check, this state can be eventually HW programmed in commit tail, instead of the respective state set by the non-blocking commit. 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4444 Reported-by: Xaver Hugl Signed-off-by: Melissa Wen Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 + .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 86 ++++++++++++++++------ 3 files changed, 66 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c0a0efdf44de..951eaeb7f793 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11210,7 +11210,7 @@ skip_modeset: if (dm_new_crtc_state->base.color_mgmt_changed || dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || drm_atomic_crtc_needs_modeset(new_crtc_state)) { - ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); + ret = amdgpu_dm_check_crtc_color_mgmt(dm_new_crtc_state, true); if (ret) goto fail; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index c41c0ee4687f..009f206226f0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -1054,6 +1054,8 @@ void amdgpu_dm_init_color_mod(void); int amdgpu_dm_create_color_properties(struct amdgpu_device *adev); int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); +int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc, + bool check_only); int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index b0ef157f1333..a4ac6d442278 100644 --- 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -566,12 +566,11 @@ static int __set_output_tf(struct dc_transfer_func *func, return res ? 0 : -ENOMEM; } -static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, +static int amdgpu_dm_set_atomic_regamma(struct dc_transfer_func *out_tf, const struct drm_color_lut *regamma_lut, uint32_t regamma_size, bool has_rom, enum dc_transfer_func_predefined tf) { - struct dc_transfer_func *out_tf = &stream->out_transfer_func; int ret = 0; if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) { @@ -885,33 +884,33 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) } /** - * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream. + * amdgpu_dm_check_crtc_color_mgmt: Check if DRM color props are programmable by DC. * @crtc: amdgpu_dm crtc state + * @check_only: only check color state without update dc stream * - * With no plane level color management properties we're free to use any - * of the HW blocks as long as the CRTC CTM always comes before the - * CRTC RGM and after the CRTC DGM. - * - * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. - * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. - * - The CRTC CTM will be placed in the gamut remap block if it is non-linear. + * This function just verifies CRTC LUT sizes, if there is enough space for + * output transfer function and if its parameters can be calculated by AMD + * color module. It also adjusts some settings for programming CRTC degamma at + * plane stage, using plane DGM block. * * The RGM block is typically more fully featured and accurate across * all ASICs - DCE can't support a custom non-linear CRTC DGM. 
* * For supporting both plane level color management and CRTC level color - * management at once we have to either restrict the usage of CRTC properties - * or blend adjustments together. + * management at once we have to either restrict the usage of some CRTC + * properties or blend adjustments together. * * Returns: - * 0 on success. Error code if setup fails. + * 0 on success. Error code if validation fails. */ -int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) + +int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc, + bool check_only) { struct dc_stream_state *stream = crtc->stream; struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); bool has_rom = adev->asic_type <= CHIP_RAVEN; - struct drm_color_ctm *ctm = NULL; + struct dc_transfer_func *out_tf; const struct drm_color_lut *degamma_lut, *regamma_lut; uint32_t degamma_size, regamma_size; bool has_regamma, has_degamma; @@ -940,6 +939,14 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) crtc->cm_has_degamma = false; crtc->cm_is_degamma_srgb = false; + if (check_only) { + out_tf = kvzalloc(sizeof(*out_tf), GFP_KERNEL); + if (!out_tf) + return -ENOMEM; + } else { + out_tf = &stream->out_transfer_func; + } + /* Setup regamma and degamma. */ if (is_legacy) { /* @@ -954,8 +961,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * inverse color ramp in legacy userspace. 
*/ crtc->cm_is_degamma_srgb = true; - stream->out_transfer_func.type = TF_TYPE_DISTRIBUTED_POINTS; - stream->out_transfer_func.tf = TRANSFER_FUNCTION_SRGB; + out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + out_tf->tf = TRANSFER_FUNCTION_SRGB; /* * Note: although we pass has_rom as parameter here, we never * actually use ROM because the color module only takes the ROM @@ -963,16 +970,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * * See more in mod_color_calculate_regamma_params() */ - r = __set_legacy_tf(&stream->out_transfer_func, regamma_lut, + r = __set_legacy_tf(out_tf, regamma_lut, regamma_size, has_rom); - if (r) - return r; } else { regamma_size = has_regamma ? regamma_size : 0; - r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut, + r = amdgpu_dm_set_atomic_regamma(out_tf, regamma_lut, regamma_size, has_rom, tf); - if (r) - return r; } /* @@ -981,6 +984,43 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * have to place the CTM in the OCSC in that case. */ crtc->cm_has_degamma = has_degamma; + if (check_only) + kvfree(out_tf); + + return r; +} + +/** + * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream. + * @crtc: amdgpu_dm crtc state + * + * With no plane level color management properties we're free to use any + * of the HW blocks as long as the CRTC CTM always comes before the + * CRTC RGM and after the CRTC DGM. + * + * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. + * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. + * - The CRTC CTM will be placed in the gamut remap block if it is non-linear. + * + * The RGM block is typically more fully featured and accurate across + * all ASICs - DCE can't support a custom non-linear CRTC DGM. + * + * For supporting both plane level color management and CRTC level color + * management at once we have to either restrict the usage of CRTC properties + * or blend adjustments together. 
+ * + * Returns: + * 0 on success. Error code if setup fails. + */ +int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) +{ + struct dc_stream_state *stream = crtc->stream; + struct drm_color_ctm *ctm = NULL; + int ret; + + ret = amdgpu_dm_check_crtc_color_mgmt(crtc, false); + if (ret) + return ret; /* Setup CRTC CTM. */ if (crtc->base.ctm) { -- cgit v1.2.3 From 43f06e8165c4f6e16ab32ede845171ac66d4eaaa Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Tue, 9 Sep 2025 17:20:57 +0800 Subject: drm/amd/display: Optimize remove_duplicates() from O(N^2) to O(N) Replace the previous O(N^2) implementation of remove_duplicates() with a O(N) version using a fast/slow pointer approach. The new version keeps only the first occurrence of each element and compacts the array in place, improving efficiency without changing functionality. Signed-off-by: Kuan-Wei Chiu Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index e763c8e45da8..1b9579a32ff2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -48,18 +48,19 @@ static void set_reserved_time_on_all_planes_with_stream_index(struct display_con static void remove_duplicates(double *list_a, int *list_a_size) { - int cur_element = 0; - // For all elements b[i] in list_b[] - while (cur_element < *list_a_size - 1) { - if (list_a[cur_element] == list_a[cur_element + 1]) { - for (int j = cur_element + 1; j < *list_a_size - 1; j++) { - list_a[j] = list_a[j + 1]; - } - *list_a_size = *list_a_size - 1; - } else { - cur_element++; + int j = 0; + + if (*list_a_size == 0) + return; + + for (int i = 1; i < *list_a_size; i++) { + if 
(list_a[j] != list_a[i]) { + j++; + list_a[j] = list_a[i]; } } + + *list_a_size = j + 1; } static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) -- cgit v1.2.3 From 752e6f283ec59ae007aa15a93d5a4b2eefa8cec9 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Mon, 1 Sep 2025 18:51:05 -0300 Subject: drm/amd/display: remove output_tf_change flag Remove this flag as the driver stopped managing it individually since commit a4056c2a6344 ("drm/amd/display: use HW hdr mult for brightness boost"). After some back and forth it was reintroduced as a condition to `set_output_transfer_func()` in [1]. Without direct management, this flag only changes value when all surface update flags are set true on UPDATE_TYPE_FULL with no output TF status meaning. Fixes: bb622e0c0044 ("drm/amd/display: program output tf when required") [1] Signed-off-by: Melissa Wen Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 - drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 6 ++---- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 6 ++---- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 79de318c85e5..98f0b6b3c213 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1392,7 +1392,6 @@ union surface_update_flags { uint32_t in_transfer_func_change:1; uint32_t input_csc_change:1; uint32_t coeff_reduction_change:1; - uint32_t output_tf_change:1; uint32_t pixel_format_change:1; uint32_t plane_size_change:1; uint32_t gamut_remap_change:1; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 717d2c4daa88..9477c9f9e196 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1982,10 +1982,8 @@ static void 
dcn20_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 625653ce5556..7c276c319086 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2032,10 +2032,8 @@ void dcn401_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we -- cgit v1.2.3 From 0c1f3fe9a5f899ac95114e68959a35454af1523d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2025 16:22:45 -0400 Subject: Documentation: add initial documenation for user queues Add an initial documentation page for user mode queues. 
Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/index.rst | 1 + Documentation/gpu/amdgpu/userq.rst | 203 +++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 Documentation/gpu/amdgpu/userq.rst diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst index bb2894b5edaf..45523e9860fc 100644 --- a/Documentation/gpu/amdgpu/index.rst +++ b/Documentation/gpu/amdgpu/index.rst @@ -12,6 +12,7 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. module-parameters gc/index display/index + userq flashing xgmi ras diff --git a/Documentation/gpu/amdgpu/userq.rst b/Documentation/gpu/amdgpu/userq.rst new file mode 100644 index 000000000000..ca3ea71f7888 --- /dev/null +++ b/Documentation/gpu/amdgpu/userq.rst @@ -0,0 +1,203 @@ +================== + User Mode Queues +================== + +Introduction +============ + +Similar to the KFD, GPU engine queues move into userspace. The idea is to let +user processes manage their submissions to the GPU engines directly, bypassing +IOCTL calls to the driver to submit work. This reduces overhead and also allows +the GPU to submit work to itself. Applications can set up work graphs of jobs +across multiple GPU engines without needing trips through the CPU. + +UMDs directly interface with firmware via per application shared memory areas. +The main vehicle for this is a queue. A queue is a ring buffer with a read +pointer (rptr) and a write pointer (wptr). The UMD writes IP specific packets +into the queue and the firmware processes those packets, kicking off work on the +GPU engines. The CPU in the application (or another queue or device) updates +the wptr to tell the firmware how far into the ring buffer to process packets +and the rptr provides feedback to the UMD on how far the firmware has progressed +in executing those packets. When the wptr and the rptr are equal, the queue is +idle.
+ +Theory of Operation +=================== + +The various engines on modern AMD GPUs support multiple queues per engine with a +scheduling firmware which handles dynamically scheduling user queues on the +available hardware queue slots. When the number of user queues outnumbers the +available hardware queue slots, the scheduling firmware dynamically maps and +unmaps queues based on priority and time quanta. The state of each user queue +is managed in the kernel driver in an MQD (Memory Queue Descriptor). This is a +buffer in GPU accessible memory that stores the state of a user queue. The +scheduling firmware uses the MQD to load the queue state into an HQD (Hardware +Queue Descriptor) when a user queue is mapped. Each user queue requires a +number of additional buffers which represent the ring buffer and any metadata +needed by the engine for runtime operation. On most engines this consists of +the ring buffer itself, a rptr buffer (where the firmware will shadow the rptr +to userspace), a wptr buffer (where the application will write the wptr for the +firmware to fetch it), and a doorbell. A doorbell is a piece of one of the +device's MMIO BARs which can be mapped to specific user queues. When the +application writes to the doorbell, it will signal the firmware to take some +action. Writing to the doorbell wakes the firmware and causes it to fetch the +wptr and start processing the packets in the queue. Each 4K page of the doorbell +BAR supports specific offset ranges for specific engines. The doorbell of a +queue must be mapped into the aperture aligned to the IP used by the queue +(e.g., GFX, VCN, SDMA, etc.). These doorbell apertures are set up via NBIO +registers. Doorbells are 32 bit or 64 bit (depending on the engine) chunks of +the doorbell BAR. A 4K doorbell page provides 512 64-bit doorbells for up to +512 user queues. A subset of each page is reserved for each IP type supported +on the device. 
The user can query the doorbell ranges for each IP via the INFO +IOCTL. See the IOCTL Interfaces section for more information. + +When an application wants to create a user queue, it allocates the necessary +buffers for the queue (ring buffer, wptr and rptr, context save areas, etc.). +These can be separate buffers or all part of one larger buffer. The application +would map the buffer(s) into its GPUVM and use the GPU virtual addresses for +the areas of memory they want to use for the user queue. They would also +allocate a doorbell page for the doorbells used by the user queues. The +application would then populate the MQD in the USERQ IOCTL structure with the +GPU virtual addresses and doorbell index they want to use. The user can also +specify the attributes for the user queue (priority, whether the queue is secure +for protected content, etc.). The application would then call the USERQ +CREATE IOCTL to create the queue using the specified MQD details in the IOCTL. +The kernel driver then validates the MQD provided by the application and +translates the MQD into the engine specific MQD format for the IP. The IP +specific MQD would be allocated and the queue would be added to the run list +maintained by the scheduling firmware. Once the queue has been created, the +application can write packets directly into the queue, update the wptr, and +write to the doorbell offset to kick off work in the user queue. + +When the application is done with the user queue, it would call the USERQ +FREE IOCTL to destroy it. The kernel driver would preempt the queue and +remove it from the scheduling firmware's run list. Then the IP specific MQD +would be freed and the user queue state would be cleaned up. + +Some engines may require the aggregated doorbell too if the engine does not +support doorbells from unmapped queues. The aggregated doorbell is a special +page of doorbell space which wakes the scheduler.
In cases where the engine may +be oversubscribed, some queues may not be mapped. If the doorbell is rung when +the queue is not mapped, the engine firmware may miss the request. Some +scheduling firmware may work around this by polling wptr shadows when the +hardware is oversubscribed, other engines may support doorbell updates from +unmapped queues. In the event that one of these options is not available, the +kernel driver will map a page of aggregated doorbell space into each GPUVM +space. The UMD will then update the doorbell and wptr as normal and then write +to the aggregated doorbell as well. + +Special Packets +--------------- + +In order to support legacy implicit synchronization, as well as mixed user and +kernel queues, we need a synchronization mechanism that is secure. Because +kernel queues or memory management tasks depend on kernel fences, we need a way +for user queues to update memory that the kernel can use for a fence, that can't +be messed with by a bad actor. To support this, we've added a protected fence +packet. This packet works by writing a monotonically increasing value to +a memory location that only privileged clients have write access to. User +queues only have read access. When this packet is executed, the memory location +is updated and other queues (kernel or user) can see the results. The +user application would submit this packet in their command stream. The actual +packet format varies from IP to IP (GFX/Compute, SDMA, VCN, etc.), but the +behavior is the same. The packet submission is handled in userspace. The +kernel driver sets up the privileged memory used for each user queue when it +sets the queues up when the application creates them. + + +Memory Management +================= + +It is assumed that all buffers mapped into the GPUVM space for the process are +valid when engines on the GPU are running. The kernel driver will only allow +user queues to run when all buffers are mapped. 
If there is a memory event that +requires buffer migration, the kernel driver will preempt the user queues, +migrate buffers to where they need to be, update the GPUVM page tables and +invalidate the TLB, and then resume the user queues. + +Interaction with Kernel Queues +============================== + +Depending on the IP and the scheduling firmware, you can enable kernel queues +and user queues at the same time, however, you are limited by the HQD slots. +Kernel queues are always mapped so any work that goes into kernel queues will +take priority. This limits the available HQD slots for user queues. + +Not all IPs will support user queues on all GPUs. As such, UMDs will need to +support both user queues and kernel queues depending on the IP. For example, a +GPU may support user queues for GFX, compute, and SDMA, but not for VCN, JPEG, +and VPE. UMDs need to support both. The kernel driver provides a way to +determine if user queues and kernel queues are supported on a per IP basis. +UMDs can query this information via the INFO IOCTL and determine whether to use +kernel queues or user queues for each IP. + +Queue Resets +============ + +For most engines, queues can be reset individually. GFX, compute, and SDMA +queues can be reset individually. When a hung queue is detected, it can be +reset either via the scheduling firmware or MMIO. Since there are no kernel +fences for most user queues, they will usually only be detected when some other +event happens; e.g., a memory event which requires migration of buffers. When +the queues are preempted, if the queue is hung, the preemption will fail. +Driver will then look up the queues that failed to preempt and reset them and +record which queues are hung. + +On the UMD side, we will add a USERQ QUERY_STATUS IOCTL to query the queue +status.
UMD will provide the queue id in the IOCTL and the kernel driver +will check if it has already recorded the queue as hung (e.g., due to failed +preemption) and report back the status. + +IOCTL Interfaces +================ + +GPU virtual addresses used for queues and related data (rptrs, wptrs, context +save areas, etc.) should be validated by the kernel mode driver to prevent the +user from specifying invalid GPU virtual addresses. If the user provides +invalid GPU virtual addresses or doorbell indices, the IOCTL should return an +error message. These buffers should also be tracked in the kernel driver so +that if the user attempts to unmap the buffer(s) from the GPUVM, the unmap call +would return an error. + +INFO +---- +There are several new INFO queries related to user queues in order to query the +size of user queue meta data needed for a user queue (e.g., context save areas +or shadow buffers), whether kernel or user queues or both are supported +for each IP type, and the offsets for each IP type in each doorbell page. + +USERQ +----- +The USERQ IOCTL is used for creating, freeing, and querying the status of user +queues. It supports 3 opcodes: + +1. CREATE - Create a user queue. The application provides an MQD-like structure + that defines the type of queue and associated metadata and flags for that + queue type. Returns the queue id. +2. FREE - Free a user queue. +3. QUERY_STATUS - Query the status of a queue. Used to check if the queue is + healthy or not. E.g., if the queue has been reset. (WIP) + +USERQ_SIGNAL +------------ +The USERQ_SIGNAL IOCTL is used to provide a list of sync objects to be signaled. + +USERQ_WAIT +---------- +The USERQ_WAIT IOCTL is used to provide a list of sync objects to be waited on. + +Kernel and User Queues +====================== + +In order to properly validate and test performance, we have a driver option to +select what type of queues are enabled (kernel queues, user queues or both).
+The user_queue driver parameter allows you to enable kernel queues only (0), +user queues and kernel queues (1), and user queues only (2). Enabling user +queues only will free up static queue assignments that would otherwise be used +by kernel queues for use by the scheduling firmware. Some kernel queues are +required for kernel driver operation and they will always be created. When the +kernel queues are not enabled, they are not registered with the drm scheduler +and the CS IOCTL will reject any incoming command submissions which target those +queue types. Kernel queues only mirrors the behavior on all existing GPUs. +Enabling both queues allows for backwards compatibility with old userspace while +still supporting user queues. -- cgit v1.2.3 From eb6910cdaa8a1b5af11df6fd2e9a0b2b9f72a028 Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Wed, 6 Aug 2025 16:03:13 +0800 Subject: drm/amdgpu: Refactor VCN v5.0.1 HW init into separate instance function Split the per-instance initialization code from vcn_v5_0_1_hw_init() into a new vcn_v5_0_1_hw_init_inst() function. This improves code organization by: 1. Separating the instance-specific initialization logic 2. Making the main init function more readable 3. Following the pattern used in queue reset The SR-IOV specific initialization remains in the main function since it has different requirements. 
Reviewed-by: Sonny Jiang Signed-off-by: Jesse Zhang Signed-off-by: Ruili Ji Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 9c281ba6bced..3677ea9ffa43 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -253,6 +253,23 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v5_0_1_hw_init_inst(struct amdgpu_device *adev, int i) +{ + struct amdgpu_ring *ring; + int vcn_inst; + + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst), + adev->vcn.inst[i].aid_id); + + return 0; +} + /** * vcn_v5_0_1_hw_init - start and test VCN block * @@ -264,7 +281,7 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; - int i, r, vcn_inst; + int i, r; if (amdgpu_sriov_vf(adev)) { r = vcn_v5_0_1_start_sriov(adev); @@ -282,14 +299,8 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; - - if (ring->use_doorbell) - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 11 * vcn_inst), - adev->vcn.inst[i].aid_id); + vcn_v5_0_1_hw_init_inst(adev, i); /* Re-init fw_shared, if required */ vcn_v5_0_1_fw_shared_init(adev, i); -- cgit v1.2.3 From dc704458dd31ba45bf9cac3fe3e3ab546dabed20 Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Wed, 6 
Aug 2025 16:20:28 +0800 Subject: drm/amdgpu: Add ring reset support for VCN v5.0.1 Implement the ring reset callback for VCN v5.0.1 to properly handle hardware recovery when encountering GPU hangs. The new functionality: 1. Adds vcn_v5_0_1_ring_reset() function that: - Prepares for reset using amdgpu_ring_reset_helper_begin() - Performs VCN instance reset via amdgpu_dpm_reset_vcn() - Re-initializes hardware through vcn_v5_0_1_hw_init_inst() - Restarts DPG mode with vcn_v5_0_1_start_dpg_mode() - Completes reset with amdgpu_ring_reset_helper_end() 2. Hooks the reset function into the unified ring functions via: - Adding .reset = vcn_v5_0_1_ring_reset to vcn_v5_0_1_unified_ring_vm_funcs 3. Maintains existing behavior for SR-IOV VF cases by checking RRMT status This provides proper hardware recovery capabilities for VCN 5.0.1 IP block during fault conditions, matching functionality available in other VCN versions. v2: Remove the RRMT_ENABLED cap setting in the reset function and replace adev->vcn.inst[ring->me].indirect_sram with vinst->indirect_sram (Lijo) Reviewed-by: Sonny Jiang Suggested-by: Lijo Lazar Signed-off-by: Jesse Zhang Signed-off-by: Ruili Ji Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 3677ea9ffa43..11b931153ea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -1284,6 +1284,31 @@ static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r = 0; + int vcn_inst; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + vcn_inst = GET_INST(VCN, ring->me); + r = 
amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); + + if (r) { + DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r); + return r; + } + + vcn_v5_0_1_hw_init_inst(adev, ring->me); + vcn_v5_0_1_start_dpg_mode(vinst, vinst->indirect_sram); + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1312,6 +1337,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v5_0_1_ring_reset, }; /** -- cgit v1.2.3 From 5886090032ec891ccd6bf3c7e9d1ae3fe43ecc34 Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Fri, 15 Aug 2025 23:44:11 +0800 Subject: drm/amdgpu: Move VCN reset mask setup to late_init for VCN 5.0.1 This patch moves the initialization of the VCN supported_reset mask from sw_init to a new late_init function for VCN 5.0.1. The change ensures that all necessary hardware and firmware initialization is complete before determining the supported reset types. Key changes: - Added vcn_v5_0_1_late_init() function to handle late initialization - Moved supported_reset mask setup from sw_init to late_init - Added check for per-queue reset support via amdgpu_dpm_reset_vcn_is_supported() - Updated ip_funcs to use the new late_init function This change helps ensure proper reset behavior by waiting until all dependencies are initialized before determining available reset types. 
Reviewed-by: Sonny Jiang Signed-off-by: Jesse Zhang Signed-off-by: Ruili Ji Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 11b931153ea1..714350cabf2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -113,6 +113,25 @@ static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v5_0_1_late_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(13, 0, 12): + if ((adev->psp.sos.fw_version >= 0x00450025) && amdgpu_dpm_reset_vcn_is_supported(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + break; + default: + break; + } + + return 0; +} + static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx) { struct amdgpu_vcn5_fw_shared *fw_shared; @@ -187,10 +206,6 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) vcn_v5_0_1_fw_shared_init(adev, i); } - /* TODO: Add queue reset mask when FW fully supports it */ - adev->vcn.supported_reset = - amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -1541,7 +1556,7 @@ static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { .name = "vcn_v5_0_1", .early_init = vcn_v5_0_1_early_init, - .late_init = NULL, + .late_init = vcn_v5_0_1_late_init, .sw_init = vcn_v5_0_1_sw_init, .sw_fini = vcn_v5_0_1_sw_fini, .hw_init = vcn_v5_0_1_hw_init, -- cgit v1.2.3 From 4c709ccc478b4b711ee071146aae98eb7fa9f625 Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" 
Date: Mon, 22 Sep 2025 09:40:51 +0800 Subject: drm/amd/pm: Add VCN reset message support for SMU v13.0.12 This commit adds support for VCN reset functionality in SMU v13.0.12 by: 1. Adding two new PPSMC messages in smu_v13_0_12_ppsmc.h: - PPSMC_MSG_ResetVCN (0x5E) - Updates PPSMC_Message_Count to 0x5F to account for new messages 2. Adding message mapping for ResetVCN in smu_v13_0_12_ppt.c: - Maps SMU_MSG_ResetVCN to PPSMC_MSG_ResetVCN These changes enable proper VCN reset handling through the SMU firmware interface for compatible AMD GPUs. v2: Added fw version check to support vcn queue reset. Acked-by: Alex Deucher Reviewed-by: Yang Wang Reviewed-by: Sonny Jiang Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h | 3 ++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h index aff2776a8b6f..4b066c42e0ec 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h @@ -120,7 +120,8 @@ #define PPSMC_MSG_GetBadPageSeverity 0x5B #define PPSMC_MSG_GetSystemMetricsTable 0x5C #define PPSMC_MSG_GetSystemMetricsVersion 0x5D -#define PPSMC_Message_Count 0x5E +#define PPSMC_MSG_ResetVCN 0x5E +#define PPSMC_Message_Count 0x5F //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 1842a33b2bce..cb3fea9e8cf3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -136,6 +136,7 @@ const struct cmn2asic_msg_mapping 
smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), + MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1), MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 1), }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 349b6b8be010..cbe5b06438c1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -353,6 +353,9 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } + if (fw_ver > 0x04560900) + smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); + if (fw_ver >= 0x04560700) { if (fw_ver >= 0x04560900) { smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); -- cgit v1.2.3 From 7469567d882374dcac3fdb8b300e0f28cf875a75 Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Tue, 16 Sep 2025 13:11:06 +0800 Subject: drm/amdgpu: Add fallback to pipe reset if KCQ ring reset fails Add a fallback mechanism to attempt pipe reset when KCQ reset fails to recover the ring. After performing the KCQ reset and queue remapping, test the ring functionality. If the ring test fails, initiate a pipe reset as an additional recovery step. 
v2: fix the typo (Lijo) v3: try pipeline reset when kiq mapping fails (Lijo) Reviewed-by: Alex Deucher Signed-off-by: Lijo Lazar Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 8ba66d4dfe86..77f9d5b9a556 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3560,6 +3560,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; + int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE; unsigned long flags; int r; @@ -3597,6 +3598,7 @@ pipe_reset: if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)) return -EOPNOTSUPP; r = gfx_v9_4_3_reset_hw_pipe(ring); + reset_mode = AMDGPU_RESET_TYPE_PER_PIPE; dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name, r ? "failed" : "successfully"); if (r) @@ -3619,10 +3621,20 @@ pipe_reset: r = amdgpu_ring_test_ring(kiq_ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { + if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) + goto pipe_reset; + dev_err(adev->dev, "fail to remap queue\n"); return r; } + if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) { + r = amdgpu_ring_test_ring(ring); + if (r) + goto pipe_reset; + } + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } -- cgit v1.2.3 From 66f3883dbc9245ca96abf97c2a4dd2a1e898b32f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Sep 2025 16:07:35 +0200 Subject: drm/amdgpu: remove leftover from enforcing isolation by VMID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initially we enforced isolation by reserving a VMID, but that practice was now removed. 
Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 5dd78a9cb12d..cbdf108612d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -594,11 +594,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); } } - /* alloc a default reserved vmid to enforce isolation */ - for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE) - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); - } } /** -- cgit v1.2.3 From 90e09ea4cfd4aaaf07ababa6d8c880035587e7e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 19 Sep 2025 09:27:03 +0200 Subject: drm/amdgpu: revert "rework reserved VMID handling" v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e44a0fe630c58b0a87d8281f5c1077a3479e5fce. Initially we used VMID reservation to enforce isolation between processes. That has now been replaced by proper fence handling. Both OpenGL, RADV and ROCm developers requested a way to reserve a VMID for SPM, so restore that approach by reverting back to only allowing a single process to use the reserved VMID. Only compile tested for now. 
v2: use -ENOENT instead of -EINVAL if VMID is not available Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 61 +++++++++++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 11 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 +++------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- 4 files changed, 50 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index cbdf108612d2..3ef5bc95642c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -275,13 +275,12 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->vm_hub; - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; bool needs_flush = vm->use_cpu_for_update; uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; - *id = id_mgr->reserved; + *id = vm->reserved_vmid[vmhub]; if ((*id)->owner != vm->immediate.fence_context || !amdgpu_vmid_compatible(*id, job) || (*id)->flushed_updates < updates || @@ -474,40 +473,61 @@ bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) return vm->reserved_vmid[vmhub]; } -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, +/* + * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm + * @adev: amdgpu device structure + * @vm: the VM to reserve an ID for + * @vmhub: the VMHUB which should be used + * + * Mostly used to have a reserved VMID for debugging and SPM. + * + * Returns: 0 for success, -ENOENT if an ID is already reserved. 
+ */ +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *id; + int r = 0; mutex_lock(&id_mgr->lock); - - ++id_mgr->reserved_use_count; - if (!id_mgr->reserved) { - struct amdgpu_vmid *id; - - id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, - list); - /* Remove from normal round robin handling */ - list_del_init(&id->list); - id_mgr->reserved = id; + if (vm->reserved_vmid[vmhub]) + goto unlock; + if (id_mgr->reserved_vmid) { + r = -ENOENT; + goto unlock; } - + /* Remove from normal round robin handling */ + id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); + list_del_init(&id->list); + vm->reserved_vmid[vmhub] = id; + id_mgr->reserved_vmid = true; mutex_unlock(&id_mgr->lock); + return 0; +unlock: + mutex_unlock(&id_mgr->lock); + return r; } -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, +/* + * amdgpu_vmid_free_reserved - free up a reserved VMID again + * @adev: amdgpu device structure + * @vm: the VM with the reserved ID + * @vmhub: the VMHUB which should be used + */ +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; mutex_lock(&id_mgr->lock); - if (!--id_mgr->reserved_use_count) { - /* give the reserved ID back to normal round robin */ - list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); - id_mgr->reserved = NULL; + if (vm->reserved_vmid[vmhub]) { + list_add(&vm->reserved_vmid[vmhub]->list, + &id_mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + id_mgr->reserved_vmid = false; } - mutex_unlock(&id_mgr->lock); } @@ -574,7 +594,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) mutex_init(&id_mgr->lock); INIT_LIST_HEAD(&id_mgr->ids_lru); - id_mgr->reserved_use_count = 0; /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */ if 
(amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 240fa6751260..b3649cd3af56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -67,8 +67,7 @@ struct amdgpu_vmid_mgr { unsigned num_ids; struct list_head ids_lru; struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; - struct amdgpu_vmid *reserved; - unsigned int reserved_use_count; + bool reserved_vmid; }; int amdgpu_pasid_alloc(unsigned int bits); @@ -79,10 +78,10 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, - unsigned vmhub); -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, - unsigned vmhub); +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned vmhub); +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned vmhub); int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_job *job, struct dma_fence **fence); void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 112ce584a5ad..8c28e8923f02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2790,10 +2790,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { - if (vm->reserved_vmid[i]) { - amdgpu_vmid_free_reserved(adev, i); - vm->reserved_vmid[i] = false; - } + amdgpu_vmid_free_reserved(adev, vm, i); } ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); @@ -2889,6 +2886,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, 
void *data, struct drm_file *filp) union drm_amdgpu_vm *args = data; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; /* No valid flags defined yet */ if (args->in.flags) @@ -2897,17 +2895,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* We only have requirement to reserve vmid from gfxhub */ - if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true; - } - + amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0)); break; case AMDGPU_VM_OP_UNRESERVE_VMID: - if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { - amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0)); - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false; - } + amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0)); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 988e970d9e96..adc5c9161fa8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -415,7 +415,7 @@ struct amdgpu_vm { struct dma_fence *last_unlocked; unsigned int pasid; - bool reserved_vmid[AMDGPU_MAX_VMHUBS]; + struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; -- cgit v1.2.3 From 883bd89d00085c2c5f1efcd25861745cb039f9e3 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 19 Sep 2025 14:31:50 +0800 Subject: drm/amdgpu/userq: assign an error code for invalid userq va It should return an error code if userq VA validation fails. 
Fixes: 9e46b8bb0539 ("drm/amdgpu: validate userq buffer virtual address and size") Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index a22e6025de61..48e0932f5b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -71,6 +71,7 @@ int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, return 0; } + r = -EINVAL; out_err: amdgpu_bo_unreserve(vm->root.bo); return r; @@ -508,6 +509,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) || amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { + r = -EINVAL; kfree(queue); goto unlock; } -- cgit v1.2.3 From 4e3b45d7b6c36d7d1b9a30b13d2dfa890e7a0763 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Sep 2025 18:03:07 +0530 Subject: drm/amdgpu: remove the redeclaration of variable i MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variable "i" has been redeclared as integer later in the function which is wrong and not serving any purpose. 
Fixes: 899fbde14646 ("drm/amdgpu: replace get_user_pages with HMM mirror helpers") Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 744e6ff69814..9cd7741d2254 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -884,7 +884,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { bool userpage_invalidated = false; struct amdgpu_bo *bo = e->bo; - int i; r = amdgpu_ttm_tt_get_user_pages(bo, &e->range); if (r) -- cgit v1.2.3 From 0fb915d64d99b2cd6164fd0c5304457aa417ea3c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 24 Sep 2025 11:16:20 -0500 Subject: drm/amd/display: Only enable common modes for eDP and LVDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] The main reason common modes are added is for compatibility with clone mode when a laptop is connected to a projector or external monitor. Since commit 978fa2f6d0b12 ("drm/amd/display: Use scaling for non-native resolutions on eDP") when non-native modes are picked for eDP the GPU scaler will be used. This is because it is inconsistent whether eDP panels have the capability to actually drive non-native resolutions. With panels connected to other connectors this limitation generally doesn't exist as the EDID will advertise support for a number of resolutions and monitors will use built-in scaling hardware. Comparing DC and non-DC code paths the non-DC code path only adds common modes for LVDS and eDP whereas the DC codepath does it for all connector types.
In the past there was an experiment done to disable common mode adding for eDP and LVDS from commit 6d396e7ac1ce3 ("drm/amd/display: Disable common modes for LVDS") and commit 7948afb46af92 ("drm/amd/display: Disable common modes for eDP") but this was reverted in commit a8b79b09185de ("drm/amd: Re-enable common modes for eDP and LVDS") because it caused problems with Xorg. [How] Only add common modes for eDP and LVDS for DC, matching the behavior of non-DC. Suggested-by: Timur Kristóf Reviewed-by: Harry Wentland Reviewed-by: Timur Kristóf Reviewed-by: Alex Deucher Link: https://lore.kernel.org/r/20250924161624.1975819-2-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 951eaeb7f793..8e1622bf7a42 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8271,6 +8271,10 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, {"1920x1200", 1920, 1200} }; + if ((connector->connector_type != DRM_MODE_CONNECTOR_eDP) && + (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)) + return; + n = ARRAY_SIZE(common_modes); for (i = 0; i < n; i++) { -- cgit v1.2.3 From 210844d2c075e12927507097b7ac9ae7a4ae1c15 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 24 Sep 2025 11:16:21 -0500 Subject: drm/amd: Drop unnecessary check in amdgpu_connector_add_common_modes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] amdgpu_connector_add_common_modes() has a check for the width and height of common modes being too small, but the array of common_modes[] has fixed values. The check is dead code. [How] Drop unnecessary check. 
Cc: Timur Kristóf Reviewed-by: Timur Kristóf Reviewed-by: Alex Deucher Link: https://lore.kernel.org/r/20250924161624.1975819-3-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index bf38fc69c1cf..9b915f11ccac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -434,8 +434,6 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, common_modes[i].h == native_mode->vdisplay)) continue; } - if (common_modes[i].w < 320 || common_modes[i].h < 200) - continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); if (!mode) -- cgit v1.2.3 From 118800b0797a046adaa2a8e9dee9b971b78802a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Wed, 24 Sep 2025 13:38:34 +0200 Subject: drm/amd/display: Reject modes with too high pixel clock on DCE6-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reject modes with a pixel clock higher than the maximum display clock. Use 400 MHz as a fallback value when the maximum display clock is not known. Pixel clocks that are higher than the display clock just won't work and are not supported. With the addition of the YUV422 fallback, DC can now accidentally select a mode requiring higher pixel clock than actually supported when the DP version supports the required bandwidth but the clock is otherwise too high for the display engine. DCE 6-10 don't support these modes but they don't have a bandwidth calculation to reject them properly. 
Fixes: db291ed1732e ("drm/amd/display: Add fallback path for YCBCR422") Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c | 3 +++ drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c | 5 +++++ .../gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c | 10 +++++++++- drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c | 10 +++++++++- drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c | 10 +++++++++- 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index dbd6ef1b60a0..6131ede2db7a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -463,6 +463,9 @@ void dce_clk_mgr_construct( clk_mgr->max_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_INVALID; + base->clks.max_supported_dispclk_khz = + clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + dce_clock_read_integrated_info(clk_mgr); dce_clock_read_ss_info(clk_mgr); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index a39641a0ff09..69dd80d9f738 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -147,6 +147,8 @@ void dce60_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_internal *clk_mgr) { + struct clk_mgr *base = &clk_mgr->base; + dce_clk_mgr_construct(ctx, clk_mgr); memcpy(clk_mgr->max_clks_by_state, @@ -157,5 +159,8 @@ void dce60_clk_mgr_construct( clk_mgr->clk_mgr_shift = &disp_clk_shift; clk_mgr->clk_mgr_mask = &disp_clk_mask; clk_mgr->base.funcs = &dce60_funcs; + + 
base->clks.max_supported_dispclk_khz = + clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 3a51be63f020..f36ec4edf0ae 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -29,6 +29,7 @@ #include "stream_encoder.h" #include "resource.h" +#include "clk_mgr.h" #include "include/irq_service_interface.h" #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" @@ -843,10 +844,17 @@ static enum dc_status dce100_validate_bandwidth( { int i; bool at_least_one_pipe = false; + struct dc_stream_state *stream = NULL; + const uint32_t max_pix_clk_khz = max(dc->clk_mgr->clks.max_supported_dispclk_khz, 400000); for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) + stream = context->res_ctx.pipe_ctx[i].stream; + if (stream) { at_least_one_pipe = true; + + if (stream->timing.pix_clk_100hz >= max_pix_clk_khz * 10) + return DC_FAIL_BANDWIDTH_VALIDATE; + } } if (at_least_one_pipe) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index c164d2500c2a..b5433349fc7a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -34,6 +34,7 @@ #include "stream_encoder.h" #include "resource.h" +#include "clk_mgr.h" #include "include/irq_service_interface.h" #include "irq/dce60/irq_service_dce60.h" #include "dce110/dce110_timing_generator.h" @@ -870,10 +871,17 @@ static enum dc_status dce60_validate_bandwidth( { int i; bool at_least_one_pipe = false; + struct dc_stream_state *stream = NULL; + const uint32_t max_pix_clk_khz = max(dc->clk_mgr->clks.max_supported_dispclk_khz, 400000); 
for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) + stream = context->res_ctx.pipe_ctx[i].stream; + if (stream) { at_least_one_pipe = true; + + if (stream->timing.pix_clk_100hz >= max_pix_clk_khz * 10) + return DC_FAIL_BANDWIDTH_VALIDATE; + } } if (at_least_one_pipe) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 3e8b0ac11d90..538eafea82d5 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -32,6 +32,7 @@ #include "stream_encoder.h" #include "resource.h" +#include "clk_mgr.h" #include "include/irq_service_interface.h" #include "irq/dce80/irq_service_dce80.h" #include "dce110/dce110_timing_generator.h" @@ -876,10 +877,17 @@ static enum dc_status dce80_validate_bandwidth( { int i; bool at_least_one_pipe = false; + struct dc_stream_state *stream = NULL; + const uint32_t max_pix_clk_khz = max(dc->clk_mgr->clks.max_supported_dispclk_khz, 400000); for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) + stream = context->res_ctx.pipe_ctx[i].stream; + if (stream) { at_least_one_pipe = true; + + if (stream->timing.pix_clk_100hz >= max_pix_clk_khz * 10) + return DC_FAIL_BANDWIDTH_VALIDATE; + } } if (at_least_one_pipe) { -- cgit v1.2.3 From 99d7181bca34e96fbf61bdb6844918bdd4df2814 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 16 Sep 2025 21:21:15 +0800 Subject: amd/amdkfd: resolve a race in amdgpu_amdkfd_device_fini_sw There is race in amdgpu_amdkfd_device_fini_sw and interrupt. if amdgpu_amdkfd_device_fini_sw run in b/w kfd_cleanup_nodes and kfree(kfd), and KGD interrupt generated. 
kernel panic log: BUG: kernel NULL pointer dereference, address: 0000000000000098 amdgpu 0000:c8:00.0: amdgpu: Requesting 4 partitions through PSP PGD d78c68067 P4D d78c68067 kfd kfd: amdgpu: Allocated 3969056 bytes on gart PUD 1465b8067 PMD @ Oops: @002 [#1] SMP NOPTI kfd kfd: amdgpu: Total number of KFD nodes to be created: 4 CPU: 115 PID: @ Comm: swapper/115 Kdump: loaded Tainted: G S W OE K RIP: 0010:_raw_spin_lock_irqsave+0x12/0x40 Code: 89 e@ 41 5c c3 cc cc cc cc 66 66 2e Of 1f 84 00 00 00 00 00 OF 1f 40 00 Of 1f 44% 00 00 41 54 9c 41 5c fa 31 cO ba 01 00 00 00 OF b1 17 75 Ba 4c 89 e@ 41 Sc 89 c6 e8 07 38 5d RSP: 0018: ffffc90@1a6b0e28 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000018 0000000000000001 RSI: ffff8883bb623e00 RDI: 0000000000000098 ffff8883bb000000 RO8: ffff888100055020 ROO: ffff888100055020 0000000000000000 R11: 0000000000000000 R12: 0900000000000002 ffff888F2b97da0@ R14: @000000000000098 R15: ffff8883babdfo00 CS: 010 DS: 0000 ES: 0000 CRO: 0000000080050033 CR2: 0000000000000098 CR3: 0000000e7cae2006 CR4: 0000000002770ce0 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 0000000000000000 DR6: 00000000fffeO7FO DR7: 0000000000000400 PKRU: 55555554 Call Trace: kgd2kfd_interrupt+@x6b/0x1f@ [amdgpu] ? 
amdgpu_fence_process+0xa4/0x150 [amdgpu] kfd kfd: amdgpu: Node: 0, interrupt_bitmap: 3 YcpxFl Rant tErace amdgpu_irq_dispatch+0x165/0x210 [amdgpu] amdgpu_ih_process+0x80/0x100 [amdgpu] amdgpu: Virtual CRAT table created for GPU amdgpu_irq_handler+0x1f/@x60 [amdgpu] __handle_irq_event_percpu+0x3d/0x170 amdgpu: Topology: Add dGPU node [0x74a2:0x1002] handle_irq_event+0x5a/@xcO handle_edge_irq+0x93/0x240 kfd kfd: amdgpu: KFD node 1 partition @ size 49148M asm_call_irq_on_stack+0xf/@x20 common_interrupt+0xb3/0x130 asm_common_interrupt+0x1le/0x40 5.10.134-010.a1i5000.a18.x86_64 #1 Signed-off-by: Yifan Zhang Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 349c351e242b..051a00152b08 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1133,7 +1133,15 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) } for (i = 0; i < kfd->num_nodes; i++) { - node = kfd->nodes[i]; + /* Race if another thread in b/w + * kfd_cleanup_nodes and kfree(kfd), + * when kfd->nodes[i] = NULL + */ + if (kfd->nodes[i]) + node = kfd->nodes[i]; + else + return; + spin_lock_irqsave(&node->interrupt_lock, flags); if (node->interrupts_active -- cgit v1.2.3 From 45da20e00d5da842e17dfc633072b127504f0d0e Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 24 Sep 2025 23:19:14 +0800 Subject: amd/amdkfd: enhance kfd process check in switch partition Currently, switch partition only checks whether kfd_processes_table is empty. A kfd_processes_table entry is deleted in kfd_process_notifier_release, but the kfd_process teardown happens in kfd_process_wq_release.
Consider two processes: Process A (workqueue) -> kfd_process_wq_release -> Access kfd_node member Process B switch partition -> amdgpu_xcp_pre_partition_switch -> amdgpu_amdkfd_device_fini_sw -> kfd_node tear down. Process A and B may trigger a race as shown in the dmesg log. This patch resolves the race by adding an atomic kfd_process counter, kfd_processes_count, which is incremented when a kfd process is created and decremented when kfd_process_wq_release finishes. v2: Put kfd_processes_count per kfd_dev, move decrement to kfd_process_destroy_pdds and bug fix. (Philip Yang) [3966658.307702] divide error: 0000 [#1] SMP NOPTI [3966658.350818] i10nm_edac [3966658.356318] CPU: 124 PID: 38435 Comm: kworker/124:0 Kdump: loaded Tainted [3966658.356890] Workqueue: kfd_process_wq kfd_process_wq_release [amdgpu] [3966658.362839] nfit [3966658.366457] RIP: 0010:kfd_get_num_sdma_engines+0x17/0x40 [amdgpu] [3966658.366460] Code: 00 00 e9 ac 81 02 00 66 66 2e 0f 1f 84 00 00 00 00 00 90 0f 1f 44 00 00 48 8b 4f 08 48 8b b7 00 01 00 00 8b 81 58 26 03 00 99 be b8 01 00 00 80 b9 70 2e 00 00 00 74 0b 83 f8 02 ba 02 00 00 [3966658.380967] x86_pkg_temp_thermal [3966658.391529] RSP: 0018:ffffc900a0edfdd8 EFLAGS: 00010246 [3966658.391531] RAX: 0000000000000008 RBX: ffff8974e593b800 RCX: ffff888645900000 [3966658.391531] RDX: 0000000000000000 RSI: ffff888129154400 RDI: ffff888129151c00 [3966658.391532] RBP: ffff8883ad79d400 R08: 0000000000000000 R09: ffff8890d2750af4 [3966658.391532] R10: 0000000000000018 R11: 0000000000000018 R12: 0000000000000000 [3966658.391533] R13: ffff8883ad79d400 R14: ffffe87ff662ba00 R15: ffff8974e593b800 [3966658.391533] FS: 0000000000000000(0000) GS:ffff88fe7f600000(0000) knlGS:0000000000000000 [3966658.391534] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [3966658.391534] CR2: 0000000000d71000 CR3: 000000dd0e970004 CR4: 0000000002770ee0 [3966658.391535] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [3966658.391535] DR3: 0000000000000000 DR6: 00000000fffe07f0
DR7: 0000000000000400 [3966658.391536] PKRU: 55555554 [3966658.391536] Call Trace: [3966658.391674] deallocate_sdma_queue+0x38/0xa0 [amdgpu] [3966658.391762] process_termination_cpsch+0x1ed/0x480 [amdgpu] [3966658.399754] intel_powerclamp [3966658.402831] kfd_process_dequeue_from_all_devices+0x5b/0xc0 [amdgpu] [3966658.402908] kfd_process_wq_release+0x1a/0x1a0 [amdgpu] [3966658.410516] coretemp [3966658.434016] process_one_work+0x1ad/0x380 [3966658.434021] worker_thread+0x49/0x310 [3966658.438963] kvm_intel [3966658.446041] ? process_one_work+0x380/0x380 [3966658.446045] kthread+0x118/0x140 [3966658.446047] ? __kthread_bind_mask+0x60/0x60 [3966658.446050] ret_from_fork+0x1f/0x30 [3966658.446053] Modules linked in: kpatch_20765354(OEK) [3966658.455310] kvm [3966658.464534] mptcp_diag xsk_diag raw_diag unix_diag af_packet_diag netlink_diag udp_diag act_pedit act_mirred act_vlan cls_flower kpatch_21951273(OEK) kpatch_18424469(OEK) kpatch_19749756(OEK) [3966658.473462] idxd_mdev [3966658.482306] kpatch_17971294(OEK) sch_ingress xt_conntrack amdgpu(OE) amdxcp(OE) amddrm_buddy(OE) amd_sched(OE) amdttm(OE) amdkcl(OE) intel_ifs iptable_mangle tcm_loop target_core_pscsi tcp_diag target_core_file inet_diag target_core_iblock target_core_user target_core_mod coldpgs kpatch_18383292(OEK) ip6table_nat ip6table_filter ip6_tables ip_set_hash_ipportip ip_set_hash_ipportnet ip_set_hash_ipport ip_set_bitmap_port xt_comment iptable_nat nf_nat iptable_filter ip_tables ip_set ip_vs_sh ip_vs_wrr ip_vs_rr ip_vs nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 sn_core_odd(OE) i40e overlay binfmt_misc tun bonding(OE) aisqos(OE) aisqos_hotfixes(OE) rfkill uio_pci_generic uio cuse fuse nf_tables nfnetlink intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common i10nm_edac nfit x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm idxd_mdev [3966658.491237] vfio_pci [3966658.501196] vfio_pci vfio_virqfd mdev vfio_iommu_type1 vfio iax_crypto intel_pmt_telemetry 
iTCO_wdt intel_pmt_class iTCO_vendor_support irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel rapl intel_cstate snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hda_core snd_hwdep snd_seq [3966658.508537] vfio_virqfd [3966658.517569] snd_seq_device ipmi_ssif isst_if_mbox_pci isst_if_mmio pcspkr snd_pcm idxd intel_uncore ses isst_if_common intel_vsec idxd_bus enclosure snd_timer mei_me snd i2c_i801 i2c_smbus mei i2c_ismt soundcore joydev acpi_ipmi ipmi_si ipmi_devintf ipmi_msghandler acpi_power_meter acpi_pad vfat fat [3966658.526851] mdev [3966658.536096] nfsd auth_rpcgss nfs_acl lockd grace slb_vtoa(OE) sunrpc dm_mod hookers mlx5_ib(OE) ast i2c_algo_bit drm_vram_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm_ttm_helper ttm mlx5_core(OE) mlxfw(OE) [3966658.540381] vfio_iommu_type1 [3966658.544341] nvme mpt3sas tls drm nvme_core pci_hyperv_intf raid_class psample libcrc32c crc32c_intel mlxdevm(OE) i2c_core [3966658.551254] vfio [3966658.558742] scsi_transport_sas wmi pinctrl_emmitsburg sd_mod t10_pi sg ahci libahci libata rdma_ucm(OE) ib_uverbs(OE) rdma_cm(OE) iw_cm(OE) ib_cm(OE) ib_umad(OE) ib_core(OE) ib_ucm(OE) mlx_compat(OE) [3966658.563004] iax_crypto [3966658.570988] [last unloaded: diagnose] [3966658.571027] ---[ end trace cc9dbb180f9ae537 ]--- Signed-off-by: Yifan Zhang Reviewed-by: Philip.Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 10 ++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 051a00152b08..e9cfb80bd436 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -495,6 +495,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) mutex_init(&kfd->doorbell_mutex); ida_init(&kfd->doorbell_ida); + atomic_set(&kfd->kfd_processes_count, 0); 
return kfd; } @@ -1493,6 +1494,15 @@ int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd) mutex_lock(&kfd_processes_mutex); + /* kfd_processes_count is per kfd_dev, return -EBUSY without + * further check + */ + if (!!atomic_read(&kfd->kfd_processes_count)) { + pr_debug("process_wq_release not finished\n"); + r = -EBUSY; + goto out; + } + if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd)) goto out; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index d01ef5ac0766..70ef051511bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -382,6 +382,8 @@ struct kfd_dev { /* for dynamic partitioning */ int kfd_dev_lock; + + atomic_t kfd_processes_count; }; enum kfd_mempool { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 5be28c6c4f6a..ddfe30c13e9d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1088,6 +1088,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pdd->runtime_inuse = false; } + atomic_dec(&pdd->dev->kfd->kfd_processes_count); + kfree(pdd); p->pdds[i] = NULL; } @@ -1649,6 +1651,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, /* Init idr used for memory handle translation */ idr_init(&pdd->alloc_idr); + atomic_inc(&dev->kfd->kfd_processes_count); + return pdd; } -- cgit v1.2.3 From b8ae2640f9acd4f411c9227d2493755d03fe440a Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Wed, 24 Sep 2025 16:00:06 +0800 Subject: drm/amdgpu: Fix fence signaling race condition in userqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes a potential race condition in the userqueue fence signaling mechanism by replacing dma_fence_is_signaled_locked() with dma_fence_is_signaled(). The issue occurred because: 1. 
dma_fence_is_signaled_locked() should only be used when holding the fence's individual lock, not just the fence list lock 2. Using the locked variant without the proper fence lock could lead to double-signaling scenarios: - Hardware completion signals the fence - Software path also tries to signal the same fence By using dma_fence_is_signaled() instead, we properly handle the locking hierarchy and avoid the race condition while still maintaining the necessary synchronization through the fence_list_lock. v2: drop the comment (Christian) Reviewed-by: Christian König Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 95e91d1dc58a..761bad98da3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -284,7 +284,7 @@ static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, /* Check if hardware has already processed the job */ spin_lock_irqsave(&fence_drv->fence_list_lock, flags); - if (!dma_fence_is_signaled_locked(fence)) + if (!dma_fence_is_signaled(fence)) list_add_tail(&userq_fence->link, &fence_drv->fences); else dma_fence_put(fence); -- cgit v1.2.3 From ee352f6c56e1775b192f2d39ad45362148e1fd16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Wed, 24 Sep 2025 13:38:35 +0200 Subject: drm/amd/display: Share dce100_validate_bandwidth with DCE6-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DCE6-8 have very similar capabilities to DCE10, they support the same DP and HDMI versions and work similarly. Share dce100_validate_bandwidth between DCE6-10 to reduce code duplication in the DC driver. 
Signed-off-by: Timur Kristóf Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- .../display/dc/resource/dce100/dce100_resource.c | 13 ++++++- .../display/dc/resource/dce100/dce100_resource.h | 5 +++ .../amd/display/dc/resource/dce60/dce60_resource.c | 43 +--------------------- .../amd/display/dc/resource/dce80/dce80_resource.c | 34 +---------------- 4 files changed, 18 insertions(+), 77 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index f36ec4edf0ae..6421a56ffd23 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -837,7 +837,7 @@ static enum dc_status build_mapped_resource( return DC_OK; } -static enum dc_status dce100_validate_bandwidth( +enum dc_status dce100_validate_bandwidth( struct dc *dc, struct dc_state *context, enum dc_validate_mode validate_mode) @@ -862,7 +862,16 @@ static enum dc_status dce100_validate_bandwidth( context->bw_ctx.bw.dce.dispclk_khz = 681000; context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; + /* On DCE 6.0 and 6.4 the PLL0 is both the display engine clock and + * the DP clock, and shouldn't be turned off. Just select the display + * clock value from its low power mode. 
+ */ + if (dc->ctx->dce_version == DCE_VERSION_6_0 || + dc->ctx->dce_version == DCE_VERSION_6_4) + context->bw_ctx.bw.dce.dispclk_khz = 352000; + else + context->bw_ctx.bw.dce.dispclk_khz = 0; + context->bw_ctx.bw.dce.yclk_khz = 0; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h index fecab7c560f5..08e045601a77 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h @@ -41,6 +41,11 @@ struct resource_pool *dce100_create_resource_pool( enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps); +enum dc_status dce100_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + enum dc_validate_mode validate_mode); + enum dc_status dce100_add_stream_to_ctx( struct dc *dc, struct dc_state *new_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index b5433349fc7a..61ad3703461e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -864,47 +864,6 @@ static void dce60_resource_destruct(struct dce110_resource_pool *pool) } } -static enum dc_status dce60_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - enum dc_validate_mode validate_mode) -{ - int i; - bool at_least_one_pipe = false; - struct dc_stream_state *stream = NULL; - const uint32_t max_pix_clk_khz = max(dc->clk_mgr->clks.max_supported_dispclk_khz, 400000); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - stream = context->res_ctx.pipe_ctx[i].stream; - if (stream) { - at_least_one_pipe = true; - - if (stream->timing.pix_clk_100hz >= max_pix_clk_khz * 10) - return DC_FAIL_BANDWIDTH_VALIDATE; - } - } - - if (at_least_one_pipe) { - /* TODO implement when needed but for now 
hardcode max value*/ - context->bw_ctx.bw.dce.dispclk_khz = 681000; - context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; - } else { - /* On DCE 6.0 and 6.4 the PLL0 is both the display engine clock and - * the DP clock, and shouldn't be turned off. Just select the display - * clock value from its low power mode. - */ - if (dc->ctx->dce_version == DCE_VERSION_6_0 || - dc->ctx->dce_version == DCE_VERSION_6_4) - context->bw_ctx.bw.dce.dispclk_khz = 352000; - else - context->bw_ctx.bw.dce.dispclk_khz = 0; - - context->bw_ctx.bw.dce.yclk_khz = 0; - } - - return DC_OK; -} - static bool dce60_validate_surface_sets( struct dc_state *context) { @@ -948,7 +907,7 @@ static const struct resource_funcs dce60_res_pool_funcs = { .destroy = dce60_destroy_resource_pool, .link_enc_create = dce60_link_encoder_create, .panel_cntl_create = dce60_panel_cntl_create, - .validate_bandwidth = dce60_validate_bandwidth, + .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, .validate_global = dce60_validate_global, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 538eafea82d5..0c9a39bf7ff4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -870,38 +870,6 @@ static void dce80_resource_destruct(struct dce110_resource_pool *pool) } } -static enum dc_status dce80_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - enum dc_validate_mode validate_mode) -{ - int i; - bool at_least_one_pipe = false; - struct dc_stream_state *stream = NULL; - const uint32_t max_pix_clk_khz = max(dc->clk_mgr->clks.max_supported_dispclk_khz, 400000); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - stream = context->res_ctx.pipe_ctx[i].stream; - if (stream) { - at_least_one_pipe = true; - - if 
(stream->timing.pix_clk_100hz >= max_pix_clk_khz * 10) - return DC_FAIL_BANDWIDTH_VALIDATE; - } - } - - if (at_least_one_pipe) { - /* TODO implement when needed but for now hardcode max value*/ - context->bw_ctx.bw.dce.dispclk_khz = 681000; - context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; - } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; - context->bw_ctx.bw.dce.yclk_khz = 0; - } - - return DC_OK; -} - static bool dce80_validate_surface_sets( struct dc_state *context) { @@ -945,7 +913,7 @@ static const struct resource_funcs dce80_res_pool_funcs = { .destroy = dce80_destroy_resource_pool, .link_enc_create = dce80_link_encoder_create, .panel_cntl_create = dce80_panel_cntl_create, - .validate_bandwidth = dce80_validate_bandwidth, + .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, .validate_global = dce80_validate_global, -- cgit v1.2.3 From 1f721ebcf312df88c6da6457e0ff21c33613f73c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Wed, 24 Sep 2025 13:38:36 +0200 Subject: drm/amd/display: Share dce100_validate_global with DCE6-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dce100_validate_global function was verbatim exactly the same as dce60_validate_global and dce80_validate_global. Share dce100_validate_global between DCE6-10 to save code size. 
Signed-off-by: Timur Kristóf Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- .../display/dc/resource/dce100/dce100_resource.c | 2 +- .../display/dc/resource/dce100/dce100_resource.h | 4 +++ .../amd/display/dc/resource/dce60/dce60_resource.c | 32 +--------------------- .../amd/display/dc/resource/dce80/dce80_resource.c | 32 +--------------------- 4 files changed, 7 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 6421a56ffd23..c4b4dc3ad8c9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -898,7 +898,7 @@ static bool dce100_validate_surface_sets( return true; } -static enum dc_status dce100_validate_global( +enum dc_status dce100_validate_global( struct dc *dc, struct dc_state *context) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h index 08e045601a77..dd150a4b4610 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h @@ -41,6 +41,10 @@ struct resource_pool *dce100_create_resource_pool( enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps); +enum dc_status dce100_validate_global( + struct dc *dc, + struct dc_state *context); + enum dc_status dce100_validate_bandwidth( struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 61ad3703461e..53c67ebe779f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -864,36 +864,6 @@ static void 
dce60_resource_destruct(struct dce110_resource_pool *pool) } } -static bool dce60_validate_surface_sets( - struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 1) - return false; - - if (context->stream_status[i].plane_states[0]->format - >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return false; - } - - return true; -} - -static enum dc_status dce60_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce60_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - static void dce60_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -910,7 +880,7 @@ static const struct resource_funcs dce60_res_pool_funcs = { .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce60_validate_global, + .validate_global = dce100_validate_global, .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 0c9a39bf7ff4..5b7769745202 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -870,36 +870,6 @@ static void dce80_resource_destruct(struct dce110_resource_pool *pool) } } -static bool dce80_validate_surface_sets( - struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 1) - return false; - - if (context->stream_status[i].plane_states[0]->format - >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return false; - } - 
- return true; -} - -static enum dc_status dce80_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce80_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - static void dce80_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -916,7 +886,7 @@ static const struct resource_funcs dce80_res_pool_funcs = { .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce80_validate_global, + .validate_global = dce100_validate_global, .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link }; -- cgit v1.2.3 From 123a1750c5e0dcbfec953647045947be9620a7d8 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 24 Sep 2025 11:16:22 -0500 Subject: drm/amd: Use dynamic array size declaration for amdgpu_connector_add_common_modes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Adding or removing a mode from common_modes[] can be fragile if a user forgot to update the for loop boundaries. [How] Use ARRAY_SIZE() to detect size of the array and use that instead. 
Cc: Timur Kristóf Reviewed-by: Alex Deucher Reviewed-by: Timur Kristóf Link: https://lore.kernel.org/r/20250924161624.1975819-4-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 9b915f11ccac..5e2831ba9730 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -398,10 +398,11 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, struct drm_display_mode *mode = NULL; struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode; int i; + int n; static const struct mode_size { int w; int h; - } common_modes[17] = { + } common_modes[] = { { 640, 480}, { 720, 480}, { 800, 600}, @@ -421,7 +422,9 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, {1920, 1200} }; - for (i = 0; i < 17; i++) { + n = ARRAY_SIZE(common_modes); + + for (i = 0; i < n; i++) { if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) { if (common_modes[i].w > 1024 || common_modes[i].h > 768) -- cgit v1.2.3 From dbf2341569dfbc61ff34c32de988bc058d0644d9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 15 Nov 2024 08:56:33 -0500 Subject: drm/amdgpu: update MODULE_PARM_DESC for freesync_video MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To better describe what it does. 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3756 Reviewed-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ece251cbe8c3..4905efa63ddc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -960,7 +960,7 @@ module_param_named(tmz, amdgpu_tmz, int, 0444); */ MODULE_PARM_DESC( freesync_video, - "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); + "Adds additional modes via VRR for refresh changes without a full modeset (0 = off (default), 1 = on)"); module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); /** -- cgit v1.2.3 From 6d622755bc9774a1edcb85325951f4c4a8ba9a55 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 24 Sep 2025 11:16:23 -0500 Subject: drm/amd: Drop some common modes from amdgpu_connector_add_common_modes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] DC and non-DC codepaths have different sets of common modes that are added for eDP and LVDS cases. This can cause different behaviors for turning on DC on hardware that can support both. [How] Drop extra modes from amdgpu_connector_add_common_modes() not present in amdgpu_dm_connector_add_common_modes(). 
Cc: Timur Kristóf Reviewed-by: Timur Kristóf Reviewed-by: Alex Deucher Link: https://lore.kernel.org/r/20250924161624.1975819-5-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 5e2831ba9730..0ab22b919da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -404,18 +404,12 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, int h; } common_modes[] = { { 640, 480}, - { 720, 480}, { 800, 600}, - { 848, 480}, {1024, 768}, - {1152, 768}, {1280, 720}, {1280, 800}, - {1280, 854}, - {1280, 960}, {1280, 1024}, {1440, 900}, - {1400, 1050}, {1680, 1050}, {1600, 1200}, {1920, 1080}, -- cgit v1.2.3 From df2ba5709416ac6ce8dab1d141fc246b79549e78 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 24 Sep 2025 11:16:24 -0500 Subject: drm/amd: Add name to modes from amdgpu_connector_add_common_modes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] When DC adds common modes it adds modes with a string to match what they are. Non-DC doesn't. This can be inconsistent when turning on/off DC support. [How] Add a name member to common_modes[] and copy it into the drm display mode. 
Cc: Timur Kristóf Reviewed-by: Alex Deucher Reviewed-by: Timur Kristóf Link: https://lore.kernel.org/r/20250924161624.1975819-6-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 0ab22b919da5..47e9bfba0642 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -399,21 +399,22 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode; int i; int n; - static const struct mode_size { + struct mode_size { + char name[DRM_DISPLAY_MODE_LEN]; int w; int h; } common_modes[] = { - { 640, 480}, - { 800, 600}, - {1024, 768}, - {1280, 720}, - {1280, 800}, - {1280, 1024}, - {1440, 900}, - {1680, 1050}, - {1600, 1200}, - {1920, 1080}, - {1920, 1200} + { "640x480", 640, 480}, + { "800x600", 800, 600}, + { "1024x768", 1024, 768}, + { "1280x720", 1280, 720}, + { "1280x800", 1280, 800}, + {"1280x1024", 1280, 1024}, + { "1440x900", 1440, 900}, + {"1680x1050", 1680, 1050}, + {"1600x1200", 1600, 1200}, + {"1920x1080", 1920, 1080}, + {"1920x1200", 1920, 1200} }; n = ARRAY_SIZE(common_modes); @@ -435,6 +436,7 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); if (!mode) return; + strscpy(mode->name, common_modes[i].name, DRM_DISPLAY_MODE_LEN); drm_mode_probed_add(connector, mode); } -- cgit v1.2.3