From 62498af7b1082d76031f98e4c3d6fb2b9af47772 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Wed, 16 Jul 2025 19:51:22 +0200 Subject: dt-bindings: cpufreq: Add mediatek,mt8196-cpufreq-hw binding The MediaTek MT8196 SoC has new cpufreq hardware, with added memory register ranges to control Dynamic-Voltage-Frequency-Scaling. The DVFS hardware is controlled through a set of registers referred to as "FDVFS". They set the target frequency the DVFS hardware should aim for for each performance domain. Instead of working around the old binding and its already established meanings for the reg items, add a new binding. The FDVFS register memory region is at the beginning, which allows us to easily expand this binding for future SoCs which may have more than 3 performance domains. Signed-off-by: Nicolas Frattaroli Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Rob Herring (Arm) Signed-off-by: Viresh Kumar --- .../cpufreq/mediatek,mt8196-cpufreq-hw.yaml | 82 ++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 Documentation/devicetree/bindings/cpufreq/mediatek,mt8196-cpufreq-hw.yaml diff --git a/Documentation/devicetree/bindings/cpufreq/mediatek,mt8196-cpufreq-hw.yaml b/Documentation/devicetree/bindings/cpufreq/mediatek,mt8196-cpufreq-hw.yaml new file mode 100644 index 000000000000..5f3c7db3f3aa --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/mediatek,mt8196-cpufreq-hw.yaml @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/cpufreq/mediatek,mt8196-cpufreq-hw.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek Hybrid CPUFreq for MT8196/MT6991 series SoCs + +maintainers: + - Nicolas Frattaroli + +description: + MT8196 uses CPUFreq management hardware that supports dynamic voltage + frequency scaling (dvfs), and can support several performance domains. + +properties: + compatible: + const: mediatek,mt8196-cpufreq-hw + + reg: + items: + - description: FDVFS control register region + - description: OPP tables and control for performance domain 0 + - description: OPP tables and control for performance domain 1 + - description: OPP tables and control for performance domain 2 + + "#performance-domain-cells": + const: 1 + +required: + - compatible + - reg + - "#performance-domain-cells" + +additionalProperties: false + +examples: + - | + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a720"; + enable-method = "psci"; + performance-domains = <&performance 0>; + reg = <0x000>; + }; + + /* ... */ + + cpu6: cpu@600 { + device_type = "cpu"; + compatible = "arm,cortex-x4"; + enable-method = "psci"; + performance-domains = <&performance 1>; + reg = <0x600>; + }; + + cpu7: cpu@700 { + device_type = "cpu"; + compatible = "arm,cortex-x925"; + enable-method = "psci"; + performance-domains = <&performance 2>; + reg = <0x700>; + }; + }; + + /* ... 
*/ + + soc { + #address-cells = <2>; + #size-cells = <2>; + + performance: performance-controller@c2c2034 { + compatible = "mediatek,mt8196-cpufreq-hw"; + reg = <0 0xc220400 0 0x20>, <0 0xc2c0f20 0 0x120>, + <0 0xc2c1040 0 0x120>, <0 0xc2c1160 0 0x120>; + #performance-domain-cells = <1>; + }; + }; -- cgit v1.2.3 From 2f5178abb9f5164061b9cbbb5dd860f4d206b03a Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Wed, 16 Jul 2025 19:51:23 +0200 Subject: cpufreq: mediatek-hw: Refactor match data into struct While the driver could get away with having the per-compatible match data just be an array of the reg offsets, the only thing it used it for right now, this doesn't really allow it to be extended in any meaningful way if some other per-variant information needs to be communicated. In preparation of adding support for hybrid "FDVFS" for MT8196, refactor the code to make the DT match data a struct, which currently only contains a single member: the reg offsets. This will allow this struct to be extended with other members for other hardware variants. Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Nicolas Frattaroli Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq-hw.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 74f1b4c796e4..b2aba1842226 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -41,15 +41,22 @@ struct mtk_cpufreq_data { struct resource *res; void __iomem *base; int nr_opp; + const struct mtk_cpufreq_variant *variant; }; -static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = { - [REG_FREQ_LUT_TABLE] = 0x0, - [REG_FREQ_ENABLE] = 0x84, - [REG_FREQ_PERF_STATE] = 0x88, - [REG_FREQ_HW_STATE] = 0x8c, - [REG_EM_POWER_TBL] = 0x90, - [REG_FREQ_LATENCY] = 0x110, +struct mtk_cpufreq_variant { + const u16 reg_offsets[REG_ARRAY_SIZE]; +}; + +static const struct mtk_cpufreq_variant cpufreq_mtk_base_variant = { + .reg_offsets = { + [REG_FREQ_LUT_TABLE] = 0x0, + [REG_FREQ_ENABLE] = 0x84, + [REG_FREQ_PERF_STATE] = 0x88, + [REG_FREQ_HW_STATE] = 0x8c, + [REG_EM_POWER_TBL] = 0x90, + [REG_FREQ_LATENCY] = 0x110, + }, }; static int __maybe_unused @@ -157,7 +164,7 @@ static int mtk_cpu_create_freq_table(struct platform_device *pdev, static int mtk_cpu_resources_init(struct platform_device *pdev, struct cpufreq_policy *policy, - const u16 *offsets) + const struct mtk_cpufreq_variant *variant) { struct mtk_cpufreq_data *data; struct device *dev = &pdev->dev; @@ -200,9 +207,10 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, data->base = base; data->res = res; + data->variant = variant; for (i = REG_FREQ_LUT_TABLE; i < REG_ARRAY_SIZE; i++) - data->reg_bases[i] = base + offsets[i]; + data->reg_bases[i] = base + variant->reg_offsets[i]; ret = mtk_cpu_create_freq_table(pdev, data); if (ret) { @@ -336,7 +344,7 @@ static void mtk_cpufreq_hw_driver_remove(struct platform_device *pdev) } static const struct of_device_id mtk_cpufreq_hw_match[] = { - { .compatible = "mediatek,cpufreq-hw", .data = &cpufreq_mtk_offsets }, + { .compatible = "mediatek,cpufreq-hw", .data = &cpufreq_mtk_base_variant }, {} }; MODULE_DEVICE_TABLE(of, mtk_cpufreq_hw_match); -- cgit v1.2.3 From 35eb6b78854d2e9671ce8ee6b735aa0114c48da8 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Wed, 16 Jul 2025 19:51:24 +0200 Subject: cpufreq: mediatek-hw: Separate per-domain and per-instance data As it stood, the mediatek 
cpufreq driver could get away with never really having a private driver instance struct. This is because all data was stored in the per-domain structs. However, this complicates matters when actual per-instance data like the variant struct is introduced. Instead of having a pointer to it for every domain, have a pointer to a global "priv" struct that can be extended over time, and rename the "data" struct to "domain" to distinguish its purpose better. Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Nicolas Frattaroli Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq-hw.c | 42 ++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index b2aba1842226..53611077d0d9 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -35,13 +35,17 @@ enum { REG_ARRAY_SIZE, }; -struct mtk_cpufreq_data { +struct mtk_cpufreq_priv { + const struct mtk_cpufreq_variant *variant; +}; + +struct mtk_cpufreq_domain { + struct mtk_cpufreq_priv *parent; struct cpufreq_frequency_table *table; void __iomem *reg_bases[REG_ARRAY_SIZE]; struct resource *res; void __iomem *base; int nr_opp; - const struct mtk_cpufreq_variant *variant; }; struct mtk_cpufreq_variant { @@ -63,7 +67,7 @@ static int __maybe_unused mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *uW, unsigned long *KHz) { - struct mtk_cpufreq_data *data; + struct mtk_cpufreq_domain *data; struct cpufreq_policy *policy; int i; @@ -90,7 +94,7 @@ mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *uW, static int mtk_cpufreq_hw_target_index(struct cpufreq_policy *policy, unsigned int index) { - struct mtk_cpufreq_data *data = policy->driver_data; + struct mtk_cpufreq_domain *data = policy->driver_data; writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]); @@ -99,7 +103,7 @@ static int mtk_cpufreq_hw_target_index(struct cpufreq_policy *policy, static unsigned int mtk_cpufreq_hw_get(unsigned int cpu) { - struct mtk_cpufreq_data *data; + struct mtk_cpufreq_domain *data; struct cpufreq_policy *policy; unsigned int index; @@ -118,7 +122,7 @@ static unsigned int mtk_cpufreq_hw_get(unsigned int cpu) static unsigned int mtk_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { - struct mtk_cpufreq_data *data = policy->driver_data; + struct mtk_cpufreq_domain *data = policy->driver_data; unsigned int index; index = cpufreq_table_find_index_dl(policy, target_freq, false); @@ -129,7 +133,7 @@ static unsigned int mtk_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, } static int mtk_cpu_create_freq_table(struct platform_device *pdev, - struct mtk_cpufreq_data *data) + struct mtk_cpufreq_domain *data) { struct device *dev = &pdev->dev; u32 temp, i, freq, prev_freq = 0; @@ -164,9 +168,9 @@ static int mtk_cpu_create_freq_table(struct platform_device *pdev, static int mtk_cpu_resources_init(struct platform_device *pdev, struct cpufreq_policy *policy, - const struct mtk_cpufreq_variant *variant) + struct mtk_cpufreq_priv *priv) { - struct mtk_cpufreq_data *data; + struct mtk_cpufreq_domain *data; struct device *dev = &pdev->dev; struct resource *res; struct of_phandle_args args; @@ -187,6 +191,8 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, index = args.args[0]; of_node_put(args.np); + data->parent = priv; + res = platform_get_resource(pdev, IORESOURCE_MEM, index); if (!res) { dev_err(dev, "failed to get mem 
resource %d\n", index); @@ -207,10 +213,9 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, data->base = base; data->res = res; - data->variant = variant; for (i = REG_FREQ_LUT_TABLE; i < REG_ARRAY_SIZE; i++) - data->reg_bases[i] = base + variant->reg_offsets[i]; + data->reg_bases[i] = base + priv->variant->reg_offsets[i]; ret = mtk_cpu_create_freq_table(pdev, data); if (ret) { @@ -231,7 +236,7 @@ static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) { struct platform_device *pdev = cpufreq_get_driver_data(); int sig, pwr_hw = CPUFREQ_HW_STATUS | SVS_HW_STATUS; - struct mtk_cpufreq_data *data; + struct mtk_cpufreq_domain *data; unsigned int latency; int ret; @@ -270,7 +275,7 @@ static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) static void mtk_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) { - struct mtk_cpufreq_data *data = policy->driver_data; + struct mtk_cpufreq_domain *data = policy->driver_data; struct resource *res = data->res; void __iomem *base = data->base; @@ -283,7 +288,7 @@ static void mtk_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) static void mtk_cpufreq_register_em(struct cpufreq_policy *policy) { struct em_data_callback em_cb = EM_DATA_CB(mtk_cpufreq_get_cpu_power); - struct mtk_cpufreq_data *data = policy->driver_data; + struct mtk_cpufreq_domain *data = policy->driver_data; em_dev_register_perf_domain(get_cpu_device(policy->cpu), data->nr_opp, &em_cb, policy->cpus, true); @@ -305,6 +310,7 @@ static struct cpufreq_driver cpufreq_mtk_hw_driver = { static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) { + struct mtk_cpufreq_priv *priv; const void *data; int ret, cpu; struct device *cpu_dev; @@ -328,7 +334,13 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) if (!data) return -EINVAL; - platform_set_drvdata(pdev, (void *) data); + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->variant = data; + + platform_set_drvdata(pdev, priv); cpufreq_mtk_hw_driver.driver_data = pdev; ret = cpufreq_register_driver(&cpufreq_mtk_hw_driver); -- cgit v1.2.3 From 32e0d669f3ac9574862a64a56c9a6dff675f8600 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Wed, 16 Jul 2025 19:51:25 +0200 Subject: cpufreq: mediatek-hw: Add support for MT8196 The MT8196 SoC uses DVFS to set a desired target frequency for each CPU core. It also uses slightly different register offsets. Add support for it, which necessitates reworking how the mmio regs are acquired, as mt8196 has the fdvfs register before the performance domain registers. I've verified with both `sysbench cpu run` and `head -c 10G \ /dev/urandom | pigz -p 8 -c - | pv -ba > /dev/null` that we don't just get a higher reported clock frequency, but that the observed performance also increases, by a factor of 2.64 in an 8 thread sysbench test. 
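To illustrate the encoding described above, here is a minimal sketch of the FDVFS request path, assuming cpufreq's usual kHz frequency units (the helper name is illustrative; the conversion itself is the one the patch performs):

    /* FDVFS requests are expressed in multiples of the 26 MHz base clock */
    #define FDVFS_FDIV_HZ	(26 * 1000)

    static void fdvfs_request(void __iomem *fdvfs, unsigned int cpu,
    			      unsigned int target_khz)
    {
    	/* e.g. a 2600000 kHz target becomes DIV_ROUND_UP(2600000, 26000) = 100 */
    	writel_relaxed(DIV_ROUND_UP(target_khz, FDVFS_FDIV_HZ),
    		       fdvfs + cpu * 4);	/* one 32-bit slot per CPU */
    }
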
Signed-off-by: Nicolas Frattaroli Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq-hw.c | 70 ++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 53611077d0d9..e4eadce6f937 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -24,6 +24,8 @@ #define POLL_USEC 1000 #define TIMEOUT_USEC 300000 +#define FDVFS_FDIV_HZ (26 * 1000) + enum { REG_FREQ_LUT_TABLE, REG_FREQ_ENABLE, @@ -36,7 +38,9 @@ enum { }; struct mtk_cpufreq_priv { + struct device *dev; const struct mtk_cpufreq_variant *variant; + void __iomem *fdvfs; }; struct mtk_cpufreq_domain { @@ -49,7 +53,9 @@ struct mtk_cpufreq_domain { }; struct mtk_cpufreq_variant { + int (*init)(struct mtk_cpufreq_priv *priv); const u16 reg_offsets[REG_ARRAY_SIZE]; + const bool is_hybrid_dvfs; }; static const struct mtk_cpufreq_variant cpufreq_mtk_base_variant = { @@ -63,6 +69,29 @@ static const struct mtk_cpufreq_variant cpufreq_mtk_base_variant = { }, }; +static int mtk_cpufreq_hw_mt8196_init(struct mtk_cpufreq_priv *priv) +{ + priv->fdvfs = devm_of_iomap(priv->dev, priv->dev->of_node, 0, NULL); + if (IS_ERR_OR_NULL(priv->fdvfs)) + return dev_err_probe(priv->dev, PTR_ERR(priv->fdvfs), + "failed to get fdvfs iomem\n"); + + return 0; +} + +static const struct mtk_cpufreq_variant cpufreq_mtk_mt8196_variant = { + .init = mtk_cpufreq_hw_mt8196_init, + .reg_offsets = { + [REG_FREQ_LUT_TABLE] = 0x0, + [REG_FREQ_ENABLE] = 0x84, + [REG_FREQ_PERF_STATE] = 0x88, + [REG_FREQ_HW_STATE] = 0x8c, + [REG_EM_POWER_TBL] = 0x90, + [REG_FREQ_LATENCY] = 0x114, + }, + .is_hybrid_dvfs = true, +}; + static int __maybe_unused mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *uW, unsigned long *KHz) @@ -91,12 +120,31 @@ mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *uW, return 0; } +static void mtk_cpufreq_hw_fdvfs_switch(unsigned int target_freq, + struct cpufreq_policy *policy) +{ + struct mtk_cpufreq_domain *data = policy->driver_data; + struct mtk_cpufreq_priv *priv = data->parent; + unsigned int cpu; + + target_freq = DIV_ROUND_UP(target_freq, FDVFS_FDIV_HZ); + for_each_cpu(cpu, policy->real_cpus) { + writel_relaxed(target_freq, priv->fdvfs + cpu * 4); + } +} + static int mtk_cpufreq_hw_target_index(struct cpufreq_policy *policy, unsigned int index) { struct mtk_cpufreq_domain *data = policy->driver_data; + unsigned int target_freq; - writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]); + if (data->parent->fdvfs) { + target_freq = policy->freq_table[index].frequency; + mtk_cpufreq_hw_fdvfs_switch(target_freq, policy); + } else { + writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]); + } return 0; } @@ -127,7 +175,10 @@ static unsigned int mtk_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, index = cpufreq_table_find_index_dl(policy, target_freq, false); - writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]); + if (data->parent->fdvfs) + mtk_cpufreq_hw_fdvfs_switch(target_freq, policy); + else + writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]); return policy->freq_table[index].frequency; } @@ -191,6 +242,13 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, index = args.args[0]; of_node_put(args.np); + /* + * In a cpufreq with hybrid DVFS, such as the MT8196, the first declared + * register range is for FDVFS, followed by the frequency domain MMIOs. 
+ */ + if (priv->variant->is_hybrid_dvfs) + index++; + data->parent = priv; res = platform_get_resource(pdev, IORESOURCE_MEM, index); @@ -339,6 +397,13 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) return -ENOMEM; priv->variant = data; + priv->dev = &pdev->dev; + + if (priv->variant->init) { + ret = priv->variant->init(priv); + if (ret) + return ret; + } platform_set_drvdata(pdev, priv); cpufreq_mtk_hw_driver.driver_data = pdev; @@ -357,6 +422,7 @@ static void mtk_cpufreq_hw_driver_remove(struct platform_device *pdev) static const struct of_device_id mtk_cpufreq_hw_match[] = { { .compatible = "mediatek,cpufreq-hw", .data = &cpufreq_mtk_base_variant }, + { .compatible = "mediatek,mt8196-cpufreq-hw", .data = &cpufreq_mtk_mt8196_variant }, {} }; MODULE_DEVICE_TABLE(of, mtk_cpufreq_hw_match); -- cgit v1.2.3 From 8640689f17fd550c3e89d2b47ecb02536c58baf3 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sat, 9 Aug 2025 13:28:30 +0200 Subject: cpufreq: airoha: Add support for AN7583 SoC New Airoha AN7583 SoC use the same exact logic to control the CPU frequency. Add the Device compatible to the block list for cpufreq-dt-plat and to the Airoha CPUFreq driver compatible list. Signed-off-by: Christian Marangi Signed-off-by: Viresh Kumar --- drivers/cpufreq/airoha-cpufreq.c | 1 + drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/cpufreq/airoha-cpufreq.c b/drivers/cpufreq/airoha-cpufreq.c index 4fe39eadd163..b6b1cdc4d11d 100644 --- a/drivers/cpufreq/airoha-cpufreq.c +++ b/drivers/cpufreq/airoha-cpufreq.c @@ -107,6 +107,7 @@ static struct platform_driver airoha_cpufreq_driver = { }; static const struct of_device_id airoha_cpufreq_match_list[] __initconst = { + { .compatible = "airoha,an7583" }, { .compatible = "airoha,en7581" }, {}, }; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 015dd393eaba..0f954b07dcb3 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -103,6 +103,7 @@ static const struct of_device_id allowlist[] __initconst = { * platforms using "operating-points-v2" property. */ static const struct of_device_id blocklist[] __initconst = { + { .compatible = "airoha,an7583", }, { .compatible = "airoha,en7581", }, { .compatible = "allwinner,sun50i-a100" }, -- cgit v1.2.3 From 17bd9599f07ff700b9fb7d15dfa1f1de42947a54 Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Sat, 9 Aug 2025 19:46:58 +0530 Subject: cpufreq: armada-37xx: use max() to calculate target_vm Use max() macro while calculating target_vm to simplify and improve the armada-37xx-cpufreq driver code. Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202503251256.rrl65HgY-lkp@intel.com/ Reported-by: Yang Ruibin <11162571@vivo.com> Closes: https://lore.kernel.org/lkml/2c55fb07-b29e-43e0-8697-f75d1f0df89a@vivo.com/ Signed-off-by: Akhilesh Patil Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-37xx-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index f28a4435fba7..0efe403a5980 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -265,7 +265,7 @@ static void __init armada37xx_cpufreq_avs_configure(struct regmap *base, */ target_vm = avs_map[l0_vdd_min] - 100; - target_vm = target_vm > MIN_VOLT_MV ? 
target_vm : MIN_VOLT_MV; + target_vm = max(target_vm, MIN_VOLT_MV); dvfs->avs[1] = armada_37xx_avs_val_match(target_vm); /* @@ -273,7 +273,7 @@ static void __init armada37xx_cpufreq_avs_configure(struct regmap *base, * be larger than 1000mv */ target_vm = avs_map[l0_vdd_min] - 150; - target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV; + target_vm = max(target_vm, MIN_VOLT_MV); dvfs->avs[2] = dvfs->avs[3] = armada_37xx_avs_val_match(target_vm); /* -- cgit v1.2.3 From 16d39e2bf96075dc266fdcf0f2507fc075c33676 Mon Sep 17 00:00:00 2001 From: BowenYu Date: Wed, 30 Jul 2025 11:06:49 +0800 Subject: cpufreq: Remove unused parameter in cppc_perf_from_fbctrs() Remove the unused parameter cppc_cpudata* cpu_data in cppc_perf_from_fbctrs(). Signed-off-by: BowenYu Signed-off-by: Viresh Kumar --- drivers/cpufreq/cppc_cpufreq.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 4a17162a392d..ecbeb12f46e6 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -50,8 +50,7 @@ struct cppc_freq_invariance { static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); static struct kthread_worker *kworker_fie; -static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, - struct cppc_perf_fb_ctrs *fb_ctrs_t0, +static int cppc_perf_from_fbctrs(struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1); /** @@ -87,8 +86,7 @@ static void cppc_scale_freq_workfn(struct kthread_work *work) return; } - perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs, - &fb_ctrs); + perf = cppc_perf_from_fbctrs(&cppc_fi->prev_perf_fb_ctrs, &fb_ctrs); if (!perf) return; @@ -684,8 +682,7 @@ static inline u64 get_delta(u64 t1, u64 t0) return (u32)t1 - (u32)t0; } -static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, - struct cppc_perf_fb_ctrs *fb_ctrs_t0, +static int cppc_perf_from_fbctrs(struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1) { u64 delta_reference, delta_delivered; @@ -747,8 +744,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) return 0; } - delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0, - &fb_ctrs_t1); + delivered_perf = cppc_perf_from_fbctrs(&fb_ctrs_t0, &fb_ctrs_t1); if (!delivered_perf) goto out_invalid_counters; -- cgit v1.2.3 From 2b5066a3a19a2870f353f8783f1ee63e61b8e371 Mon Sep 17 00:00:00 2001 From: Md Sadre Alam Date: Wed, 6 Aug 2025 16:58:06 +0530 Subject: cpufreq: qcom-nvmem: Enable cpufreq for ipq5424 IPQ5424 have different OPPs available for the CPU based on SoC variant. This can be determined through use of an eFuse register present in the silicon. Added support for ipq5424 on nvmem driver which helps to determine OPPs at runtime based on the eFuse register which has the CPU frequency limits. opp-supported-hw dt binding can be used to indicate the available OPPs for each limit. nvmem driver also creates the "cpufreq-dt" platform_device after passing the version matching data to the OPP framework so that the cpufreq-dt handles the actual cpufreq implementation. 
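The flow can be sketched as follows: the fuse value selects a version bit, and the OPP core then keeps only the OPPs whose opp-supported-hw mask overlaps it. Only the 0x3b comparison is taken from the patch; the helper name and surrounding code are illustrative:

    static int example_enable_supported_opps(struct device *cpu_dev, u8 speedbin)
    {
    	u32 versions = (speedbin == 0x3b) ? BIT(1) : BIT(0);
    	struct dev_pm_opp_config config = {
    		.supported_hw = &versions,
    		.supported_hw_count = 1,
    	};
    	int token = dev_pm_opp_set_config(cpu_dev, &config);

    	/* OPPs whose opp-supported-hw overlaps `versions` remain enabled */
    	return token < 0 ? token : 0;
    }
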
Signed-off-by: Md Sadre Alam Signed-off-by: Sricharan Ramabadhran Reviewed-by: Konrad Dybcio [ Changed '!=' based check to '==' based check ] Signed-off-by: Varadarajan Narayanan Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + drivers/cpufreq/qcom-cpufreq-nvmem.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 0f954b07dcb3..1a9d9d008002 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -192,6 +192,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "ti,am62p5", }, { .compatible = "qcom,ipq5332", }, + { .compatible = "qcom,ipq5424", }, { .compatible = "qcom,ipq6018", }, { .compatible = "qcom,ipq8064", }, { .compatible = "qcom,ipq8074", }, diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 54f8117103c8..765a5bb81829 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -200,6 +200,10 @@ static int qcom_cpufreq_kryo_name_version(struct device *cpu_dev, case QCOM_ID_IPQ9574: drv->versions = 1 << (unsigned int)(*speedbin); break; + case QCOM_ID_IPQ5424: + case QCOM_ID_IPQ5404: + drv->versions = (*speedbin == 0x3b) ? BIT(1) : BIT(0); + break; case QCOM_ID_MSM8996SG: case QCOM_ID_APQ8096SG: drv->versions = 1 << ((unsigned int)(*speedbin) + 4); @@ -591,6 +595,7 @@ static const struct of_device_id qcom_cpufreq_match_list[] __initconst __maybe_u { .compatible = "qcom,msm8996", .data = &match_data_kryo }, { .compatible = "qcom,qcs404", .data = &match_data_qcs404 }, { .compatible = "qcom,ipq5332", .data = &match_data_kryo }, + { .compatible = "qcom,ipq5424", .data = &match_data_kryo }, { .compatible = "qcom,ipq6018", .data = &match_data_ipq6018 }, { .compatible = "qcom,ipq8064", .data = &match_data_ipq8064 }, { .compatible = "qcom,ipq8074", .data = &match_data_ipq8074 }, -- cgit v1.2.3 From 57e85e21b2729849407c9f851e30b2ff0f07523b Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 11 Aug 2025 14:51:01 -0700 Subject: MAINTAINERS: Add myself as virtual-cpufreq maintainer Forgot to update the MAINTAINERS file in the initial patch that landed the driver. Add myself as the maintainer for this driver. Signed-off-by: Saravana Kannan Signed-off-by: Viresh Kumar --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..6fac119a8e95 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6350,6 +6350,12 @@ F: kernel/sched/cpufreq*.c F: rust/kernel/cpufreq.rs F: tools/testing/selftests/cpufreq/ +CPU FREQUENCY DRIVERS - VIRTUAL MACHINE CPUFREQ +M: Saravana Kannan +L: linux-pm@vger.kernel.org +S: Maintained +F: drivers/cpufreq/virtual-cpufreq.c + CPU HOTPLUG M: Thomas Gleixner M: Peter Zijlstra -- cgit v1.2.3 From e98329896b5639f8437af8ff9316f20cb50d9187 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Wed, 2 Jul 2025 14:43:09 +0530 Subject: dt-bindings: cpufreq: cpufreq-qcom-hw: Add QCS615 compatible Document compatible for cpufreq hardware on Qualcomm QCS615 platform. 
Signed-off-by: Taniya Das Acked-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index e0242bed3342..2d42fc3d8ef8 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -22,6 +22,7 @@ properties: items: - enum: - qcom,qcm2290-cpufreq-hw + - qcom,qcs615-cpufreq-hw - qcom,sc7180-cpufreq-hw - qcom,sc8180x-cpufreq-hw - qcom,sdm670-cpufreq-hw @@ -132,6 +133,7 @@ allOf: compatible: contains: enum: + - qcom,qcs615-cpufreq-hw - qcom,qdu1000-cpufreq-epss - qcom,sa8255p-cpufreq-epss - qcom,sa8775p-cpufreq-epss -- cgit v1.2.3 From daad2ef99145215fac1cab196811e3e03ef8bc9f Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 13 Aug 2025 07:54:31 +0000 Subject: rust: cpumask: rename CpumaskVar::as[_mut]_ref to from_raw[_mut] The prefix as_* shouldn't be used for constructors. For further motivation, see commit 2f5606afa4c2 ("device: rust: rename Device::as_ref() to Device::from_raw()"). Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Reviewed-by: Yury Norov (NVIDIA) Signed-off-by: Viresh Kumar --- rust/kernel/cpufreq.rs | 2 +- rust/kernel/cpumask.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index afc15e72a7c3..eea57ba95f24 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -543,7 +543,7 @@ impl Policy { pub fn cpus(&mut self) -> &mut cpumask::Cpumask { // SAFETY: The pointer to `cpus` is valid for writing and remains valid for the lifetime of // the returned reference. - unsafe { cpumask::CpumaskVar::as_mut_ref(&mut self.as_mut_ref().cpus) } + unsafe { cpumask::CpumaskVar::from_raw_mut(&mut self.as_mut_ref().cpus) } } /// Sets clock for the [`Policy`]. diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs index 3fcbff438670..e311ab9038df 100644 --- a/rust/kernel/cpumask.rs +++ b/rust/kernel/cpumask.rs @@ -270,7 +270,7 @@ impl CpumaskVar { /// /// The caller must ensure that `ptr` is valid for writing and remains valid for the lifetime /// of the returned reference. - pub unsafe fn as_mut_ref<'a>(ptr: *mut bindings::cpumask_var_t) -> &'a mut Self { + pub unsafe fn from_raw_mut<'a>(ptr: *mut bindings::cpumask_var_t) -> &'a mut Self { // SAFETY: Guaranteed by the safety requirements of the function. // // INVARIANT: The caller ensures that `ptr` is valid for writing and remains valid for the @@ -284,7 +284,7 @@ impl CpumaskVar { /// /// The caller must ensure that `ptr` is valid for reading and remains valid for the lifetime /// of the returned reference. - pub unsafe fn as_ref<'a>(ptr: *const bindings::cpumask_var_t) -> &'a Self { + pub unsafe fn from_raw<'a>(ptr: *const bindings::cpumask_var_t) -> &'a Self { // SAFETY: Guaranteed by the safety requirements of the function. // // INVARIANT: The caller ensures that `ptr` is valid for reading and remains valid for the -- cgit v1.2.3 From 23fca458f6ab18927e50c2134fb7b60297f18b4e Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Tue, 12 Aug 2025 16:42:11 +0200 Subject: rust: cpumask: Mark CpumaskVar as transparent Unsafe code in CpumaskVar's methods assumes that the type has the same layout as `bindings::cpumask_var_t`. 
This is not guaranteed by the default struct representation in Rust, but requires specifying the `transparent` representation. Fixes: 8961b8cb3099a ("rust: cpumask: Add initial abstractions") Signed-off-by: Baptiste Lepers Reviewed-by: Alice Ryhl Signed-off-by: Viresh Kumar --- rust/kernel/cpumask.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs index e311ab9038df..c1d17826ae7b 100644 --- a/rust/kernel/cpumask.rs +++ b/rust/kernel/cpumask.rs @@ -212,6 +212,7 @@ impl Cpumask { /// } /// assert_eq!(mask2.weight(), count); /// ``` +#[repr(transparent)] pub struct CpumaskVar { #[cfg(CONFIG_CPUMASK_OFFSTACK)] ptr: NonNull, -- cgit v1.2.3 From cd5d4621ba846dad9b2e6b0c2d1518d083fcfa13 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 18 Aug 2025 08:50:48 -0700 Subject: cpufreq: scmi: Account for malformed DT in scmi_dev_used_by_cpus() Broadcom STB platforms were early adopters (2017) of the SCMI framework and as a result, not all deployed systems have a Device Tree entry where SCMI protocol 0x13 (PERFORMANCE) is declared as a clock provider, nor are the CPU Device Tree node(s) referencing protocol 0x13 as their clock provider. This was clarified in commit e11c480b6df1 ("dt-bindings: firmware: arm,scmi: Extend bindings for protocol@13") in 2023. For those platforms, we allow the checks done by scmi_dev_used_by_cpus() to continue, and in the event of not having done an early return, we key off the documented compatible string and give them a pass to continue to use scmi-cpufreq. Fixes: 6c9bb8692272 ("cpufreq: scmi: Skip SCMI devices that aren't used by the CPUs") Signed-off-by: Florian Fainelli Reviewed-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index ef078426bfd5..38c165d526d1 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -424,6 +425,15 @@ static bool scmi_dev_used_by_cpus(struct device *scmi_dev) return true; } + /* + * Older Broadcom STB chips had a "clocks" property for CPU node(s) + * that did not match the SCMI performance protocol node, if we got + * there, it means we had such an older Device Tree, therefore return + * true to preserve backwards compatibility. + */ + if (of_machine_is_compatible("brcm,brcmstb")) + return true; + return false; } -- cgit v1.2.3 From c5746dc1898a1bd5518a03081dc7e380569e269d Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Fri, 25 Jul 2025 12:14:50 +0800 Subject: cpufreq: Avoid calling get_governor() for first policy When a cpufreq driver registers the first policy, it may attempt to initialize the policy governor from `last_governor`. However, this is meaningless for the first policy instance, because `last_governor` is only updated when policies are removed (e.g. during CPU offline). The `last_governor` mechanism is intended to restore the previously used governor across CPU hotplug events. For the very first policy, there is no "previous governor" to restore, so calling get_governor(last_governor) is unnecessary and potentially confusing. Skip looking up `last_governor` when registering the first policy. Instead, it directly uses the default governor after all governors have been registered and are available. 
This avoids meaningless lookups, reduces unnecessary module reference handling, and simplifies the initial policy path. Signed-off-by: Zihuan Zhang Acked-by: Viresh Kumar Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20250725041450.68754-1-zhangzihuan@kylinos.cn [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index fc7eace8b65b..b8937737d096 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1121,7 +1121,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) if (has_target()) { /* Update policy governor to the one used before hotplug. */ - gov = get_governor(policy->last_governor); + if (policy->last_governor[0] != '\0') + gov = get_governor(policy->last_governor); if (gov) { pr_debug("Restoring governor %s for cpu %d\n", gov->name, policy->cpu); -- cgit v1.2.3 From 72907ea795e0bf261def10595d19ded441b90d9f Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Tue, 19 Aug 2025 13:21:42 +0200 Subject: cpufreq: mediatek-hw: don't use error path on NULL fdvfs The IS_ERR_OR_NULL check for priv->fdvfs is inappropriate, and should be an IS_ERR check instead, as a NULL value here would propagate it to PTR_ERR. In practice, there is no problem here, as devm_of_iomap cannot return NULL in any circumstance. However, it causes a Smatch static checker warning. Fix the warning by changing the check from IS_ERR_OR_NULL to IS_ERR. Fixes: 32e0d669f3ac ("cpufreq: mediatek-hw: Add support for MT8196") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-pm/aKQubSEXH1TXQpnR@stanley.mountain/ Signed-off-by: Nicolas Frattaroli Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index e4eadce6f937..fce5aa5ceea0 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -72,7 +72,7 @@ static const struct mtk_cpufreq_variant cpufreq_mtk_base_variant = { static int mtk_cpufreq_hw_mt8196_init(struct mtk_cpufreq_priv *priv) { priv->fdvfs = devm_of_iomap(priv->dev, priv->dev->of_node, 0, NULL); - if (IS_ERR_OR_NULL(priv->fdvfs)) + if (IS_ERR(priv->fdvfs)) return dev_err_probe(priv->dev, PTR_ERR(priv->fdvfs), "failed to get fdvfs iomem\n"); -- cgit v1.2.3 From e88ef677623eaf7a4c30b5fe905e7138838d13d2 Mon Sep 17 00:00:00 2001 From: Shankari Anand Date: Fri, 15 Aug 2025 23:15:21 +0530 Subject: rust: opp: update ARef and AlwaysRefCounted imports from sync::aref Update call sites in `opp.rs` to import ARef and AlwaysRefCounted from sync::aref instead of types. This aligns with the ongoing effort to move ARef and AlwaysRefCounted to sync. 
Suggested-by: Benno Lossin Link: https://github.com/Rust-for-Linux/linux/issues/1173 Signed-off-by: Shankari Anand Reviewed-by: Benno Lossin Signed-off-by: Viresh Kumar --- rust/kernel/opp.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs index 08126035d2c6..29be7d14025e 100644 --- a/rust/kernel/opp.rs +++ b/rust/kernel/opp.rs @@ -16,7 +16,8 @@ use crate::{ ffi::c_ulong, prelude::*, str::CString, - types::{ARef, AlwaysRefCounted, Opaque}, + sync::aref::{ARef, AlwaysRefCounted}, + types::Opaque, }; #[cfg(CONFIG_CPU_FREQ)] @@ -162,7 +163,7 @@ impl From for c_ulong { /// use kernel::device::Device; /// use kernel::error::Result; /// use kernel::opp::{Data, MicroVolt, Token}; -/// use kernel::types::ARef; +/// use kernel::sync::aref::ARef; /// /// fn create_opp(dev: &ARef, freq: Hertz, volt: MicroVolt, level: u32) -> Result { /// let data = Data::new(freq, volt, level, false); @@ -211,7 +212,7 @@ impl Drop for Token { /// use kernel::device::Device; /// use kernel::error::Result; /// use kernel::opp::{Data, MicroVolt, Token}; -/// use kernel::types::ARef; +/// use kernel::sync::aref::ARef; /// /// fn create_opp(dev: &ARef, freq: Hertz, volt: MicroVolt, level: u32) -> Result { /// let data = Data::new(freq, volt, level, false); @@ -262,7 +263,7 @@ impl Data { /// use kernel::clk::Hertz; /// use kernel::error::Result; /// use kernel::opp::{OPP, SearchType, Table}; -/// use kernel::types::ARef; +/// use kernel::sync::aref::ARef; /// /// fn find_opp(table: &Table, freq: Hertz) -> Result> { /// let opp = table.opp_from_freq(freq, Some(true), None, SearchType::Exact)?; @@ -335,7 +336,7 @@ impl Drop for ConfigToken { /// use kernel::error::Result; /// use kernel::opp::{Config, ConfigOps, ConfigToken}; /// use kernel::str::CString; -/// use kernel::types::ARef; +/// use kernel::sync::aref::ARef; /// use kernel::macros::vtable; /// /// #[derive(Default)] @@ -581,7 +582,7 @@ impl Config { /// use kernel::device::Device; /// use kernel::error::Result; /// use kernel::opp::Table; -/// use kernel::types::ARef; +/// use kernel::sync::aref::ARef; /// /// fn get_table(dev: &ARef, mask: &mut Cpumask, freq: Hertz) -> Result { /// let mut opp_table = Table::from_of_cpumask(dev, mask)?; -- cgit v1.2.3 From fa40cbe1c86b6626d548cf2eb555bee4eea7566c Mon Sep 17 00:00:00 2001 From: Paresh Bhagat Date: Wed, 20 Aug 2025 14:03:30 +0530 Subject: cpufreq: dt-platdev: Blacklist ti,am62d2 SoC Add ti,am62d2 SoC to the blacklist as the ti-cpufreq driver will handle creating the cpufreq-dt platform device after it completes and ensure it is not created twice. 
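For context, the platdev side of this arrangement amounts to the following simplified sketch (not the literal code, which also consults an allowlist and the OPP properties): a blocklisted root compatible suppresses the generic device so the SoC driver can create it itself later:

    struct device_node *np = of_find_node_by_path("/");

    if (np && !of_match_node(blocklist, np))	/* "ti,am62d2" now matches */
    	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);

    of_node_put(np);
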
Signed-off-by: Paresh Bhagat Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 1a9d9d008002..cd1816a12bb9 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -189,6 +189,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "ti,omap3", }, { .compatible = "ti,am625", }, { .compatible = "ti,am62a7", }, + { .compatible = "ti,am62d2", }, { .compatible = "ti,am62p5", }, { .compatible = "qcom,ipq5332", }, -- cgit v1.2.3 From b5af45302ebc141662b2b60c713c9202e88c943c Mon Sep 17 00:00:00 2001 From: Paresh Bhagat Date: Wed, 20 Aug 2025 14:03:31 +0530 Subject: cpufreq: ti: Add support for AM62D2 Add support for TI K3 AM62D2 SoC to read speed and revision values from hardware and pass to OPP layer. AM62D shares the same configuations as AM62A so use existing am62a7_soc_data. Signed-off-by: Paresh Bhagat Signed-off-by: Viresh Kumar --- drivers/cpufreq/ti-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 5a5147277cd0..9a912d309315 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -310,6 +310,7 @@ static const struct soc_device_attribute k3_cpufreq_soc[] = { { .family = "AM62X", .revision = "SR1.0" }, { .family = "AM62AX", .revision = "SR1.0" }, { .family = "AM62PX", .revision = "SR1.0" }, + { .family = "AM62DX", .revision = "SR1.0" }, { /* sentinel */ } }; @@ -457,6 +458,7 @@ static const struct of_device_id ti_cpufreq_of_match[] __maybe_unused = { { .compatible = "ti,omap36xx", .data = &omap36xx_soc_data, }, { .compatible = "ti,am625", .data = &am625_soc_data, }, { .compatible = "ti,am62a7", .data = &am62a7_soc_data, }, + { .compatible = "ti,am62d2", .data = &am62a7_soc_data, }, { .compatible = "ti,am62p5", .data = &am62p5_soc_data, }, /* legacy */ { .compatible = "ti,omap3430", .data = &omap34xx_soc_data, }, -- cgit v1.2.3 From 03cf825911c95f39d77d2a60b35fe5b4a33115b1 Mon Sep 17 00:00:00 2001 From: Xichao Zhao Date: Wed, 13 Aug 2025 15:54:33 +0800 Subject: powercap: idle_inject: use us_to_ktime() where appropriate Convert values in microseconds to ktime using us_to_ktime() instead of multiplying them by NSEC_PER_USEC and using ns_to_ktime() for the conversion. Signed-off-by: Xichao Zhao Link: https://patch.msgid.link/20250813075433.464786-1-zhao.xichao@vivo.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/idle_inject.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index 5ad7cc438068..a25eb2018acd 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -133,7 +133,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) duration_us = READ_ONCE(ii_dev->run_duration_us); duration_us += READ_ONCE(ii_dev->idle_duration_us); - hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC)); + hrtimer_forward_now(timer, us_to_ktime(duration_us)); return HRTIMER_RESTART; } @@ -232,8 +232,7 @@ int idle_inject_start(struct idle_inject_device *ii_dev) idle_inject_wakeup(ii_dev); hrtimer_start(&ii_dev->timer, - ns_to_ktime((idle_duration_us + run_duration_us) * - NSEC_PER_USEC), + us_to_ktime(idle_duration_us + run_duration_us), HRTIMER_MODE_REL); return 0; -- cgit v1.2.3 From 17224c1d2574d29668c4879e1fbf36d6f68cd22b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 13 Aug 2025 12:26:35 +0200 Subject: cpuidle: governors: menu: Rearrange main loop in menu_select() Reduce the indentation level in the main loop of menu_select() by rearranging some checks and assignments in it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Link: https://patch.msgid.link/2389215.ElGaqSPkdT@rafael.j.wysocki --- drivers/cpuidle/governors/menu.c | 70 +++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b2e3d0b0a116..4d9aa5ce31f0 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -314,45 +314,47 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (s->exit_latency_ns > latency_req) break; - if (s->target_residency_ns > predicted_ns) { - /* - * Use a physical idle state, not busy polling, unless - * a timer is going to trigger soon enough. - */ - if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && - s->target_residency_ns <= data->next_timer_ns) { - predicted_ns = s->target_residency_ns; - idx = i; - break; - } - if (predicted_ns < TICK_NSEC) - break; - - if (!tick_nohz_tick_stopped()) { - /* - * If the state selected so far is shallow, - * waking up early won't hurt, so retain the - * tick in that case and let the governor run - * again in the next iteration of the loop. - */ - predicted_ns = drv->states[idx].target_residency_ns; - break; - } + if (s->target_residency_ns <= predicted_ns) { + idx = i; + continue; + } + + /* + * Use a physical idle state, not busy polling, unless a timer + * is going to trigger soon enough. + */ + if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && + s->target_residency_ns <= data->next_timer_ns) { + predicted_ns = s->target_residency_ns; + idx = i; + break; + } + if (predicted_ns < TICK_NSEC) + break; + + if (!tick_nohz_tick_stopped()) { /* - * If the state selected so far is shallow and this - * state's target residency matches the time till the - * closest timer event, select this one to avoid getting - * stuck in the shallow one for too long. + * If the state selected so far is shallow, waking up + * early won't hurt, so retain the tick in that case and + * let the governor run again in the next iteration of + * the idle loop. 
*/ - if (drv->states[idx].target_residency_ns < TICK_NSEC && - s->target_residency_ns <= delta_tick) - idx = i; - - return idx; + predicted_ns = drv->states[idx].target_residency_ns; + break; } - idx = i; + /* + * If the state selected so far is shallow and this state's + * target residency matches the time till the closest timer + * event, select this one to avoid getting stuck in the shallow + * one for too long. + */ + if (drv->states[idx].target_residency_ns < TICK_NSEC && + s->target_residency_ns <= delta_tick) + idx = i; + + return idx; } if (idx == -1) -- cgit v1.2.3 From f8d63d7e60e8912b5ded72319c305765b0a6322f Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Mon, 18 Aug 2025 14:26:30 -0500 Subject: cpufreq: ti: Support more speed grades on AM62Px SoC As the AM62Px SoC family matures more speed grades are being defined. Add support for speed grades U and T which both support all currently established OPPs. Signed-off-by: Judith Mendez Signed-off-by: Viresh Kumar --- drivers/cpufreq/ti-cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 9a912d309315..d6bd0d2dcf15 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -72,7 +72,9 @@ enum { #define AM62P5_EFUSE_O_MPU_OPP 15 #define AM62P5_EFUSE_S_MPU_OPP 19 +#define AM62P5_EFUSE_T_MPU_OPP 20 #define AM62P5_EFUSE_U_MPU_OPP 21 +#define AM62P5_EFUSE_V_MPU_OPP 22 #define AM62P5_SUPPORT_O_MPU_OPP BIT(0) #define AM62P5_SUPPORT_U_MPU_OPP BIT(2) @@ -153,7 +155,9 @@ static unsigned long am62p5_efuse_xlate(struct ti_cpufreq_data *opp_data, unsigned long calculated_efuse = AM62P5_SUPPORT_O_MPU_OPP; switch (efuse) { + case AM62P5_EFUSE_V_MPU_OPP: case AM62P5_EFUSE_U_MPU_OPP: + case AM62P5_EFUSE_T_MPU_OPP: case AM62P5_EFUSE_S_MPU_OPP: calculated_efuse |= AM62P5_SUPPORT_U_MPU_OPP; fallthrough; -- cgit v1.2.3 From 37f846830b510121c5364ba7bd23e3d77a83ff9b Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Mon, 18 Aug 2025 14:26:31 -0500 Subject: cpufreq: ti: Allow all silicon revisions to support OPPs More silicon revisions are being defined for AM62x, AM62Px, and AM62ax SoCs. These silicon may also support currently establishes OPPs, so remove the revision limitation in ti-cpufreq and thus determine if an OPP applies with speed grade efuse parsing. Signed-off-by: Judith Mendez Signed-off-by: Viresh Kumar --- drivers/cpufreq/ti-cpufreq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index d6bd0d2dcf15..6ee76f5fe9c5 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -311,10 +311,10 @@ static struct ti_cpufreq_soc_data am3517_soc_data = { }; static const struct soc_device_attribute k3_cpufreq_soc[] = { - { .family = "AM62X", .revision = "SR1.0" }, - { .family = "AM62AX", .revision = "SR1.0" }, - { .family = "AM62PX", .revision = "SR1.0" }, - { .family = "AM62DX", .revision = "SR1.0" }, + { .family = "AM62X", }, + { .family = "AM62AX", }, + { .family = "AM62PX", }, + { .family = "AM62DX", }, { /* sentinel */ } }; -- cgit v1.2.3 From f434ec2200667d5362bd19f93a498d9b3f121588 Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Mon, 18 Aug 2025 14:26:32 -0500 Subject: arm64: dts: ti: k3-am62p: Fix supported hardware for 1GHz OPP The 1GHz OPP is supported on speed grade "O" as well according to the device datasheet [0], so fix the opp-supported-hw property to support this speed grade for 1GHz OPP. 
[0] https://www.ti.com/lit/gpn/am62p Fixes: 76d855f05801 ("arm64: dts: ti: k3-am62p: add opp frequencies") Cc: stable@vger.kernel.org Signed-off-by: Judith Mendez Signed-off-by: Viresh Kumar --- arch/arm64/boot/dts/ti/k3-am62p5.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62p5.dtsi b/arch/arm64/boot/dts/ti/k3-am62p5.dtsi index 202378d9d5cf..8982a7b9f1a6 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p5.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p5.dtsi @@ -135,7 +135,7 @@ opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-supported-hw = <0x01 0x0006>; + opp-supported-hw = <0x01 0x0007>; clock-latency-ns = <6000000>; }; -- cgit v1.2.3 From e9e124501f0d7ea2caea94711efe50fe081a11ea Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Fri, 22 Aug 2025 15:04:23 +0800 Subject: cpufreq: use strlen() for governor name comparison Most kernel code using strncasecmp()/strncmp() passes strlen("xxx") as the length argument. cpufreq_parse_policy() previously used CPUFREQ_NAME_LEN (16), which is longer than the actual strings ("performance" is 11 chars, "powersave" is 9 chars). This patch switches to strlen() for the comparison, making the matching slightly more permissive (e.g., "powersavexxx" will now also match "powersave"). While this is unlikely to cause functional issues, it aligns cpufreq with common kernel style and makes the behavior more intuitive. Signed-off-by: Zihuan Zhang Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250822070424.166795-2-zhangzihuan@kylinos.cn Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b8937737d096..d337f94f70a3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -664,10 +664,10 @@ unlock: static unsigned int cpufreq_parse_policy(char *str_governor) { - if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) + if (!strncasecmp(str_governor, "performance", strlen("performance"))) return CPUFREQ_POLICY_PERFORMANCE; - if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) + if (!strncasecmp(str_governor, "powersave", strlen("powersave"))) return CPUFREQ_POLICY_POWERSAVE; return CPUFREQ_POLICY_UNKNOWN; -- cgit v1.2.3 From 1647830388ffea4d7a39c1a5f7692925e9d8351d Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Fri, 22 Aug 2025 15:04:24 +0800 Subject: cpufreq: simplify setpolicy/target check in driver verification cpufreq drivers are supposed to use either ->setpolicy() or ->target()/->target_index(). Simplify the existing check by collapsing it into a single boolean expression: (!!driver->setpolicy == (driver->target_index || driver->target)) This is a readability/maintainability cleanup and keeps the semantics unchanged. No functional change intended. Signed-off-by: Zihuan Zhang Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250822070424.166795-3-zhangzihuan@kylinos.cn Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d337f94f70a3..a615c98d80ca 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2922,10 +2922,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) return -EPROBE_DEFER; if (!driver_data || !driver_data->verify || !driver_data->init || - !(driver_data->setpolicy || driver_data->target_index || - driver_data->target) || - (driver_data->setpolicy && (driver_data->target_index || - driver_data->target)) || + (!!driver_data->setpolicy == (driver_data->target_index || driver_data->target)) || (!driver_data->get_intermediate != !driver_data->target_intermediate) || (!driver_data->online != !driver_data->offline) || (driver_data->adjust_perf && !driver_data->fast_switch)) -- cgit v1.2.3 From 22763c35c635466545c0cea825da01c72becbcea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=96zkan?= Date: Thu, 21 Aug 2025 12:16:05 +0300 Subject: rust: opp: use to_result for error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplifies error handling by replacing the manual check of the return value with the `to_result` helper. Signed-off-by: Onur Özkan Reviewed-by: Elle Rhumsaa Signed-off-by: Viresh Kumar --- rust/kernel/opp.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs index 08126035d2c6..9d79c2816af5 100644 --- a/rust/kernel/opp.rs +++ b/rust/kernel/opp.rs @@ -12,7 +12,7 @@ use crate::{ clk::Hertz, cpumask::{Cpumask, CpumaskVar}, device::Device, - error::{code::*, from_err_ptr, from_result, to_result, Error, Result, VTABLE_DEFAULT_ERROR}, + error::{code::*, from_err_ptr, from_result, to_result, Result, VTABLE_DEFAULT_ERROR}, ffi::c_ulong, prelude::*, str::CString, @@ -500,11 +500,8 @@ impl Config { // requirements. The OPP core guarantees not to access fields of [`Config`] after this call // and so we don't need to save a copy of them for future use. let ret = unsafe { bindings::dev_pm_opp_set_config(dev.as_raw(), &mut config) }; - if ret < 0 { - Err(Error::from_errno(ret)) - } else { - Ok(ConfigToken(ret)) - } + + to_result(ret).map(|()| ConfigToken(ret)) } /// Config's clk callback. @@ -713,11 +710,8 @@ impl Table { // SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety // requirements. let ret = unsafe { bindings::dev_pm_opp_get_opp_count(self.dev.as_raw()) }; - if ret < 0 { - Err(Error::from_errno(ret)) - } else { - Ok(ret as u32) - } + + to_result(ret).map(|()| ret as u32) } /// Returns max clock latency (in nanoseconds) of the [`OPP`]s in the [`Table`]. -- cgit v1.2.3 From 05db35963eef7a55f1782190185cb8ddb9d923b7 Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Wed, 20 Aug 2025 13:58:47 +0530 Subject: OPP: Add support to find OPP for a set of keys Some clients, such as PCIe, may operate at the same clock frequency across different data rates by varying link width. In such cases, frequency alone is not sufficient to uniquely identify an OPP. To support these scenarios, introduce a new API dev_pm_opp_find_key_exact() that allows OPP lookup with different set of keys like freq, level & bandwidth. 
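A hypothetical consumer of the new API fills in only the keys it cares about and leaves the rest at their wildcard values; the numbers below are illustrative:

    struct dev_pm_opp_key key = {
    	.freq = 250000000,		/* same rate shared by two link widths */
    	.level = OPP_LEVEL_UNSET,	/* do not match on level */
    	.bw = 2000,			/* peak bandwidth disambiguates */
    };
    struct dev_pm_opp *opp = dev_pm_opp_find_key_exact(dev, &key, true);

    if (IS_ERR(opp))
    	return PTR_ERR(opp);
    /* ... program the device for this operating point ... */
    dev_pm_opp_put(opp);
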
Signed-off-by: Krishna Chaitanya Chundru [ Viresh: Minor cleanups ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 30 +++++++++++++++ 2 files changed, 129 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index edbd60501cf0..bba4f7daff8c 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -476,6 +476,16 @@ static unsigned long _read_bw(struct dev_pm_opp *opp, int index) return opp->bandwidth[index].peak; } +static unsigned long _read_opp_key(struct dev_pm_opp *opp, int index, + struct dev_pm_opp_key *key) +{ + key->bw = opp->bandwidth ? opp->bandwidth[index].peak : 0; + key->freq = opp->rates[index]; + key->level = opp->level; + + return true; +} + /* Generic comparison helpers */ static bool _compare_exact(struct dev_pm_opp **opp, struct dev_pm_opp *temp_opp, unsigned long opp_key, unsigned long key) @@ -509,6 +519,22 @@ static bool _compare_floor(struct dev_pm_opp **opp, struct dev_pm_opp *temp_opp, return false; } +static bool _compare_opp_key_exact(struct dev_pm_opp **opp, + struct dev_pm_opp *temp_opp, struct dev_pm_opp_key *opp_key, + struct dev_pm_opp_key *key) +{ + bool level_match = (key->level == OPP_LEVEL_UNSET || opp_key->level == key->level); + bool freq_match = (key->freq == 0 || opp_key->freq == key->freq); + bool bw_match = (key->bw == 0 || opp_key->bw == key->bw); + + if (freq_match && level_match && bw_match) { + *opp = temp_opp; + return true; + } + + return false; +} + /* Generic key finding helpers */ static struct dev_pm_opp *_opp_table_find_key(struct opp_table *opp_table, unsigned long *key, int index, bool available, @@ -541,6 +567,37 @@ static struct dev_pm_opp *_opp_table_find_key(struct opp_table *opp_table, return opp; } +static struct dev_pm_opp *_opp_table_find_opp_key(struct opp_table *opp_table, + struct dev_pm_opp_key *key, bool available, + unsigned long (*read)(struct dev_pm_opp *opp, int index, + struct dev_pm_opp_key *key), + bool (*compare)(struct dev_pm_opp **opp, struct dev_pm_opp *temp_opp, + struct dev_pm_opp_key *opp_key, struct dev_pm_opp_key *key), + bool (*assert)(struct opp_table *opp_table, unsigned int index)) +{ + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + struct dev_pm_opp_key temp_key; + + /* Assert that the requirement is met */ + if (!assert(opp_table, 0)) + return ERR_PTR(-EINVAL); + + guard(mutex)(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available == available) { + read(temp_opp, 0, &temp_key); + if (compare(&opp, temp_opp, &temp_key, key)) { + /* Increment the reference count of OPP */ + dev_pm_opp_get(opp); + break; + } + } + } + + return opp; +} + static struct dev_pm_opp * _find_key(struct device *dev, unsigned long *key, int index, bool available, unsigned long (*read)(struct dev_pm_opp *opp, int index), @@ -632,6 +689,48 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); +/** + * dev_pm_opp_find_key_exact() - Search for an OPP with exact key set + * @dev: Device for which the OPP is being searched + * @key: OPP key set to match + * @available: true/false - match for available OPP + * + * Search for an exact match of the key set in the OPP table. + * + * Return: A matching opp on success, else ERR_PTR in case of error. 
+ * Possible error values: + * EINVAL: for bad pointers + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * Note: 'available' is a modifier for the search. If 'available' == true, + * then the match is for exact matching key and is available in the stored + * OPP table. If false, the match is for exact key which is not available. + * + * This provides a mechanism to enable an OPP which is not available currently + * or the opposite as well. + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available) +{ + struct opp_table *opp_table __free(put_opp_table) = _find_opp_table(dev); + + if (IS_ERR(opp_table)) { + dev_err(dev, "%s: OPP table not found (%ld)\n", __func__, + PTR_ERR(opp_table)); + return ERR_CAST(opp_table); + } + + return _opp_table_find_opp_key(opp_table, key, available, + _read_opp_key, _compare_opp_key_exact, + assert_single_clk); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_key_exact); + /** * dev_pm_opp_find_freq_exact_indexed() - Search for an exact freq for the * clock corresponding to the index diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cf477beae4bb..789406d95e69 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -98,6 +98,25 @@ struct dev_pm_opp_data { unsigned long u_volt; }; +/** + * struct dev_pm_opp_key - Key used to identify OPP entries + * @freq: Frequency in Hz. Use 0 if frequency is not to be matched. + * @level: Performance level associated with the OPP entry. + * Use OPP_LEVEL_UNSET if level is not to be matched. + * @bw: Bandwidth associated with the OPP entry. + * Use 0 if bandwidth is not to be matched. + * + * This structure is used to uniquely identify an OPP entry based on + * frequency, performance level, and bandwidth. Each field can be + * selectively ignored during matching by setting it to its respective + * NOP value. + */ +struct dev_pm_opp_key { + unsigned long freq; + unsigned int level; + u32 bw; +}; + #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); @@ -131,6 +150,10 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); +struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available); + struct dev_pm_opp * dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, u32 index, bool available); @@ -289,6 +312,13 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp * dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, u32 index, bool available) -- cgit v1.2.3 From d939047d31ff25a49a39667417c2a996caa826f5 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 27 Aug 2025 16:44:01 -0400 Subject: dt-bindings: Remove outdated cpufreq-dt.txt The information present in this file is outdated and doesn't serve any purpose with the current design of the driver. Remove the outdated file. 
Signed-off-by: Frank Li [ Viresh: Rewrite commit log ] Signed-off-by: Viresh Kumar --- .../devicetree/bindings/cpufreq/cpufreq-dt.txt | 61 ---------------------- 1 file changed, 61 deletions(-) delete mode 100644 Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt deleted file mode 100644 index 1d7e49167666..000000000000 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt +++ /dev/null @@ -1,61 +0,0 @@ -Generic cpufreq driver - -It is a generic DT based cpufreq driver for frequency management. It supports -both uniprocessor (UP) and symmetric multiprocessor (SMP) systems which share -clock and voltage across all CPUs. - -Both required and optional properties listed below must be defined -under node /cpus/cpu@0. - -Required properties: -- None - -Optional properties: -- operating-points: Refer to Documentation/devicetree/bindings/opp/opp-v1.yaml for - details. OPPs *must* be supplied either via DT, i.e. this property, or - populated at runtime. -- clock-latency: Specify the possible maximum transition latency for clock, - in unit of nanoseconds. -- voltage-tolerance: Specify the CPU voltage tolerance in percentage. -- #cooling-cells: - Please refer to - Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml. - -Examples: - -cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - compatible = "arm,cortex-a9"; - reg = <0>; - next-level-cache = <&L2>; - operating-points = < - /* kHz uV */ - 792000 1100000 - 396000 950000 - 198000 850000 - >; - clock-latency = <61036>; /* two CLK32 periods */ - #cooling-cells = <2>; - }; - - cpu@1 { - compatible = "arm,cortex-a9"; - reg = <1>; - next-level-cache = <&L2>; - }; - - cpu@2 { - compatible = "arm,cortex-a9"; - reg = <2>; - next-level-cache = <&L2>; - }; - - cpu@3 { - compatible = "arm,cortex-a9"; - reg = <3>; - next-level-cache = <&L2>; - }; -}; -- cgit v1.2.3 From eaa9c1f31aef4d92d0ea6c9ecde33ea08733182a Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Wed, 27 Aug 2025 10:31:48 +0800 Subject: cpufreq: brcmstb-avs: Use scope-based cleanup helper Replace the manual cpufreq_cpu_put() with __free(put_cpufreq_policy) annotation for policy references. This reduces the risk of reference counting mistakes and aligns the code with the latest kernel style. No functional change intended. 
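As a general illustration of the pattern used in this and the following cleanups, a hedged sketch (the function name is hypothetical, not taken from any of these drivers):

	#include <linux/cleanup.h>
	#include <linux/cpufreq.h>

	/*
	 * The __free(put_cpufreq_policy) annotation drops the policy
	 * reference automatically when 'policy' goes out of scope, on
	 * every return path, so no explicit cpufreq_cpu_put() calls or
	 * error-path gotos are needed.
	 */
	static unsigned int example_get_cur_freq(unsigned int cpu)
	{
		struct cpufreq_policy *policy __free(put_cpufreq_policy) =
			cpufreq_cpu_get(cpu);

		if (!policy)
			return 0;

		return policy->cur; /* reference dropped on return */
	}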
Signed-off-by: Zihuan Zhang [ Viresh: Minor changes to commit log ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 5940d262374f..71450cca8e9f 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -480,7 +480,7 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct private_data *priv; if (!policy) @@ -488,8 +488,6 @@ static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) priv = policy->driver_data; - cpufreq_cpu_put(policy); - return brcm_avs_get_frequency(priv->base); } -- cgit v1.2.3 From c8dc2368b23e1a9f11be4c941e13f3985195d0c9 Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Wed, 27 Aug 2025 10:31:49 +0800 Subject: cpufreq: CPPC: Use scope-based cleanup helper Replace the manual cpufreq_cpu_put() with __free(put_cpufreq_policy) annotation for policy references. This reduces the risk of reference counting mistakes and aligns the code with the latest kernel style. No functional change intended. Signed-off-by: Zihuan Zhang [ Viresh: Minor changes ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/cppc_cpufreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index ecbeb12f46e6..12de0ac7bbaf 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -722,8 +722,8 @@ static int cppc_get_perf_ctrs_sample(int cpu, static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) { + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct cppc_cpudata *cpu_data; u64 delivered_perf; int ret; @@ -733,8 +733,6 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) cpu_data = policy->driver_data; - cpufreq_cpu_put(policy); - ret = cppc_get_perf_ctrs_sample(cpu, &fb_ctrs_t0, &fb_ctrs_t1); if (ret) { if (ret == -EFAULT) -- cgit v1.2.3 From 95102e0cc15688ddffc8da1f15cfec2276fa45e5 Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Wed, 27 Aug 2025 10:31:54 +0800 Subject: cpufreq: s5pv210: Use scope-based cleanup helper Replace the manual cpufreq_cpu_put() with __free(put_cpufreq_policy) annotation for policy references. This reduces the risk of reference counting mistakes and aligns the code with the latest kernel style. No functional change intended. 
Signed-off-by: Zihuan Zhang [ Viresh: Minor changes ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/s5pv210-cpufreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 76c888ed8d16..4215621deb3f 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -554,17 +554,15 @@ out_dmc0: static int s5pv210_cpufreq_reboot_notifier_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(0); int ret; - struct cpufreq_policy *policy; - policy = cpufreq_cpu_get(0); if (!policy) { pr_debug("cpufreq: get no policy for cpu0\n"); return NOTIFY_BAD; } ret = cpufreq_driver_target(policy, SLEEP_FREQ, 0); - cpufreq_cpu_put(policy); if (ret < 0) return NOTIFY_BAD; -- cgit v1.2.3 From 4aeda901d6896344c5b0b3a8428c4a830007ea85 Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Wed, 27 Aug 2025 10:31:52 +0800 Subject: cpufreq: mediatek: Use scope-based cleanup helper Replace the manual cpufreq_cpu_put() with __free(put_cpufreq_policy) annotation for policy references. This reduces the risk of reference counting mistakes and aligns the code with the latest kernel style. No functional change intended. Signed-off-by: Zihuan Zhang [ Viresh: Minor changes ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index f3f02c4b6888..fae062a6431f 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -320,7 +320,6 @@ static int mtk_cpufreq_opp_notifier(struct notifier_block *nb, struct dev_pm_opp *new_opp; struct mtk_cpu_dvfs_info *info; unsigned long freq, volt; - struct cpufreq_policy *policy; int ret = 0; info = container_of(nb, struct mtk_cpu_dvfs_info, opp_nb); @@ -353,12 +352,12 @@ static int mtk_cpufreq_opp_notifier(struct notifier_block *nb, } dev_pm_opp_put(new_opp); - policy = cpufreq_cpu_get(info->opp_cpu); - if (policy) { + + struct cpufreq_policy *policy __free(put_cpufreq_policy) + = cpufreq_cpu_get(info->opp_cpu); + if (policy) cpufreq_driver_target(policy, freq / 1000, CPUFREQ_RELATION_L); - cpufreq_cpu_put(policy); - } } } -- cgit v1.2.3 From 7bc0084632dda2907ff1d021dff36ffbdd83468c Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Wed, 27 Aug 2025 10:31:55 +0800 Subject: cpufreq: tegra186: Use scope-based cleanup helper Replace the manual cpufreq_cpu_put() with __free(put_cpufreq_policy) annotation for policy references. This reduces the risk of reference counting mistakes and aligns the code with the latest kernel style. No functional change intended. 
Signed-off-by: Zihuan Zhang [ Viresh: Minor changes ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra186-cpufreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index cbabb726c664..4270686fc3e3 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -103,13 +103,12 @@ static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy, static unsigned int tegra186_cpufreq_get(unsigned int cpu) { + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct tegra186_cpufreq_data *data = cpufreq_get_driver_data(); struct tegra186_cpufreq_cluster *cluster; - struct cpufreq_policy *policy; unsigned int edvd_offset, cluster_id; u32 ndiv; - policy = cpufreq_cpu_get(cpu); if (!policy) return 0; @@ -117,7 +116,6 @@ static unsigned int tegra186_cpufreq_get(unsigned int cpu) ndiv = readl(data->regs + edvd_offset) & EDVD_CORE_VOLT_FREQ_F_MASK; cluster_id = data->cpus[policy->cpu].bpmp_cluster_id; cluster = &data->clusters[cluster_id]; - cpufreq_cpu_put(policy); return (cluster->ref_clk_khz * ndiv) / cluster->div; } -- cgit v1.2.3 From 592532a77b736b5153e0c2e4c74aa50af0a352ab Mon Sep 17 00:00:00 2001 From: Dennis Beier Date: Sat, 30 Aug 2025 16:43:59 +0200 Subject: cpufreq/longhaul: handle NULL policy in longhaul_exit longhaul_exit() was calling cpufreq_cpu_get(0) without checking for a NULL policy pointer. On some systems, this could lead to a NULL dereference and a kernel warning or panic. This patch adds a check using unlikely() and returns early if the policy is NULL. Bugzilla: #219962 Signed-off-by: Dennis Beier Signed-off-by: Viresh Kumar --- drivers/cpufreq/longhaul.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index ba0e08c8486a..49e76b44468a 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -953,6 +953,9 @@ static void __exit longhaul_exit(void) struct cpufreq_policy *policy = cpufreq_cpu_get(0); int i; + if (unlikely(!policy)) + return; + for (i = 0; i < numscales; i++) { if (mults[i] == maxmult) { struct cpufreq_freqs freqs; -- cgit v1.2.3 From 5545d56fd1a9b1a3a0ac6b144afce5819f32a31b Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Sun, 17 Aug 2025 16:36:36 +0800 Subject: PM: hibernate: Use vmalloc_array() and vcalloc() to improve code Remove array_size() calls and replace vmalloc() and vzalloc() with vmalloc_array() and vcalloc() respectively to simplify the code in save_compressed_image() and load_compressed_image(). vmalloc_array() is also optimized better, resulting in less instructions being used, and vmalloc_array() handling overhead is lower [1]. Link: https://lore.kernel.org/lkml/abc66ec5-85a4-47e1-9759-2f60ab111971@vivo.com/ [1] Signed-off-by: Qianfeng Rong Link: https://patch.msgid.link/20250817083636.53872-1-rongqianfeng@vivo.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- kernel/power/swap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index ad13c461b657..0beff7eeaaba 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -712,7 +712,7 @@ static int save_compressed_image(struct swap_map_handle *handle, goto out_clean; } - data = vzalloc(array_size(nr_threads, sizeof(*data))); + data = vcalloc(nr_threads, sizeof(*data)); if (!data) { pr_err("Failed to allocate %s data\n", hib_comp_algo); ret = -ENOMEM; @@ -1225,14 +1225,14 @@ static int load_compressed_image(struct swap_map_handle *handle, nr_threads = num_online_cpus() - 1; nr_threads = clamp_val(nr_threads, 1, CMP_THREADS); - page = vmalloc(array_size(CMP_MAX_RD_PAGES, sizeof(*page))); + page = vmalloc_array(CMP_MAX_RD_PAGES, sizeof(*page)); if (!page) { pr_err("Failed to allocate %s page\n", hib_comp_algo); ret = -ENOMEM; goto out_clean; } - data = vzalloc(array_size(nr_threads, sizeof(*data))); + data = vcalloc(nr_threads, sizeof(*data)); if (!data) { pr_err("Failed to allocate %s data\n", hib_comp_algo); ret = -ENOMEM; -- cgit v1.2.3 From 98da8a4aecf584af011cdc67d9581deb21088f24 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Tue, 19 Aug 2025 18:40:38 +0800 Subject: PM: hibernate: Fix typo in memory bitmaps description comment Correct 'leave' to 'leaf' in memory bitmaps description comment. Signed-off-by: Li Jun Link: https://patch.msgid.link/20250819104038.1596952-1-lijun01@kylinos.cn [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 501df0676a61..645f42e40478 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -363,7 +363,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) * * One radix tree is represented by one struct mem_zone_bm_rtree. There are * two linked lists for the nodes of the tree, one for the inner nodes and - * one for the leave nodes. The linked leave nodes are used for fast linear + * one for the leaf nodes. The linked leaf nodes are used for fast linear * access of the memory bitmap. * * The struct rtree_node represents one node of the radix tree. -- cgit v1.2.3 From 91418337a27d800b33d6d391f565b1fa2c7ff75c Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Mon, 18 Aug 2025 14:21:24 +0530 Subject: intel_idle: Remove unnecessary address-of operators Remove redundant address-of operators (&) when assigning the intel_idle() function pointer to the .enter field in cpuidle_state structures. In C, the & is not needed for function names. This change improves code consistency and readability by using the more conventional form without the & operator. No functional change intended. Signed-off-by: Kaushlendra Kumar Reviewed-by: Artem Bityutskiy Link: https://patch.msgid.link/20250818085124.3897921-1-kaushlendra.kumar@intel.com Signed-off-by: Rafael J.
Wysocki --- drivers/idle/intel_idle.c | 256 +++++++++++++++++++++++----------------------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 91a7b7e7c0c8..9ba83954c255 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -259,7 +259,7 @@ static struct cpuidle_state nehalem_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 3, .target_residency = 6, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -267,7 +267,7 @@ static struct cpuidle_state nehalem_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C3", @@ -275,7 +275,7 @@ static struct cpuidle_state nehalem_cstates[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .target_residency = 80, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -283,7 +283,7 @@ static struct cpuidle_state nehalem_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 800, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -296,7 +296,7 @@ static struct cpuidle_state snb_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -304,7 +304,7 @@ static struct cpuidle_state snb_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C3", @@ -312,7 +312,7 @@ static struct cpuidle_state snb_cstates[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 211, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -320,7 +320,7 @@ static struct cpuidle_state snb_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, .target_residency = 345, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7", @@ -328,7 +328,7 @@ static struct cpuidle_state snb_cstates[] __initdata = { .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, .target_residency = 345, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -341,7 +341,7 @@ static struct cpuidle_state byt_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6N", @@ -349,7 +349,7 @@ static struct cpuidle_state byt_cstates[] __initdata = { .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, .target_residency = 275, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6S", @@ -357,7 +357,7 @@ static struct cpuidle_state byt_cstates[] __initdata = { .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 500, .target_residency = 560, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { 
.name = "C7", @@ -365,7 +365,7 @@ static struct cpuidle_state byt_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, .target_residency = 4000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7S", @@ -373,7 +373,7 @@ static struct cpuidle_state byt_cstates[] __initdata = { .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, .target_residency = 20000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -386,7 +386,7 @@ static struct cpuidle_state cht_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6N", @@ -394,7 +394,7 @@ static struct cpuidle_state cht_cstates[] __initdata = { .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 275, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6S", @@ -402,7 +402,7 @@ static struct cpuidle_state cht_cstates[] __initdata = { .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 560, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7", @@ -410,7 +410,7 @@ static struct cpuidle_state cht_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, .target_residency = 4000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7S", @@ -418,7 +418,7 @@ static struct cpuidle_state cht_cstates[] __initdata = { .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, .target_residency = 20000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -431,7 +431,7 @@ static struct cpuidle_state ivb_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -439,7 +439,7 @@ static struct cpuidle_state ivb_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C3", @@ -447,7 +447,7 @@ static struct cpuidle_state ivb_cstates[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 156, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -455,7 +455,7 @@ static struct cpuidle_state ivb_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 300, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7", @@ -463,7 +463,7 @@ static struct cpuidle_state ivb_cstates[] __initdata = { .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 87, .target_residency = 300, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -476,7 +476,7 @@ static struct cpuidle_state ivt_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -484,7 
+484,7 @@ static struct cpuidle_state ivt_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 80, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C3", @@ -492,7 +492,7 @@ static struct cpuidle_state ivt_cstates[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 156, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -500,7 +500,7 @@ static struct cpuidle_state ivt_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 82, .target_residency = 300, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -513,7 +513,7 @@ static struct cpuidle_state ivt_cstates_4s[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -521,7 +521,7 @@ static struct cpuidle_state ivt_cstates_4s[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 250, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C3", @@ -529,7 +529,7 @@ static struct cpuidle_state ivt_cstates_4s[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 300, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -537,7 +537,7 @@ static struct cpuidle_state ivt_cstates_4s[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 84, .target_residency = 400, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -550,7 +550,7 @@ static struct cpuidle_state ivt_cstates_8s[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -558,7 +558,7 @@ static struct cpuidle_state ivt_cstates_8s[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 500, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C3", @@ -566,7 +566,7 @@ static struct cpuidle_state ivt_cstates_8s[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 600, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -574,7 +574,7 @@ static struct cpuidle_state ivt_cstates_8s[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 88, .target_residency = 700, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -587,7 +587,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -595,7 +595,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C3", @@ -603,7 +603,7 @@ static 
struct cpuidle_state hsw_cstates[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, .target_residency = 100, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -611,7 +611,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, .target_residency = 400, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7s", @@ -619,7 +619,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, .target_residency = 500, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C8", @@ -627,7 +627,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, .target_residency = 900, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C9", @@ -635,7 +635,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, .target_residency = 1800, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C10", @@ -643,7 +643,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, .target_residency = 7700, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -655,7 +655,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -663,7 +663,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C3", @@ -671,7 +671,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, .target_residency = 100, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -679,7 +679,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, .target_residency = 400, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7s", @@ -687,7 +687,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, .target_residency = 500, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C8", @@ -695,7 +695,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, .target_residency = 900, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C9", @@ -703,7 +703,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, .target_residency = 1800, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C10", @@ 
-711,7 +711,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, .target_residency = 7700, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -724,7 +724,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -732,7 +732,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C3", @@ -740,7 +740,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 70, .target_residency = 100, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -748,7 +748,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, .target_residency = 200, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7s", @@ -756,7 +756,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, .target_residency = 800, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C8", @@ -764,7 +764,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, .target_residency = 800, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C9", @@ -772,7 +772,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, .target_residency = 5000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C10", @@ -780,7 +780,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, .target_residency = 5000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -793,7 +793,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -801,7 +801,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -809,7 +809,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 133, .target_residency = 600, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -822,7 +822,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 
.exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -830,7 +830,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 4, .target_residency = 4, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -838,7 +838,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 170, .target_residency = 600, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -861,7 +861,7 @@ static struct cpuidle_state adl_cstates[] __initdata = { .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -869,7 +869,7 @@ static struct cpuidle_state adl_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, .target_residency = 4, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -877,7 +877,7 @@ static struct cpuidle_state adl_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 220, .target_residency = 600, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C8", @@ -885,7 +885,7 @@ static struct cpuidle_state adl_cstates[] __initdata = { .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 280, .target_residency = 800, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C10", @@ -893,7 +893,7 @@ static struct cpuidle_state adl_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 680, .target_residency = 2000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -906,7 +906,7 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -914,7 +914,7 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, .target_residency = 4, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -922,7 +922,7 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 170, .target_residency = 500, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C8", @@ -930,7 +930,7 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 600, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C10", @@ -938,7 +938,7 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 230, .target_residency = 700, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -951,7 +951,7 @@ static struct cpuidle_state mtl_l_cstates[] __initdata = { .flags 
= MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -959,7 +959,7 @@ static struct cpuidle_state mtl_l_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 420, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C10", @@ -967,7 +967,7 @@ static struct cpuidle_state mtl_l_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 310, .target_residency = 930, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -980,7 +980,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = { .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -988,7 +988,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, .target_residency = 4, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -996,7 +996,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 195, .target_residency = 585, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C8", @@ -1004,7 +1004,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = { .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 260, .target_residency = 1040, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C10", @@ -1012,7 +1012,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 660, .target_residency = 1980, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1025,7 +1025,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -1033,7 +1033,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, .target_residency = 4, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -1042,7 +1042,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { CPUIDLE_FLAG_INIT_XSTATE, .exit_latency = 290, .target_residency = 800, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1055,7 +1055,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -1063,7 +1063,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 4, .target_residency = 4, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -1073,7 +1073,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = { 
CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 170, .target_residency = 650, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6P", @@ -1083,7 +1083,7 @@ static struct cpuidle_state gnr_cstates[] __initdata = { CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 210, .target_residency = 1000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1096,7 +1096,7 @@ static struct cpuidle_state gnrd_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -1104,7 +1104,7 @@ static struct cpuidle_state gnrd_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 4, .target_residency = 4, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -1114,7 +1114,7 @@ static struct cpuidle_state gnrd_cstates[] __initdata = { CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 220, .target_residency = 650, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6P", @@ -1124,7 +1124,7 @@ static struct cpuidle_state gnrd_cstates[] __initdata = { CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 240, .target_residency = 750, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1137,7 +1137,7 @@ static struct cpuidle_state atom_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C2", @@ -1145,7 +1145,7 @@ static struct cpuidle_state atom_cstates[] __initdata = { .flags = MWAIT2flg(0x10), .exit_latency = 20, .target_residency = 80, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C4", @@ -1153,7 +1153,7 @@ static struct cpuidle_state atom_cstates[] __initdata = { .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .target_residency = 400, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -1161,7 +1161,7 @@ static struct cpuidle_state atom_cstates[] __initdata = { .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1173,7 +1173,7 @@ static struct cpuidle_state tangier_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 4, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C4", @@ -1181,7 +1181,7 @@ static struct cpuidle_state tangier_cstates[] __initdata = { .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .target_residency = 400, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -1189,7 +1189,7 @@ static struct cpuidle_state tangier_cstates[] __initdata = { .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7", @@ -1197,7 +1197,7 @@ static struct cpuidle_state tangier_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, .target_residency = 
4000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C9", @@ -1205,7 +1205,7 @@ static struct cpuidle_state tangier_cstates[] __initdata = { .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, .target_residency = 20000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1217,7 +1217,7 @@ static struct cpuidle_state avn_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -1225,7 +1225,7 @@ static struct cpuidle_state avn_cstates[] __initdata = { .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 15, .target_residency = 45, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1237,7 +1237,7 @@ static struct cpuidle_state knl_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 2, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle }, { .name = "C6", @@ -1245,7 +1245,7 @@ static struct cpuidle_state knl_cstates[] __initdata = { .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 120, .target_residency = 500, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle }, { .enter = NULL } @@ -1258,7 +1258,7 @@ static struct cpuidle_state bxt_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -1266,7 +1266,7 @@ static struct cpuidle_state bxt_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -1274,7 +1274,7 @@ static struct cpuidle_state bxt_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, .target_residency = 133, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C7s", @@ -1282,7 +1282,7 @@ static struct cpuidle_state bxt_cstates[] __initdata = { .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 155, .target_residency = 155, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C8", @@ -1290,7 +1290,7 @@ static struct cpuidle_state bxt_cstates[] __initdata = { .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1000, .target_residency = 1000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C9", @@ -1298,7 +1298,7 @@ static struct cpuidle_state bxt_cstates[] __initdata = { .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2000, .target_residency = 2000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C10", @@ -1306,7 +1306,7 @@ static struct cpuidle_state bxt_cstates[] __initdata = { .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, .target_residency = 10000, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1319,7 +1319,7 @@ static struct cpuidle_state dnv_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = 
&intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -1327,7 +1327,7 @@ static struct cpuidle_state dnv_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -1335,7 +1335,7 @@ static struct cpuidle_state dnv_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 50, .target_residency = 500, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1352,7 +1352,7 @@ static struct cpuidle_state snr_cstates[] __initdata = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -1360,7 +1360,7 @@ static struct cpuidle_state snr_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 15, .target_residency = 25, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6", @@ -1368,7 +1368,7 @@ static struct cpuidle_state snr_cstates[] __initdata = { .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 130, .target_residency = 500, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1381,7 +1381,7 @@ static struct cpuidle_state grr_cstates[] __initdata = { .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -1389,7 +1389,7 @@ static struct cpuidle_state grr_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, .target_residency = 10, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6S", @@ -1397,7 +1397,7 @@ static struct cpuidle_state grr_cstates[] __initdata = { .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 500, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } @@ -1410,7 +1410,7 @@ static struct cpuidle_state srf_cstates[] __initdata = { .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", @@ -1418,7 +1418,7 @@ static struct cpuidle_state srf_cstates[] __initdata = { .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, .target_residency = 10, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6S", @@ -1427,7 +1427,7 @@ static struct cpuidle_state srf_cstates[] __initdata = { CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 270, .target_residency = 700, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6SP", @@ -1436,7 +1436,7 @@ static struct cpuidle_state srf_cstates[] __initdata = { CPUIDLE_FLAG_PARTIAL_HINT_MATCH, .exit_latency = 310, .target_residency = 900, - .enter = &intel_idle, + .enter = intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } -- cgit v1.2.3 From 3e681899cc6e6c77eca55dd8c7cc57b27868e8a2 Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Wed, 3 Sep 2025 20:12:27 +0800 Subject: cpufreq: mediatek: avoid 
redundant conditions While 'if (i <= 0) ... else if (i > 0) ...' is technically equivalent to 'if (i <= 0) ... else ...', the latter is vastly easier to read because it avoids writing out a condition that is unnecessary. Let's drop such unnecessary conditions. Signed-off-by: Liao Yuanhong Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index fae062a6431f..00de1166188a 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -123,7 +123,7 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info, soc_data->sram_max_volt); return ret; } - } else if (pre_vproc > new_vproc) { + } else { vproc = max(new_vproc, pre_vsram - soc_data->max_volt_shift); ret = regulator_set_voltage(proc_reg, vproc, -- cgit v1.2.3 From 56a232d93cea0ba14da5e3157830330756a45b4c Mon Sep 17 00:00:00 2001 From: Samuel Wu Date: Wed, 20 Aug 2025 17:42:33 -0700 Subject: PM: sleep: Make pm_wakeup_clear() call more clear Move pm_wakeup_clear() to the same location as other functions that do bookkeeping prior to suspend_prepare(). Since calling pm_wakeup_clear() is a prerequisite to setting up for suspend and enabling functionalities of suspend (like aborting during suspend), moving pm_wakeup_clear() higher up the call stack makes its intent clearer and makes it obvious that it is called prior to suspend_prepare(). After this change, there is a slightly larger window when abort events can be registered, but otherwise suspend functionality is the same. Suggested-by: Saravana Kannan Signed-off-by: Samuel Wu Link: https://patch.msgid.link/20250821004237.2712312-2-wusamuel@google.com Reviewed-by: Saravana Kannan [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- kernel/power/process.c | 1 - kernel/power/suspend.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index dc0dfc349f22..8ff68ebaa1e0 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -132,7 +132,6 @@ int freeze_processes(void) if (!pm_freezing) static_branch_inc(&freezer_active); - pm_wakeup_clear(0); pm_freezing = true; error = try_to_freeze_tasks(true); if (!error) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index b4ca17c2fecf..4bb4686c1c08 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -595,6 +595,7 @@ static int enter_state(suspend_state_t state) } pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); + pm_wakeup_clear(0); pm_suspend_clear_flags(); error = suspend_prepare(state); if (error) -- cgit v1.2.3 From 97248d05b70edc674f2f2fa835fed33172686b1d Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Tue, 2 Sep 2025 15:33:23 +0800 Subject: cpufreq: Drop redundant freq_table parameter Since commit e0b3165ba521 ("cpufreq: add 'freq_table' in struct cpufreq_policy"), freq_table has been stored in struct cpufreq_policy instead of being maintained separately. However, several helpers in freq_table.c still take both policy and freq_table as parameters, even though policy->freq_table can always be used. This leads to redundant function arguments and increases the chance of inconsistencies. This patch removes the unnecessary freq_table argument from these functions and updates their callers to pass only the policy. This makes the code simpler, more consistent, and avoids duplication.
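For reference, a minimal sketch of a driver ->verify() callback after this change, mirroring the converted drivers (the function name is hypothetical):

	#include <linux/cpufreq.h>

	/*
	 * Hedged sketch: cpufreq_frequency_table_verify() now reads the
	 * table from policy->freq_table internally, so callers pass only
	 * the policy.
	 */
	static int example_cpufreq_verify(struct cpufreq_policy_data *policy)
	{
		if (policy->freq_table)
			return cpufreq_frequency_table_verify(policy);

		cpufreq_verify_within_cpu_limits(policy);
		return 0;
	}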
Signed-off-by: Zihuan Zhang Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250902073323.48330-1-zhangzihuan@kylinos.cn Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- drivers/cpufreq/freq_table.c | 14 ++++++-------- drivers/cpufreq/sh-cpufreq.c | 6 ++---- drivers/cpufreq/virtual-cpufreq.c | 2 +- include/linux/cpufreq.h | 7 +++---- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a615c98d80ca..5fcc99f768d2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2793,7 +2793,7 @@ int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) if (!policy->freq_table) return -ENXIO; - ret = cpufreq_frequency_table_cpuinfo(policy, policy->freq_table); + ret = cpufreq_frequency_table_cpuinfo(policy); if (ret) { pr_err("%s: Policy frequency update failed\n", __func__); return ret; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 35de513af6c9..d5111ee56e38 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -28,10 +28,9 @@ static bool policy_has_boost_freq(struct cpufreq_policy *policy) return false; } -int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table) +int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy) { - struct cpufreq_frequency_table *pos; + struct cpufreq_frequency_table *pos, *table = policy->freq_table; unsigned int min_freq = ~0; unsigned int max_freq = 0; unsigned int freq; @@ -65,10 +64,9 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, return 0; } -int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, - struct cpufreq_frequency_table *table) +int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy) { - struct cpufreq_frequency_table *pos; + struct cpufreq_frequency_table *pos, *table = policy->freq_table; unsigned int freq, prev_smaller = 0; bool found = false; @@ -110,7 +108,7 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) if (!policy->freq_table) return -ENODEV; - return cpufreq_frequency_table_verify(policy, policy->freq_table); + return cpufreq_frequency_table_verify(policy); } EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); @@ -354,7 +352,7 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) return 0; } - ret = cpufreq_frequency_table_cpuinfo(policy, policy->freq_table); + ret = cpufreq_frequency_table_cpuinfo(policy); if (ret) return ret; diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c index 9c0b01e00508..642ddb9ea217 100644 --- a/drivers/cpufreq/sh-cpufreq.c +++ b/drivers/cpufreq/sh-cpufreq.c @@ -89,11 +89,9 @@ static int sh_cpufreq_target(struct cpufreq_policy *policy, static int sh_cpufreq_verify(struct cpufreq_policy_data *policy) { struct clk *cpuclk = &per_cpu(sh_cpuclk, policy->cpu); - struct cpufreq_frequency_table *freq_table; - freq_table = cpuclk->nr_freqs ? 
cpuclk->freq_table : NULL; - if (freq_table) - return cpufreq_frequency_table_verify(policy, freq_table); + if (policy->freq_table) + return cpufreq_frequency_table_verify(policy); cpufreq_verify_within_cpu_limits(policy); diff --git a/drivers/cpufreq/virtual-cpufreq.c b/drivers/cpufreq/virtual-cpufreq.c index 7dd1b0c263c7..6ffa16d239b2 100644 --- a/drivers/cpufreq/virtual-cpufreq.c +++ b/drivers/cpufreq/virtual-cpufreq.c @@ -250,7 +250,7 @@ static int virt_cpufreq_offline(struct cpufreq_policy *policy) static int virt_cpufreq_verify_policy(struct cpufreq_policy_data *policy) { if (policy->freq_table) - return cpufreq_frequency_table_verify(policy, policy->freq_table); + return cpufreq_frequency_table_verify(policy); cpufreq_verify_within_cpu_limits(policy); return 0; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 95f3807c8c55..40966512ea18 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -780,11 +780,10 @@ struct cpufreq_frequency_table { else -int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table); +int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy); + +int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy); -int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, - struct cpufreq_frequency_table *table); int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, -- cgit v1.2.3 From b49d70849530f046b1f9a42c963840ca91c60929 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Tue, 2 Sep 2025 19:45:42 +0800 Subject: cpufreq: Use int type to store negative error codes Change the 'ret' variable in store_scaling_setspeed() from unsigned int to int, as it needs to store either negative error codes or zero returned by kstrtouint(). No effect on runtime. Signed-off-by: Qianfeng Rong Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250902114545.651661-2-rongqianfeng@vivo.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5fcc99f768d2..6812afdf0dbf 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -914,7 +914,7 @@ static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, const char *buf, size_t count) { unsigned int freq = 0; - unsigned int ret; + int ret; if (!policy->governor || !policy->governor->store_setspeed) return -EINVAL; -- cgit v1.2.3 From e5e9b7bd864261f8132abb542afac4758e6232a3 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Tue, 2 Sep 2025 19:45:44 +0800 Subject: cpufreq: speedstep-lib: Use int type to store negative error codes Change the return type of the speedstep_get_freqs() function from unsigned int to int because it may return negative error codes. For the same reason, change the 'ret' variables to int type as well. No effect on runtime. Signed-off-by: Qianfeng Rong Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250902114545.651661-4-rongqianfeng@vivo.com Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/speedstep-lib.c | 12 ++++++------ drivers/cpufreq/speedstep-lib.h | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index 0b66df4ed513..f8b42e981635 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -378,16 +378,16 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); * DETECT SPEEDSTEP SPEEDS * *********************************************************************/ -unsigned int speedstep_get_freqs(enum speedstep_processor processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)) +int speedstep_get_freqs(enum speedstep_processor processor, + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state)(unsigned int state)) { unsigned int prev_speed; - unsigned int ret = 0; unsigned long flags; ktime_t tv1, tv2; + int ret = 0; if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) return -EINVAL; diff --git a/drivers/cpufreq/speedstep-lib.h b/drivers/cpufreq/speedstep-lib.h index dc762ea786be..48329647d4c4 100644 --- a/drivers/cpufreq/speedstep-lib.h +++ b/drivers/cpufreq/speedstep-lib.h @@ -41,8 +41,8 @@ extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); * SPEEDSTEP_LOW; the second argument is zero so that no * cpufreq_notify_transition calls are initiated. */ -extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)); +extern int speedstep_get_freqs(enum speedstep_processor processor, + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state)(unsigned int state)); -- cgit v1.2.3 From be82483d1b60baf6747884bd74cb7de484deaf76 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Sep 2025 15:55:45 +0200 Subject: PM: sleep: core: Clear power.must_resume in noirq suspend error path If system suspend is aborted in the "noirq" phase (for instance, due to an error returned by one of the device callbacks), power.is_noirq_suspended will not be set for some devices and device_resume_noirq() will return early for them. Consequently, noirq resume callbacks will not run for them at all because the noirq suspend callbacks have not run for them yet. If any of them has power.must_resume set and late suspend has been skipped for it (due to power.smart_suspend), early resume should be skipped for it as well, or its state may become inconsistent (for instance, if the early resume assumes that it will always follow noirq resume). Make that happen by clearing power.must_resume in device_resume_noirq() for devices with power.is_noirq_suspended clear that have been left in suspend by device_suspend_late(), which will subsequently cause device_resume_early() to leave the device in suspend and avoid changing its state. Fixes: 0d4b54c6fee8 ("PM / core: Add LEAVE_SUSPENDED driver flag") Link: https://lore.kernel.org/linux-pm/5d692b81-6f58-4e86-9cb0-ede69a09d799@rowland.harvard.edu/ Signed-off-by: Rafael J.
Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/3381776.aeNJFYEL58@rafael.j.wysocki --- drivers/base/power/main.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 2ea6e05e6ec9..c883b01ffbdd 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -724,8 +724,20 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy if (dev->power.syscore || dev->power.direct_complete) goto Out; - if (!dev->power.is_noirq_suspended) + if (!dev->power.is_noirq_suspended) { + /* + * This means that system suspend has been aborted in the noirq + * phase before invoking the noirq suspend callback for the + * device, so if device_suspend_late() has left it in suspend, + * device_resume_early() should also leave it in suspend, in + * case its early resume depends on the noirq resume that + * has not run. + */ + if (dev_pm_skip_suspend(dev)) + dev->power.must_resume = false; + goto Out; + } if (!dpm_wait_for_superior(dev, async)) goto Out; -- cgit v1.2.3 From c05fa4091863468fd53abf1334c05052eda1ff52 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Sep 2025 16:56:04 +0200 Subject: cpufreq: core: Rearrange variable declarations involving __free() Follow cleanup.h recommendations and always define and assign variables in one statement when __free() is used. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Link: https://patch.msgid.link/4691667.LvFx2qVVIh@rafael.j.wysocki --- drivers/cpufreq/cpufreq.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6812afdf0dbf..30e8e9b3c12f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1845,7 +1845,6 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b */ unsigned int cpufreq_quick_get(unsigned int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; unsigned long flags; read_lock_irqsave(&cpufreq_driver_lock, flags); @@ -1860,7 +1859,7 @@ unsigned int cpufreq_quick_get(unsigned int cpu) read_unlock_irqrestore(&cpufreq_driver_lock, flags); - policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (policy) return policy->cur; @@ -1876,9 +1875,7 @@ EXPORT_SYMBOL(cpufreq_quick_get); */ unsigned int cpufreq_quick_get_max(unsigned int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy); - - policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (policy) return policy->max; @@ -1894,9 +1891,7 @@ EXPORT_SYMBOL(cpufreq_quick_get_max); */ __weak unsigned int cpufreq_get_hw_max_freq(unsigned int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy); - - policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (policy) return policy->cpuinfo.max_freq; @@ -1920,9 +1915,7 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) */ unsigned int cpufreq_get(unsigned int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy); - - policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) return 0; @@ -2751,9 +2744,7 @@ static void cpufreq_policy_refresh(struct cpufreq_policy
*policy) */ void cpufreq_update_policy(unsigned int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy); - - policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) return; @@ -2770,9 +2761,7 @@ EXPORT_SYMBOL(cpufreq_update_policy); */ void cpufreq_update_limits(unsigned int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy); - - policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) return; @@ -3054,9 +3043,7 @@ static int __init cpufreq_core_init(void) static bool cpufreq_policy_is_good_for_eas(unsigned int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy); - - policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) { pr_debug("cpufreq policy not set for CPU: %d\n", cpu); return false; -- cgit v1.2.3 From 5f2ec3536d7933a804d28d2541e4e918ba32587b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Sep 2025 16:59:47 +0200 Subject: cpufreq: intel_pstate: Rearrange variable declaration involving __free() Follow cleanup.h recommendations and define and assign a variable in one statement when __free() is used. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Reviewed-by: Zihuan Zhang Link: https://patch.msgid.link/2251447.irdbgypaU6@rafael.j.wysocki --- drivers/cpufreq/intel_pstate.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f366d35c5840..81fc1408754a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1502,9 +1502,7 @@ static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy, static bool intel_pstate_update_max_freq(struct cpudata *cpudata) { - struct cpufreq_policy *policy __free(put_cpufreq_policy); - - policy = cpufreq_cpu_get(cpudata->cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); if (!policy) return false; -- cgit v1.2.3 From 42c74f6b1c3694892ab92750a60aeb6212d73ac1 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 3 Sep 2025 17:06:07 -0700 Subject: cpufreq: intel_pstate: Remove EPB-related code The intel_pstate driver does not enable HWP mode when CPUID.06H:EAX[10] is not set, indicating that EPP (Energy Performance Preference) is not supported by the hardware. When EPP is unavailable, the system falls back to using EPB (Energy Performance Bias) if the feature is supported. However, since the intel_pstate driver will not enable HWP in this scenario, any EPB-related code becomes unreachable and irrelevant. Remove the EPB handling code paths simplifying the driver logic and reducing code size. Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250904000608.260817-1-srinivas.pandruvada@linux.intel.com [ rjw: Subject adjustment ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 49 ++++-------------------------------------- 1 file changed, 4 insertions(+), 45 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 81fc1408754a..e9fe5dd6c792 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -620,24 +620,9 @@ static int min_perf_pct_min(void) (cpu->pstate.min_pstate * 100 / turbo_pstate) : 0; } -static s16 intel_pstate_get_epb(struct cpudata *cpu_data) -{ - u64 epb; - int ret; - - if (!boot_cpu_has(X86_FEATURE_EPB)) - return -ENXIO; - - ret = rdmsrq_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); - if (ret) - return (s16)ret; - - return (s16)(epb & 0x0f); -} - static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data) { - s16 epp; + s16 epp = -EOPNOTSUPP; if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { /* @@ -651,34 +636,13 @@ static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data) return epp; } epp = (hwp_req_data >> 24) & 0xff; - } else { - /* When there is no EPP present, HWP uses EPB settings */ - epp = intel_pstate_get_epb(cpu_data); } return epp; } -static int intel_pstate_set_epb(int cpu, s16 pref) -{ - u64 epb; - int ret; - - if (!boot_cpu_has(X86_FEATURE_EPB)) - return -ENXIO; - - ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); - if (ret) - return ret; - - epb = (epb & ~0x0f) | pref; - wrmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb); - - return 0; -} - /* - * EPP/EPB display strings corresponding to EPP index in the + * EPP display strings corresponding to EPP index in the * energy_perf_strings[] * index String *------------------------------------- @@ -782,7 +746,7 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, u32 raw_epp) { int epp = -EINVAL; - int ret; + int ret = -EOPNOTSUPP; if (!pref_index) epp = cpu_data->epp_default; @@ -802,10 +766,6 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, return -EBUSY; ret = intel_pstate_set_epp(cpu_data, epp); - } else { - if (epp == -EINVAL) - epp = (pref_index - 1) << 2; - ret = intel_pstate_set_epb(cpu_data->cpu, epp); } return ret; @@ -1337,9 +1297,8 @@ static void intel_pstate_hwp_set(unsigned int cpu) if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { value &= ~GENMASK_ULL(31, 24); value |= (u64)epp << 24; - } else { - intel_pstate_set_epb(cpu, epp); } + skip_epp: WRITE_ONCE(cpu_data->hwp_req_cached, value); wrmsrq_on_cpu(cpu, MSR_HWP_REQUEST, value); -- cgit v1.2.3 From fdd9ae23bb989fa9ed1beebba7d3e0c82c7c81ae Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Sep 2025 15:43:50 +0200 Subject: PM: core: Annotate loops walking device links as _srcu Since SRCU is used for the protection of device link lists, the loops over device link lists in multiple places in drivers/base/power/main.c and in pm_runtime_get_suppliers() should be annotated as _srcu rather than as _rcu which is the case currently. Change the annotations accordingly. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Ulf Hansson Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2393512.ElGaqSPkdT@rafael.j.wysocki --- drivers/base/power/main.c | 18 +++++++++--------- drivers/base/power/runtime.c | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 2ea6e05e6ec9..2fa53896db82 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -40,8 +40,8 @@ typedef int (*pm_callback_t)(struct device *); -#define list_for_each_entry_rcu_locked(pos, head, member) \ - list_for_each_entry_rcu(pos, head, member, \ +#define list_for_each_entry_srcu_locked(pos, head, member) \ + list_for_each_entry_srcu(pos, head, member, \ device_links_read_lock_held()) /* @@ -281,7 +281,7 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async) * callbacks freeing the link objects for the links in the list we're * walking. */ - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) + list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_wait(link->supplier, async); @@ -338,7 +338,7 @@ static void dpm_wait_for_consumers(struct device *dev, bool async) * continue instead of trying to continue in parallel with its * unregistration). */ - list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node) + list_for_each_entry_srcu_locked(link, &dev->links.consumers, s_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_wait(link->consumer, async); @@ -675,7 +675,7 @@ static void dpm_async_resume_subordinate(struct device *dev, async_func_t func) idx = device_links_read_lock(); /* Start processing the device's "async" consumers. */ - list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node) + list_for_each_entry_srcu_locked(link, &dev->links.consumers, s_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_async_with_cleanup(link->consumer, func); @@ -1330,7 +1330,7 @@ static void dpm_async_suspend_superior(struct device *dev, async_func_t func) idx = device_links_read_lock(); /* Start processing the device's "async" suppliers. 
*/ - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) + list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_async_with_cleanup(link->supplier, func); @@ -1384,7 +1384,7 @@ static void dpm_superior_set_must_resume(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) + list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) link->supplier->power.must_resume = true; device_links_read_unlock(idx); @@ -1813,7 +1813,7 @@ static void dpm_clear_superiors_direct_complete(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) { spin_lock_irq(&link->supplier->power.lock); link->supplier->power.direct_complete = false; spin_unlock_irq(&link->supplier->power.lock); @@ -2065,7 +2065,7 @@ static bool device_prepare_smart_suspend(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) { if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) continue; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 3e84dc4122de..8c23a11e8017 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1903,8 +1903,8 @@ void pm_runtime_get_suppliers(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) + list_for_each_entry_srcu(link, &dev->links.suppliers, c_node, + device_links_read_lock_held()) if (device_link_test(link, DL_FLAG_PM_RUNTIME)) { link->supplier_preactivated = true; pm_runtime_get_sync(link->supplier); -- cgit v1.2.3 From 3ce3f569991347d2085925041f4932232da43bcf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Sep 2025 15:45:14 +0200 Subject: PM: core: Add two macros for walking device links Add separate macros for walking links to suppliers and consumers of a device to help device links users to avoid exposing the internals of struct dev_links_info in their code and possible coding mistakes related to that. Accordingly, use the new macros to replace open-coded device links list walks in the core power management code. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Ulf Hansson Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/1944671.tdWV9SEqCh@rafael.j.wysocki --- drivers/base/base.h | 8 ++++++++ drivers/base/power/main.c | 18 +++++++----------- drivers/base/power/runtime.c | 3 +-- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 123031a757d9..700aecd22fd3 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -251,6 +251,14 @@ void device_links_unbind_consumers(struct device *dev); void fw_devlink_drivers_done(void); void fw_devlink_probing_done(void); +#define dev_for_each_link_to_supplier(__link, __dev) \ + list_for_each_entry_srcu(__link, &(__dev)->links.suppliers, c_node, \ + device_links_read_lock_held()) + +#define dev_for_each_link_to_consumer(__link, __dev) \ + list_for_each_entry_srcu(__link, &(__dev)->links.consumers, s_node, \ + device_links_read_lock_held()) + /* device pm support */ void device_pm_move_to_tail(struct device *dev); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 2fa53896db82..e3a7f25bf8d3 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -40,10 +40,6 @@ typedef int (*pm_callback_t)(struct device *); -#define list_for_each_entry_srcu_locked(pos, head, member) \ - list_for_each_entry_srcu(pos, head, member, \ - device_links_read_lock_held()) - /* * The entries in the dpm_list list are in a depth first order, simply * because children are guaranteed to be discovered after parents, and @@ -281,7 +277,7 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async) * callbacks freeing the link objects for the links in the list we're * walking. */ - list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) + dev_for_each_link_to_supplier(link, dev) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_wait(link->supplier, async); @@ -338,7 +334,7 @@ static void dpm_wait_for_consumers(struct device *dev, bool async) * continue instead of trying to continue in parallel with its * unregistration). */ - list_for_each_entry_srcu_locked(link, &dev->links.consumers, s_node) + dev_for_each_link_to_consumer(link, dev) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_wait(link->consumer, async); @@ -675,7 +671,7 @@ static void dpm_async_resume_subordinate(struct device *dev, async_func_t func) idx = device_links_read_lock(); /* Start processing the device's "async" consumers. */ - list_for_each_entry_srcu_locked(link, &dev->links.consumers, s_node) + dev_for_each_link_to_consumer(link, dev) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_async_with_cleanup(link->consumer, func); @@ -1330,7 +1326,7 @@ static void dpm_async_suspend_superior(struct device *dev, async_func_t func) idx = device_links_read_lock(); /* Start processing the device's "async" suppliers. 
*/ - list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) + dev_for_each_link_to_supplier(link, dev) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_async_with_cleanup(link->supplier, func); @@ -1384,7 +1380,7 @@ static void dpm_superior_set_must_resume(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) + dev_for_each_link_to_supplier(link, dev) link->supplier->power.must_resume = true; device_links_read_unlock(idx); @@ -1813,7 +1809,7 @@ static void dpm_clear_superiors_direct_complete(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) { + dev_for_each_link_to_supplier(link, dev) { spin_lock_irq(&link->supplier->power.lock); link->supplier->power.direct_complete = false; spin_unlock_irq(&link->supplier->power.lock); @@ -2065,7 +2061,7 @@ static bool device_prepare_smart_suspend(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) { + dev_for_each_link_to_supplier(link, dev) { if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) continue; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 8c23a11e8017..7420b9851fe0 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1903,8 +1903,7 @@ void pm_runtime_get_suppliers(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_srcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) + dev_for_each_link_to_supplier(link, dev) if (device_link_test(link, DL_FLAG_PM_RUNTIME)) { link->supplier_preactivated = true; pm_runtime_get_sync(link->supplier); -- cgit v1.2.3 From fc33bf0e097c6834646b98a7b3da0ae5b617f0f9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Aug 2025 18:58:23 +0300 Subject: PM / devfreq: mtk-cci: Fix potential error pointer dereference in probe() The drv->sram_reg pointer could be set to ERR_PTR(-EPROBE_DEFER) which would lead to an error pointer dereference. Use IS_ERR_OR_NULL() to check that the pointer is valid. Fixes: e09bd5757b52 ("PM / devfreq: mtk-cci: Handle sram regulator probe deferral") Signed-off-by: Dan Carpenter Signed-off-by: Chanwoo Choi Link: https://patchwork.kernel.org/project/linux-pm/patch/aJTNHz8kk8s6Q2os@stanley.mountain/ --- drivers/devfreq/mtk-cci-devfreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c index 22fe9e631f8a..5730076846e1 100644 --- a/drivers/devfreq/mtk-cci-devfreq.c +++ b/drivers/devfreq/mtk-cci-devfreq.c @@ -386,7 +386,8 @@ out_disable_cci_clk: out_free_resources: if (regulator_is_enabled(drv->proc_reg)) regulator_disable(drv->proc_reg); - if (drv->sram_reg && regulator_is_enabled(drv->sram_reg)) + if (!IS_ERR_OR_NULL(drv->sram_reg) && + regulator_is_enabled(drv->sram_reg)) regulator_disable(drv->sram_reg); return ret; -- cgit v1.2.3 From 6d0982ae7880f2c4d5fbd1b5f7574ea93d1311ac Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Wed, 3 Sep 2025 20:14:52 +0800 Subject: PM / devfreq: mtk-cci: avoid redundant conditions While 'if (i <= 0) ... else if (i > 0) ...' is technically equivalent to 'if (i <= 0) ... else ...', the latter is vastly easier to read because it avoids writing out a condition that is unnecessary. Let's drop such unnecessary conditions.
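Condensed, the pattern being dropped here looks like this (a sketch with the branch bodies elided; it assumes the preceding if condition is the exact complement of the removed else-if, as in mtk_ccifreq_set_voltage()):

    if (pre_voltage <= new_voltage) {
            /* rising: raise vsram before vproc */
    } else {        /* was: else if (pre_voltage > new_voltage) */
            /* falling: lower vproc before vsram */
    }

The else branch can only be reached when pre_voltage > new_voltage anyway, so spelling the condition out adds nothing.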
Signed-off-by: Liao Yuanhong Signed-off-by: Chanwoo Choi Link: https://patchwork.kernel.org/project/linux-pm/patch/20250903121452.387023-1-liaoyuanhong@vivo.com/ --- drivers/devfreq/mtk-cci-devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c index 5730076846e1..4c22be728f6a 100644 --- a/drivers/devfreq/mtk-cci-devfreq.c +++ b/drivers/devfreq/mtk-cci-devfreq.c @@ -86,7 +86,7 @@ static int mtk_ccifreq_set_voltage(struct mtk_ccifreq_drv *drv, int new_voltage) soc_data->sram_max_volt); return ret; } - } else if (pre_voltage > new_voltage) { + } else { voltage = max(new_voltage, pre_vsram - soc_data->max_volt_shift); ret = regulator_set_voltage(drv->proc_reg, voltage, -- cgit v1.2.3 From 69e5d50fcf4093fb3f9f41c4f931f12c2ca8c467 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Sep 2025 15:52:03 +0200 Subject: cpufreq: intel_pstate: Fix object lifecycle issue in update_qos_request() The cpufreq_cpu_put() call in update_qos_request() takes place too early because the latter subsequently calls freq_qos_update_request() that indirectly accesses the policy object in question through the QoS request object passed to it. Fortunately, update_qos_request() is called under intel_pstate_driver_lock, so this issue does not matter for changing the intel_pstate operation mode, but it theoretically can cause a crash to occur on CPU device hot removal (which currently can only happen in virt, but it is formally supported nevertheless). Address this issue by modifying update_qos_request() to drop the reference to the policy later. Fixes: da5c504c7aae ("cpufreq: intel_pstate: Implement QoS supported freq constraints") Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Zihuan Zhang Link: https://patch.msgid.link/2255671.irdbgypaU6@rafael.j.wysocki --- drivers/cpufreq/intel_pstate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e9fe5dd6c792..4cb5e40dd264 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1667,10 +1667,10 @@ static void update_qos_request(enum freq_qos_req_type type) continue; req = policy->driver_data; - cpufreq_cpu_put(policy); - - if (!req) + if (!req) { + cpufreq_cpu_put(policy); continue; + } if (hwp_active) intel_pstate_get_hwp_cap(cpu); @@ -1686,6 +1686,8 @@ static void update_qos_request(enum freq_qos_req_type type) if (freq_qos_update_request(req, freq) < 0) pr_warn("Failed to update freq constraint: CPU%d\n", i); + + cpufreq_cpu_put(policy); } } -- cgit v1.2.3 From f1bbf5bbf254cba0ae3a46cf4c0a9a6257805b79 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Sep 2025 15:52:54 +0200 Subject: cpufreq: intel_pstate: Rearrange freq QoS updates using __free() Move the code from the for_each_possible_cpu() loop in update_qos_request() to a separate function and use __free() for cpufreq policy reference counting in it to avoid having to call cpufreq_cpu_put() repeatedly (or using goto). While at it, rename update_qos_request() to update_qos_requests() because it updates multiple requests in one go. No intentional functional impact. Link: https://lore.kernel.org/linux-pm/CAJZ5v0gN1T5woSF0tO=TbAh+2-sWzxFjWyDbB7wG2TFCOU01iQ@mail.gmail.com/ Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Zihuan Zhang Link: https://patch.msgid.link/3026597.e9J7NaK4W3@rafael.j.wysocki [ rjw: Rename "cpu" to "cpudata" and "cpunum" to "cpu" in new code ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 57 +++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4cb5e40dd264..33a3debb139f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1652,43 +1652,42 @@ unlock_driver: return count; } -static void update_qos_request(enum freq_qos_req_type type) +static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type) { + struct cpudata *cpudata = all_cpu_data[cpu]; struct freq_qos_request *req; - struct cpufreq_policy *policy; - int i; + unsigned int freq, perf_pct; - for_each_possible_cpu(i) { - struct cpudata *cpu = all_cpu_data[i]; - unsigned int freq, perf_pct; + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); + if (!policy) + return; - policy = cpufreq_cpu_get(i); - if (!policy) - continue; + req = policy->driver_data; + if (!req) + return; - req = policy->driver_data; - if (!req) { - cpufreq_cpu_put(policy); - continue; - } + if (hwp_active) + intel_pstate_get_hwp_cap(cpudata); - if (hwp_active) - intel_pstate_get_hwp_cap(cpu); + if (type == FREQ_QOS_MIN) { + perf_pct = global.min_perf_pct; + } else { + req++; + perf_pct = global.max_perf_pct; + } - if (type == FREQ_QOS_MIN) { - perf_pct = global.min_perf_pct; - } else { - req++; - perf_pct = global.max_perf_pct; - } + freq = DIV_ROUND_UP(cpudata->pstate.turbo_freq * perf_pct, 100); - freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * perf_pct, 100); + if (freq_qos_update_request(req, freq) < 0) + pr_warn("Failed to update freq constraint: CPU%d\n", cpu); +} - if (freq_qos_update_request(req, freq) < 0) - pr_warn("Failed to update freq constraint: CPU%d\n", i); +static void update_qos_requests(enum freq_qos_req_type type) +{ + int i; - cpufreq_cpu_put(policy); - } + for_each_possible_cpu(i) + update_cpu_qos_request(i, type); } static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, @@ -1717,7 +1716,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, if (intel_pstate_driver == &intel_pstate) intel_pstate_update_policies(); else - update_qos_request(FREQ_QOS_MAX); + update_qos_requests(FREQ_QOS_MAX); mutex_unlock(&intel_pstate_driver_lock); @@ -1751,7 +1750,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b, if (intel_pstate_driver == &intel_pstate) intel_pstate_update_policies(); else - update_qos_request(FREQ_QOS_MIN); + update_qos_requests(FREQ_QOS_MIN); mutex_unlock(&intel_pstate_driver_lock); -- cgit v1.2.3 From ae1bdd23b99f64335c69d546bff99ca39b894c18 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Sep 2025 15:53:54 +0200 Subject: cpufreq: intel_pstate: Adjust frequency percentage computations Adjust frequency percentage computations in update_cpu_qos_request() to avoid going above the exact numerical percentage in the FREQ_QOS_MAX case. Signed-off-by: Rafael J. Wysocki Acked-by: Zihuan Zhang Link: https://patch.msgid.link/3395556.44csPzL39Z@rafael.j.wysocki [ rjw: Rename "cpu" to "cpudata" and "cpunum" to "cpu" ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 33a3debb139f..20746c35d3ba 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1655,8 +1655,8 @@ unlock_driver: static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type) { struct cpudata *cpudata = all_cpu_data[cpu]; + unsigned int freq = cpudata->pstate.turbo_freq; struct freq_qos_request *req; - unsigned int freq, perf_pct; struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) @@ -1670,14 +1670,12 @@ static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type) intel_pstate_get_hwp_cap(cpudata); if (type == FREQ_QOS_MIN) { - perf_pct = global.min_perf_pct; + freq = DIV_ROUND_UP(freq * global.min_perf_pct, 100); } else { req++; - perf_pct = global.max_perf_pct; + freq = (freq * global.max_perf_pct) / 100; } - freq = DIV_ROUND_UP(cpudata->pstate.turbo_freq * perf_pct, 100); - if (freq_qos_update_request(req, freq) < 0) pr_warn("Failed to update freq constraint: CPU%d\n", cpu); } -- cgit v1.2.3 From f89c7fb83ae95578e355bed1a7aeea5f3ca5a067 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Fri, 30 May 2025 15:38:08 +0200 Subject: PM / devfreq: rockchip-dfi: double count on RK3588 On RK3588 with LPDDR4X memory, the cycle count as returned by perf stat -a -e rockchip_ddr/cycles/ sleep 1 consistently reads half as much as what the actual DDR frequency is at. For a LPDDR4X module running at 2112MHz, I get more like 1056059916 cycles per second, which is almost bang-on half what it should be. No, I'm not mixing up megatransfers and megahertz. Consulting the downstream driver, this appears to be because the RK3588 hardware specifically (and RK3528 as well, for future reference) needs a multiplier of 2 to get to the correct frequency with everything but LPDDR5. The RK3588's actual memory bandwidth measurements in MB/s are correct however, as confirmed with stress-ng --stream. This makes me think the access counters are not affected in the same way. This tracks with the vendor kernel not multiplying the access counts either. Solve this by adding a new member to the dfi struct, which each SoC can set to whatever they want, but defaults to 1 if left unset by the SoC init functions. The event_get_count op can then use this multiplier if the cycle count is requested. The cycle multiplier is not used in rockchip_dfi_get_event because the vendor driver doesn't use it there either, and we don't do other actual bandwidth unit conversion stuff in there anyway. 
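Sketched out, the new per-SoC knob amounts to this (example_dfi_init() is a hypothetical hook; rockchip_ddr_perf_init() normalizes an unset multiplier to 1, so existing init functions need no changes):

    static int example_dfi_init(struct rockchip_dfi *dfi)
    {
            /* DDRMON ticks once per two data clocks on this SoC */
            dfi->count_multiplier = 2;
            return 0;
    }

The cycle event then scales its raw counter value by count_multiplier when read, while the byte counters are left untouched.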
Fixes: 481d97ba61e1 ("PM / devfreq: rockchip-dfi: add support for RK3588") Signed-off-by: Nicolas Frattaroli Signed-off-by: Chanwoo Choi Link: https://lore.kernel.org/lkml/20250530-rk3588-dfi-improvements-v1-1-6e077c243a95@collabora.com/ --- drivers/devfreq/event/rockchip-dfi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index 0470d7c175f4..54effb635196 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -116,6 +116,7 @@ struct rockchip_dfi { int buswidth[DMC_MAX_CHANNELS]; int ddrmon_stride; bool ddrmon_ctrl_single; + unsigned int count_multiplier; /* number of data clocks per count */ }; static int rockchip_dfi_enable(struct rockchip_dfi *dfi) @@ -435,7 +436,7 @@ static u64 rockchip_ddr_perf_event_get_count(struct perf_event *event) switch (event->attr.config) { case PERF_EVENT_CYCLES: - count = total.c[0].clock_cycles; + count = total.c[0].clock_cycles * dfi->count_multiplier; break; case PERF_EVENT_READ_BYTES: for (i = 0; i < dfi->max_channels; i++) @@ -655,6 +656,9 @@ static int rockchip_ddr_perf_init(struct rockchip_dfi *dfi) break; } + if (!dfi->count_multiplier) + dfi->count_multiplier = 1; + ret = perf_pmu_register(pmu, "rockchip_ddr", -1); if (ret) return ret; @@ -751,6 +755,7 @@ static int rk3588_dfi_init(struct rockchip_dfi *dfi) dfi->max_channels = 4; dfi->ddrmon_stride = 0x4000; + dfi->count_multiplier = 2; return 0; }; -- cgit v1.2.3 From eddb5ba91b289faa15117d4fc1c2fb223f3493c2 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Fri, 30 May 2025 15:38:09 +0200 Subject: PM / devfreq: rockchip-dfi: add support for LPDDR5 The Rockchip RK3588 SoC can also support LPDDR5 memory. This type of memory needs some special case handling in the rockchip-dfi driver. Add support for it in rockchip-dfi, as well as the needed GRF register definitions. This has been tested as returning both the right cycle count and bandwidth on an LPDDR5 board where the CKR bit is 1. I couldn't test whether the values are correct on a system where CKR is 0, as I'm not savvy enough with the Rockchip tooling to know whether this can be set in the DDR init blob. Downstream has some special case handling for a hardware version where not just the control bits differ, but also the register. Since I don't know whether that hardware version is in any production silicon, it's left unimplemented for now, with an error message urging users to report if they have such a system. There is a slight change of behaviour for non-LPDDR5 systems: instead of writing 0 as the control flags to the control register and pretending everything is alright if the memory type is unknown, we now explicitly return an error.
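The LPDDR5-specific probing condenses to the following (taken from the rk3588_dfi_init() hunk below; the bank mode read from OS_REG6 bits 2:1 has to land in DDRMON_CTRL bits 8:7, hence the shift by 7):

    regmap_read(regmap_pmu, RK3588_PMUGRF_OS_REG6, &reg6);
    dfi->lp5_bank_mode = FIELD_GET(RK3588_PMUGRF_OS_REG6_LP5_BANK_MODE, reg6) << 7;
    dfi->lp5_ckr = FIELD_GET(RK3588_PMUGRF_OS_REG6_LP5_CKR, reg6);
    if (dfi->lp5_ckr)
            dfi->count_multiplier *= 2;     /* 4:1 CKR: twice the data clocks per count */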
Signed-off-by: Nicolas Frattaroli Reviewed-by: Sascha Hauer Acked-by: Heiko Stuebner Signed-off-by: Chanwoo Choi Link: https://patchwork.kernel.org/project/linux-pm/patch/20250530-rk3588-dfi-improvements-v1-2-6e077c243a95@collabora.com/ --- drivers/devfreq/event/rockchip-dfi.c | 84 ++++++++++++++++++++++++++++-------- include/soc/rockchip/rk3588_grf.h | 8 +++- include/soc/rockchip/rockchip_grf.h | 1 + 3 files changed, 73 insertions(+), 20 deletions(-) diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index 54effb635196..5a2c9badcc64 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -34,15 +34,18 @@ /* DDRMON_CTRL */ #define DDRMON_CTRL 0x04 +#define DDRMON_CTRL_LPDDR5 BIT(6) #define DDRMON_CTRL_DDR4 BIT(5) #define DDRMON_CTRL_LPDDR4 BIT(4) #define DDRMON_CTRL_HARDWARE_EN BIT(3) #define DDRMON_CTRL_LPDDR23 BIT(2) #define DDRMON_CTRL_SOFTWARE_EN BIT(1) #define DDRMON_CTRL_TIMER_CNT_EN BIT(0) -#define DDRMON_CTRL_DDR_TYPE_MASK (DDRMON_CTRL_DDR4 | \ +#define DDRMON_CTRL_DDR_TYPE_MASK (DDRMON_CTRL_LPDDR5 | \ + DDRMON_CTRL_DDR4 | \ DDRMON_CTRL_LPDDR4 | \ DDRMON_CTRL_LPDDR23) +#define DDRMON_CTRL_LP5_BANK_MODE_MASK GENMASK(8, 7) #define DDRMON_CH0_WR_NUM 0x20 #define DDRMON_CH0_RD_NUM 0x24 @@ -116,13 +119,60 @@ struct rockchip_dfi { int buswidth[DMC_MAX_CHANNELS]; int ddrmon_stride; bool ddrmon_ctrl_single; + u32 lp5_bank_mode; + bool lp5_ckr; /* true if in 4:1 command-to-data clock ratio mode */ unsigned int count_multiplier; /* number of data clocks per count */ }; +static int rockchip_dfi_ddrtype_to_ctrl(struct rockchip_dfi *dfi, u32 *ctrl, + u32 *mask) +{ + u32 ddrmon_ver; + + *mask = DDRMON_CTRL_DDR_TYPE_MASK; + + switch (dfi->ddr_type) { + case ROCKCHIP_DDRTYPE_LPDDR2: + case ROCKCHIP_DDRTYPE_LPDDR3: + *ctrl = DDRMON_CTRL_LPDDR23; + break; + case ROCKCHIP_DDRTYPE_LPDDR4: + case ROCKCHIP_DDRTYPE_LPDDR4X: + *ctrl = DDRMON_CTRL_LPDDR4; + break; + case ROCKCHIP_DDRTYPE_LPDDR5: + ddrmon_ver = readl_relaxed(dfi->regs); + if (ddrmon_ver < 0x40) { + *ctrl = DDRMON_CTRL_LPDDR5 | dfi->lp5_bank_mode; + *mask |= DDRMON_CTRL_LP5_BANK_MODE_MASK; + break; + } + + /* + * As it is unknown whether the unpleasant special case + * behaviour used by the vendor kernel is needed for any + * shipping hardware, ask users to report if they have + * some of that hardware. 
+ */ + dev_err(&dfi->edev->dev, + "unsupported DDRMON version 0x%04X, please let linux-rockchip know!\n", + ddrmon_ver); + return -EOPNOTSUPP; + default: + dev_err(&dfi->edev->dev, "unsupported memory type 0x%X\n", + dfi->ddr_type); + return -EOPNOTSUPP; + } + + return 0; +} + static int rockchip_dfi_enable(struct rockchip_dfi *dfi) { void __iomem *dfi_regs = dfi->regs; int i, ret = 0; + u32 ctrl; + u32 ctrl_mask; mutex_lock(&dfi->mutex); @@ -136,8 +186,11 @@ static int rockchip_dfi_enable(struct rockchip_dfi *dfi) goto out; } + ret = rockchip_dfi_ddrtype_to_ctrl(dfi, &ctrl, &ctrl_mask); + if (ret) + goto out; + for (i = 0; i < dfi->max_channels; i++) { - u32 ctrl = 0; if (!(dfi->channel_mask & BIT(i))) continue; @@ -147,21 +200,7 @@ static int rockchip_dfi_enable(struct rockchip_dfi *dfi) DDRMON_CTRL_SOFTWARE_EN | DDRMON_CTRL_HARDWARE_EN), dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); - /* set ddr type to dfi */ - switch (dfi->ddr_type) { - case ROCKCHIP_DDRTYPE_LPDDR2: - case ROCKCHIP_DDRTYPE_LPDDR3: - ctrl = DDRMON_CTRL_LPDDR23; - break; - case ROCKCHIP_DDRTYPE_LPDDR4: - case ROCKCHIP_DDRTYPE_LPDDR4X: - ctrl = DDRMON_CTRL_LPDDR4; - break; - default: - break; - } - - writel_relaxed(HIWORD_UPDATE(ctrl, DDRMON_CTRL_DDR_TYPE_MASK), + writel_relaxed(HIWORD_UPDATE(ctrl, ctrl_mask), dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); /* enable count, use software mode */ @@ -652,6 +691,7 @@ static int rockchip_ddr_perf_init(struct rockchip_dfi *dfi) break; case ROCKCHIP_DDRTYPE_LPDDR4: case ROCKCHIP_DDRTYPE_LPDDR4X: + case ROCKCHIP_DDRTYPE_LPDDR5: dfi->burst_len = 16; break; } @@ -730,7 +770,7 @@ static int rk3568_dfi_init(struct rockchip_dfi *dfi) static int rk3588_dfi_init(struct rockchip_dfi *dfi) { struct regmap *regmap_pmu = dfi->regmap_pmu; - u32 reg2, reg3, reg4; + u32 reg2, reg3, reg4, reg6; regmap_read(regmap_pmu, RK3588_PMUGRF_OS_REG2, ®2); regmap_read(regmap_pmu, RK3588_PMUGRF_OS_REG3, ®3); @@ -757,6 +797,14 @@ static int rk3588_dfi_init(struct rockchip_dfi *dfi) dfi->ddrmon_stride = 0x4000; dfi->count_multiplier = 2; + if (dfi->ddr_type == ROCKCHIP_DDRTYPE_LPDDR5) { + regmap_read(regmap_pmu, RK3588_PMUGRF_OS_REG6, ®6); + dfi->lp5_bank_mode = FIELD_GET(RK3588_PMUGRF_OS_REG6_LP5_BANK_MODE, reg6) << 7; + dfi->lp5_ckr = FIELD_GET(RK3588_PMUGRF_OS_REG6_LP5_CKR, reg6); + if (dfi->lp5_ckr) + dfi->count_multiplier *= 2; + } + return 0; }; diff --git a/include/soc/rockchip/rk3588_grf.h b/include/soc/rockchip/rk3588_grf.h index 630b35a55064..02a7b2432d99 100644 --- a/include/soc/rockchip/rk3588_grf.h +++ b/include/soc/rockchip/rk3588_grf.h @@ -12,7 +12,11 @@ #define RK3588_PMUGRF_OS_REG3_DRAMTYPE_INFO_V3 GENMASK(13, 12) #define RK3588_PMUGRF_OS_REG3_SYSREG_VERSION GENMASK(31, 28) -#define RK3588_PMUGRF_OS_REG4 0x210 -#define RK3588_PMUGRF_OS_REG5 0x214 +#define RK3588_PMUGRF_OS_REG4 0x210 +#define RK3588_PMUGRF_OS_REG5 0x214 +#define RK3588_PMUGRF_OS_REG6 0x218 +#define RK3588_PMUGRF_OS_REG6_LP5_BANK_MODE GENMASK(2, 1) +/* Whether the LPDDR5 is in 2:1 (= 0) or 4:1 (= 1) CKR a.k.a. 
DQS mode */ +#define RK3588_PMUGRF_OS_REG6_LP5_CKR BIT(0) #endif /* __SOC_RK3588_GRF_H */ diff --git a/include/soc/rockchip/rockchip_grf.h b/include/soc/rockchip/rockchip_grf.h index e46fd72aea8d..41c7bb26fd53 100644 --- a/include/soc/rockchip/rockchip_grf.h +++ b/include/soc/rockchip/rockchip_grf.h @@ -13,6 +13,7 @@ enum { ROCKCHIP_DDRTYPE_LPDDR3 = 6, ROCKCHIP_DDRTYPE_LPDDR4 = 7, ROCKCHIP_DDRTYPE_LPDDR4X = 8, + ROCKCHIP_DDRTYPE_LPDDR5 = 9, }; #endif /* __SOC_ROCKCHIP_GRF_H */ -- cgit v1.2.3 From 5590db443a40a35d7fba2db12701346be621a19e Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Sat, 6 Sep 2025 17:23:16 +0530 Subject: cpufreq: conservative: Replace sscanf() with kstrtouint() Replace sscanf() with kstrtouint() in all sysfs store functions to improve input validation and security. The kstrtouint() function provides better error detection, overflow protection, and consistent error handling compared to sscanf(). This maintains existing functionality while improving input validation robustness and following kernel coding best practices for string parsing. Signed-off-by: Kaushlendra Kumar Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250906115316.3010384-1-kaushlendra.kumar@intel.com [ rjw: Dropped duplicate paragraph from the changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 56500b25d77c..cce6a8d113e1 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -152,9 +152,9 @@ static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set, struct dbs_data *dbs_data = to_dbs_data(attr_set); unsigned int input; int ret; - ret = sscanf(buf, "%u", &input); + ret = kstrtouint(buf, 0, &input); - if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + if (ret || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; dbs_data->sampling_down_factor = input; @@ -168,9 +168,9 @@ static ssize_t up_threshold_store(struct gov_attr_set *attr_set, struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; - ret = sscanf(buf, "%u", &input); + ret = kstrtouint(buf, 0, &input); - if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) + if (ret || input > 100 || input <= cs_tuners->down_threshold) return -EINVAL; dbs_data->up_threshold = input; @@ -184,10 +184,10 @@ static ssize_t down_threshold_store(struct gov_attr_set *attr_set, struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; - ret = sscanf(buf, "%u", &input); + ret = kstrtouint(buf, 0, &input); /* cannot be lower than 1 otherwise freq will not fall */ - if (ret != 1 || input < 1 || input >= dbs_data->up_threshold) + if (ret || input < 1 || input >= dbs_data->up_threshold) return -EINVAL; cs_tuners->down_threshold = input; @@ -201,9 +201,9 @@ static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set, unsigned int input; int ret; - ret = sscanf(buf, "%u", &input); - if (ret != 1) - return -EINVAL; + ret = kstrtouint(buf, 0, &input); + if (ret) + return ret; if (input > 1) input = 1; @@ -226,10 +226,10 @@ static ssize_t freq_step_store(struct gov_attr_set *attr_set, const char *buf, struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; - ret = sscanf(buf, "%u", &input); + ret = kstrtouint(buf, 0, &input); - if (ret != 1) - return -EINVAL; + if (ret) + return 
ret; if (input > 100) input = 100; -- cgit v1.2.3 From 7f3cfb7943d27a7b61bdac8db739cf0bdc28e87d Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Mon, 8 Sep 2025 16:06:55 -0700 Subject: cpufreq: ondemand: Update the efficient idle check for Intel extended Families IO time is considered busy by default for modern Intel processors. The current check covers recent Family 6 models but excludes the brand new Families 18 and 19. According to Arjan van de Ven, the model check was mainly due to a lack of testing on systems before INTEL_CORE2_MEROM. He suggests considering all Intel processors as having an efficient idle. Extend the IO busy classification to all Intel processors starting with Family 6, including Family 15 (Pentium 4s) and upcoming Families 18/19. Use an x86 VFM check and move the function to the header file to avoid using arch-specific #ifdefs in the C file. Signed-off-by: Sohil Mehta Link: https://patch.msgid.link/20250908230655.2562440-1-sohil.mehta@intel.com [ rjw: Added empty line after #include ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_ondemand.c | 25 +------------------------ drivers/cpufreq/cpufreq_ondemand.h | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 0e65d37c9231..a6ecc203f7b7 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -29,29 +29,6 @@ static struct od_ops od_ops; static unsigned int default_powersave_bias; -/* - * Not all CPUs want IO time to be accounted as busy; this depends on how - * efficient idling at a higher frequency/voltage is. - * Pavel Machek says this is not so for various generations of AMD and old - * Intel systems. - * Mike Chan (android.com) claims this is also not true for ARM. - * Because of this, whitelist specific known (series) of CPUs by default, and - * leave all others up to the user. - */ -static int should_io_be_busy(void) -{ -#if defined(CONFIG_X86) - /* - * For Intel, Core 2 (model 15) and later have an efficient idle. - */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model >= 15) - return 1; -#endif - return 0; -} - /* * Find right freq to be set now with powersave_bias on. * Returns the freq_hi to be used right now and will set freq_hi_delay_us, @@ -377,7 +354,7 @@ static int od_init(struct dbs_data *dbs_data) dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; dbs_data->ignore_nice_load = 0; tuners->powersave_bias = default_powersave_bias; - dbs_data->io_is_busy = should_io_be_busy(); + dbs_data->io_is_busy = od_should_io_be_busy(); dbs_data->tuners = tuners; return 0; diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h index 1af8e5c4b86f..2ca8f1aaf2e3 100644 --- a/drivers/cpufreq/cpufreq_ondemand.h +++ b/drivers/cpufreq/cpufreq_ondemand.h @@ -24,3 +24,26 @@ static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *pol struct od_dbs_tuners { unsigned int powersave_bias; }; + +#ifdef CONFIG_X86 +#include + +/* + * Not all CPUs want IO time to be accounted as busy; this depends on + * how efficient idling at a higher frequency/voltage is. + * + * Pavel Machek says this is not so for various generations of AMD and + * old Intel systems. Mike Chan (android.com) claims this is also not + * true for ARM. + * + * Because of this, select a known series of Intel CPUs (Family 6 and + * later) by default, and leave all others up to the user. 
+ */ +static inline bool od_should_io_be_busy(void) +{ + return (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO); +} +#else +static inline bool od_should_io_be_busy(void) { return false; } +#endif -- cgit v1.2.3 From cdc06f912670c8c199d5fa9e78b64b7ed8e871d0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Sep 2025 17:22:12 +0200 Subject: cpuidle: qcom-spm: fix device and OF node leaks at probe Make sure to drop the reference to the saw device taken by of_find_device_by_node() after retrieving its driver data during probe(). Also drop the reference to the CPU node sooner to avoid leaking it in case there is no saw node or device. Fixes: 60f3692b5f0b ("cpuidle: qcom_spm: Detach state machine from main SPM handling") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-qcom-spm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c index 5f386761b156..f60a4cf53642 100644 --- a/drivers/cpuidle/cpuidle-qcom-spm.c +++ b/drivers/cpuidle/cpuidle-qcom-spm.c @@ -96,20 +96,23 @@ static int spm_cpuidle_register(struct device *cpuidle_dev, int cpu) return -ENODEV; saw_node = of_parse_phandle(cpu_node, "qcom,saw", 0); + of_node_put(cpu_node); if (!saw_node) return -ENODEV; pdev = of_find_device_by_node(saw_node); of_node_put(saw_node); - of_node_put(cpu_node); if (!pdev) return -ENODEV; data = devm_kzalloc(cpuidle_dev, sizeof(*data), GFP_KERNEL); - if (!data) + if (!data) { + put_device(&pdev->dev); return -ENOMEM; + } data->spm = dev_get_drvdata(&pdev->dev); + put_device(&pdev->dev); if (!data->spm) return -EINVAL; -- cgit v1.2.3 From a2d100c47f4eccb0cb6c2a6a915f2c9e13b54ae7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Sep 2025 17:22:13 +0200 Subject: cpuidle: qcom-spm: drop unnecessary initialisations Drop the unnecessary initialisations of the platform device and driver data pointers, which are assigned on first use when registering the cpuidle device during probe. Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-qcom-spm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c index f60a4cf53642..7ab6f68b96a8 100644 --- a/drivers/cpuidle/cpuidle-qcom-spm.c +++ b/drivers/cpuidle/cpuidle-qcom-spm.c @@ -86,9 +86,9 @@ static int spm_cpuidle_register(struct device *cpuidle_dev, int cpu) { - struct platform_device *pdev = NULL; + struct platform_device *pdev; struct device_node *cpu_node, *saw_node; - struct cpuidle_qcom_spm_data *data = NULL; + struct cpuidle_qcom_spm_data *data; int ret; cpu_node = of_cpu_device_node_get(cpu); -- cgit v1.2.3 From 1ebe8f7e782523e62cd1fa8237f7afba5d1dae83 Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Sun, 31 Aug 2025 22:43:57 +0100 Subject: PM: EM: Fix late boot with holes in CPU topology Commit e3f1164fc9ee ("PM: EM: Support late CPUs booting and capacity adjustment") added a mechanism to handle CPUs that come up late by retrying when any of the `cpufreq_cpu_get()` calls fails. However, if there are holes in the CPU topology (offline CPUs, e.g. nosmt), the first missing CPU causes the loop to break, preventing subsequent online CPUs from being updated.
Instead of aborting on the first missing CPU policy, loop through all and retry if any were missing. Fixes: e3f1164fc9ee ("PM: EM: Support late CPUs booting and capacity adjustment") Suggested-by: Kenneth Crudup Reported-by: Kenneth Crudup Link: https://lore.kernel.org/linux-pm/40212796-734c-4140-8a85-854f72b8144d@panix.com/ Cc: 6.9+ # 6.9+ Signed-off-by: Christian Loehle Link: https://patch.msgid.link/20250831214357.2020076-1-christian.loehle@arm.com [ rjw: Drop the new pr_debug() message which is not very useful ] Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 8df55397414a..5f17d2e8e954 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -799,7 +799,7 @@ void em_adjust_cpu_capacity(unsigned int cpu) static void em_check_capacity_update(void) { cpumask_var_t cpu_done_mask; - int cpu; + int cpu, failed_cpus = 0; if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) { pr_warn("no free memory\n"); @@ -817,10 +817,8 @@ static void em_check_capacity_update(void) policy = cpufreq_cpu_get(cpu); if (!policy) { - pr_debug("Accessing cpu%d policy failed\n", cpu); - schedule_delayed_work(&em_update_work, - msecs_to_jiffies(1000)); - break; + failed_cpus++; + continue; } cpufreq_cpu_put(policy); @@ -835,6 +833,9 @@ static void em_check_capacity_update(void) em_adjust_new_capacity(cpu, dev, pd); } + if (failed_cpus) + schedule_delayed_work(&em_update_work, msecs_to_jiffies(1000)); + free_cpumask_var(cpu_done_mask); } -- cgit v1.2.3 From 995694ef91da7076ce235f028680fb31ad0b5c04 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 11 Sep 2025 14:04:53 +0200 Subject: cpufreq: ACPI: Use on_each_cpu_mask() in drv_write() Make drv_write() call on_each_cpu_mask() instead of using an open-coded equivalent of the latter. Also remove a comment mentioning the smp_call_function_many() usage which is not particularly useful anyway. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello (AMD) --- drivers/cpufreq/acpi-cpufreq.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 4f7f9201598d..083d8369a591 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -318,7 +318,6 @@ static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask) return cmd.val; } -/* Called via smp_call_function_many(), on the target CPUs */ static void do_drv_write(void *_cmd) { struct drv_cmd *cmd = _cmd; @@ -335,14 +334,8 @@ static void drv_write(struct acpi_cpufreq_data *data, .val = val, .func.write = data->cpu_freq_write, }; - int this_cpu; - this_cpu = get_cpu(); - if (cpumask_test_cpu(this_cpu, mask)) - do_drv_write(&cmd); - - smp_call_function_many(mask, do_drv_write, &cmd, 1); - put_cpu(); + on_each_cpu_mask(mask, do_drv_write, &cmd, true); } static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) -- cgit v1.2.3 From 57b100d4cf14276e0340eecb561005c07c129eb8 Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Thu, 28 Aug 2025 12:00:00 +0530 Subject: tools/cpupower: fix error return value in cpupower_write_sysfs() The cpupower_write_sysfs() function currently returns -1 on write failure, but the function signature indicates it should return an unsigned int. 
Returning -1 from an unsigned function results in a large positive value rather than indicating an error condition. Fix this by returning 0 on failure, which is more appropriate for an unsigned return type and maintains consistency with typical success/failure semantics where 0 indicates failure and non-zero indicates success (bytes written). Link: https://lore.kernel.org/r/20250828063000.803229-1-kaushlendra.kumar@intel.com Signed-off-by: Kaushlendra Kumar Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/cpupower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index ce8dfb8e46ab..d7f7ec6f151c 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -56,7 +56,7 @@ unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen) if (numwritten < 1) { perror(path); close(fd); - return -1; + return 0; } close(fd); -- cgit v1.2.3 From 46c435cbaf5591a349c339e080ac2a228aa99649 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 11 Sep 2025 14:02:33 +0200 Subject: cpufreq: intel_pstate: Enable HWP without EPP if DEC is enabled So far, HWP has never been enabled without EPP (Energy-Performance Preference) interface support, since the lack of the latter indicates an incomplete implementation of HWP, which was the case on early development vehicle platforms. However, HWP can be expected to work if DEC (Dynamic Efficiency Control) is enabled as indicated by setting bit 27 in MSR_IA32_POWER_CTL (DEC enable bit). Accordingly, allow HWP to be enabled if the EPP interface is not supported so long as DEC is enabled in the processor. Still, the EPP control sysfs interface is useless when EPP is not supported, so do not expose it in that case. Link: https://lore.kernel.org/linux-pm/20250904000608.260817-2-srinivas.pandruvada@linux.intel.com/ Signed-off-by: Srinivas Pandruvada Co-developed-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 72 ++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 20746c35d3ba..285051e4a5dd 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -897,11 +897,19 @@ static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf) cpufreq_freq_attr_ro(base_frequency); +enum hwp_cpufreq_attr_index { + HWP_BASE_FREQUENCY_INDEX = 0, + HWP_PERFORMANCE_PREFERENCE_INDEX, + HWP_PERFORMANCE_AVAILABLE_PREFERENCES_INDEX, + HWP_CPUFREQ_ATTR_COUNT, +}; + static struct freq_attr *hwp_cpufreq_attrs[] = { - &energy_performance_preference, - &energy_performance_available_preferences, - &base_frequency, - NULL, + [HWP_BASE_FREQUENCY_INDEX] = &base_frequency, + [HWP_PERFORMANCE_PREFERENCE_INDEX] = &energy_performance_preference, + [HWP_PERFORMANCE_AVAILABLE_PREFERENCES_INDEX] = + &energy_performance_available_preferences, + [HWP_CPUFREQ_ATTR_COUNT] = NULL, }; static bool no_cas __ro_after_init; @@ -1370,6 +1378,9 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) #define POWER_CTL_EE_ENABLE 1 #define POWER_CTL_EE_DISABLE 2 +/* Enable bit for Dynamic Efficiency Control (DEC) */ +#define POWER_CTL_DEC_ENABLE 27 + static int power_ctl_ee_state; static void set_power_ctl_ee_state(bool input) @@ -3758,6 +3769,26 @@ static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { {} }; +static bool hwp_check_epp(void) +{ + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) + return true; + + /* Without EPP support, don't expose EPP-related sysfs attributes. */ + hwp_cpufreq_attrs[HWP_PERFORMANCE_PREFERENCE_INDEX] = NULL; + hwp_cpufreq_attrs[HWP_PERFORMANCE_AVAILABLE_PREFERENCES_INDEX] = NULL; + + return false; +} + +static bool hwp_check_dec(void) +{ + u64 power_ctl; + + rdmsrq(MSR_IA32_POWER_CTL, power_ctl); + return !!(power_ctl & BIT(POWER_CTL_DEC_ENABLE)); +} + static int __init intel_pstate_init(void) { static struct cpudata **_all_cpu_data; @@ -3778,23 +3809,32 @@ static int __init intel_pstate_init(void) id = x86_match_cpu(hwp_support_ids); if (id) { - hwp_forced = intel_pstate_hwp_is_enabled(); + bool epp_present = hwp_check_epp(); - if (hwp_forced) + /* + * If HWP is enabled already, there is no choice but to deal + * with it. + */ + hwp_forced = intel_pstate_hwp_is_enabled(); + if (hwp_forced) { pr_info("HWP enabled by BIOS\n"); - else if (no_load) + no_hwp = 0; + } else if (no_load) { return -ENODEV; + } else if (!epp_present && !hwp_check_dec()) { + /* + * Avoid enabling HWP for processors without EPP support + * unless the Dynamic Efficiency Control (DEC) enable + * bit (MSR_IA32_POWER_CTL, bit 27) is set because that + * means incomplete HWP implementation which is a corner + * case and supporting it is generally problematic. + */ + no_hwp = 1; + } copy_cpu_funcs(&core_funcs); - /* - * Avoid enabling HWP for processors without EPP support, - * because that means incomplete HWP implementation which is a - * corner case and supporting it is generally problematic. - * - * If HWP is enabled already, though, there is no choice but to - * deal with it. 
- */ - if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) { + + if (!no_hwp) { hwp_active = true; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; -- cgit v1.2.3 From 7c0dde86c17665cb27e1c8dd23d263e2ed2d5b50 Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Mon, 8 Sep 2025 16:57:38 +0800 Subject: cpufreq: Add defensive check during driver registration Currently, cpufreq allows drivers to implement both ->target() and ->target_index() callbacks, but that can lead to ambiguous or incorrect behavior. For this reason, prevent cpufreq drivers implementing both ->target() and ->target_index() at the same time from registering. This check can help to catch driver implementation mistakes early and improve overall robustness, without affecting existing valid drivers. Signed-off-by: Zihuan Zhang Link: https://patch.msgid.link/20250908085738.31602-1-zhangzihuan@kylinos.cn [ rjw: Subject adjustment and changelog rewrite ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 30e8e9b3c12f..599a7af76b3d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2911,6 +2911,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) return -EPROBE_DEFER; if (!driver_data || !driver_data->verify || !driver_data->init || + (driver_data->target_index && driver_data->target) || (!!driver_data->setpolicy == (driver_data->target_index || driver_data->target)) || (!driver_data->get_intermediate != !driver_data->target_intermediate) || (!driver_data->online != !driver_data->offline) || -- cgit v1.2.3 From 02d09026a88f227aa1f4687bd31d6ffc4970e8be Mon Sep 17 00:00:00 2001 From: Yaxiong Tian Date: Fri, 12 Sep 2025 15:35:02 +0800 Subject: cpufreq: intel_pstate: Use likely() optimization in intel_pstate_sample() The comment above the condition `if (cpu->last_sample_time)` clearly indicates that the branch is taken for the vast majority of invocations after the first sample in a cycle. The first sample is a one-time initialization case. Add likely() hint to the condition to improve branch prediction for this performance-critical path in intel_pstate_sample(). Signed-off-by: Yaxiong Tian Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 285051e4a5dd..05e440a6aa4a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2542,7 +2542,7 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) * that sample.time will always be reset before setting the utilization * update hook and make the caller skip the sample then. */ - if (cpu->last_sample_time) { + if (likely(cpu->last_sample_time)) { intel_pstate_calc_avg_perf(cpu); return true; } -- cgit v1.2.3 From 8dba0fd9cee34b80d6e26a188115e5427773285a Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Fri, 19 Sep 2025 22:26:57 +0530 Subject: cpuidle: sysfs: Use sysfs_emit()/sysfs_emit_at() instead of sprintf()/scnprintf() The ->show() callbacks in sysfs should use sysfs_emit() or sysfs_emit_at() when formatting values for user space. These helpers are the recommended way to ensure correct buffer handling and consistency across the kernel. See Documentation/filesystems/sysfs.rst for details. No functional change intended. 
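For reference, a ->show() callback written to this recommendation looks like the sketch below: a hypothetical device attribute following Documentation/filesystems/sysfs.rst, not code from this patch. sysfs_emit() keeps the output within the PAGE_SIZE buffer that sysfs provides, which a bare sprintf() does not guarantee.

#include <linux/device.h>
#include <linux/sysfs.h>

/* Hypothetical read-only attribute using the recommended helper. */
static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(example);
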
Suggested-by: Joe Perches Signed-off-by: Vivek Yadav Link: https://patch.msgid.link/20250919165657.233349-1-vivekyadav1207731111@gmail.com [ rjw: Minor subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/sysfs.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index d6f5da61cb7d..61de64817604 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -27,14 +27,14 @@ static ssize_t show_available_governors(struct device *dev, mutex_lock(&cpuidle_lock); list_for_each_entry(tmp, &cpuidle_governors, governor_list) { - if (i >= (ssize_t) (PAGE_SIZE - (CPUIDLE_NAME_LEN + 2))) + if (i >= (ssize_t)(PAGE_SIZE - (CPUIDLE_NAME_LEN + 2))) goto out; - i += scnprintf(&buf[i], CPUIDLE_NAME_LEN + 1, "%s ", tmp->name); + i += sysfs_emit_at(buf, i, "%.*s ", CPUIDLE_NAME_LEN, tmp->name); } out: - i+= sprintf(&buf[i], "\n"); + i += sysfs_emit_at(buf, i, "\n"); mutex_unlock(&cpuidle_lock); return i; } @@ -49,9 +49,9 @@ static ssize_t show_current_driver(struct device *dev, spin_lock(&cpuidle_driver_lock); drv = cpuidle_get_driver(); if (drv) - ret = sprintf(buf, "%s\n", drv->name); + ret = sysfs_emit(buf, "%s\n", drv->name); else - ret = sprintf(buf, "none\n"); + ret = sysfs_emit(buf, "none\n"); spin_unlock(&cpuidle_driver_lock); return ret; @@ -65,9 +65,9 @@ static ssize_t show_current_governor(struct device *dev, mutex_lock(&cpuidle_lock); if (cpuidle_curr_governor) - ret = sprintf(buf, "%s\n", cpuidle_curr_governor->name); + ret = sysfs_emit(buf, "%s\n", cpuidle_curr_governor->name); else - ret = sprintf(buf, "none\n"); + ret = sysfs_emit(buf, "none\n"); mutex_unlock(&cpuidle_lock); return ret; @@ -230,7 +230,7 @@ static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0644, show, store) static ssize_t show_state_##_name(struct cpuidle_state *state, \ struct cpuidle_state_usage *state_usage, char *buf) \ { \ - return sprintf(buf, "%u\n", state->_name);\ + return sysfs_emit(buf, "%u\n", state->_name);\ } #define define_show_state_ull_function(_name) \ @@ -238,7 +238,7 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ struct cpuidle_state_usage *state_usage, \ char *buf) \ { \ - return sprintf(buf, "%llu\n", state_usage->_name);\ + return sysfs_emit(buf, "%llu\n", state_usage->_name);\ } #define define_show_state_str_function(_name) \ @@ -247,8 +247,8 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ char *buf) \ { \ if (state->_name[0] == '\0')\ - return sprintf(buf, "\n");\ - return sprintf(buf, "%s\n", state->_name);\ + return sysfs_emit(buf, "\n");\ + return sysfs_emit(buf, "%s\n", state->_name);\ } #define define_show_state_time_function(_name) \ @@ -256,7 +256,7 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ struct cpuidle_state_usage *state_usage, \ char *buf) \ { \ - return sprintf(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \ + return sysfs_emit(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \ } define_show_state_time_function(exit_latency) @@ -273,14 +273,14 @@ static ssize_t show_state_time(struct cpuidle_state *state, struct cpuidle_state_usage *state_usage, char *buf) { - return sprintf(buf, "%llu\n", ktime_to_us(state_usage->time_ns)); + return sysfs_emit(buf, "%llu\n", ktime_to_us(state_usage->time_ns)); } static ssize_t show_state_disable(struct cpuidle_state *state, struct cpuidle_state_usage *state_usage, char *buf) { - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", 
state_usage->disable & CPUIDLE_STATE_DISABLED_BY_USER); } @@ -310,7 +310,7 @@ static ssize_t show_state_default_status(struct cpuidle_state *state, struct cpuidle_state_usage *state_usage, char *buf) { - return sprintf(buf, "%s\n", + return sysfs_emit(buf, "%s\n", state->flags & CPUIDLE_FLAG_OFF ? "disabled" : "enabled"); } @@ -358,7 +358,7 @@ static ssize_t show_state_s2idle_##_name(struct cpuidle_state *state, \ struct cpuidle_state_usage *state_usage, \ char *buf) \ { \ - return sprintf(buf, "%llu\n", state_usage->s2idle_##_name);\ + return sysfs_emit(buf, "%llu\n", state_usage->s2idle_##_name);\ } define_show_state_s2idle_ull_function(usage); @@ -550,7 +550,7 @@ static ssize_t show_driver_name(struct cpuidle_driver *drv, char *buf) ssize_t ret; spin_lock(&cpuidle_driver_lock); - ret = sprintf(buf, "%s\n", drv ? drv->name : "none"); + ret = sysfs_emit(buf, "%s\n", drv ? drv->name : "none"); spin_unlock(&cpuidle_driver_lock); return ret; -- cgit v1.2.3 From 7b1b7961170e4fcad488755e5ffaaaf9bd527e8f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 19 Sep 2025 13:22:20 +0200 Subject: cpuidle: Fail cpuidle device registration if there is one already Refuse to register a cpuidle device if the given CPU has a cpuidle device already and print a message regarding it. Without this, an attempt to register a new cpuidle device without unregistering the existing one leads to the removal of the existing cpuidle device without removing its sysfs interface. Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 0835da449db8..56132e843c99 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -635,8 +635,14 @@ static void __cpuidle_device_init(struct cpuidle_device *dev) static int __cpuidle_register_device(struct cpuidle_device *dev) { struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + unsigned int cpu = dev->cpu; int i, ret; + if (per_cpu(cpuidle_devices, cpu)) { + pr_info("CPU%d: cpuidle device already registered\n", cpu); + return -EEXIST; + } + if (!try_module_get(drv->owner)) return -EINVAL; @@ -648,7 +654,7 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_USER; } - per_cpu(cpuidle_devices, dev->cpu) = dev; + per_cpu(cpuidle_devices, cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); ret = cpuidle_coupled_register_device(dev); -- cgit v1.2.3 From 9a1aa642c1fb5a3776447182aaf37dfaafb36134 Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Tue, 23 Sep 2025 15:55:53 +0800 Subject: cpufreq: Replace pointer subtraction with iteration macro The cpufreq documentation suggests avoiding direct pointer subtraction when working with entries in a driver's freq_table, as it is relatively costly. Instead, the recommended approach is to use the provided iteration macros, like cpufreq_for_each_valid_entry_idx(). Use cpufreq_for_each_valid_entry_idx() instead of pointer subtraction in cpufreq_frequency_table_cpuinfo(), which improves code clarity and follows the established cpufreq coding style. While at it, remove redundant local variable initialization from cpufreq_table_index_unsorted(). No functional change intended. Signed-off-by: Zihuan Zhang Link: https://patch.msgid.link/20250923075553.45532-1-zhangzihuan@kylinos.cn [ rjw: Subject tweak, changelog edits, local variable definition tweak ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/freq_table.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index d5111ee56e38..7f251daf03ce 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -33,16 +33,16 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy) struct cpufreq_frequency_table *pos, *table = policy->freq_table; unsigned int min_freq = ~0; unsigned int max_freq = 0; - unsigned int freq; + unsigned int freq, i; - cpufreq_for_each_valid_entry(pos, table) { + cpufreq_for_each_valid_entry_idx(pos, table, i) { freq = pos->frequency; if ((!cpufreq_boost_enabled() || !policy->boost_enabled) && (pos->flags & CPUFREQ_BOOST_FREQ)) continue; - pr_debug("table entry %u: %u kHz\n", (int)(pos - table), freq); + pr_debug("table entry %u: %u kHz\n", i, freq); if (freq < min_freq) min_freq = freq; if (freq > max_freq) @@ -126,7 +126,7 @@ int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, }; struct cpufreq_frequency_table *pos; struct cpufreq_frequency_table *table = policy->freq_table; - unsigned int freq, diff, i = 0; + unsigned int freq, diff, i; int index; pr_debug("request for target %u kHz (relation: %u) for cpu %u\n", -- cgit v1.2.3 From e3f761be5a178bdd5cd80351c5ca0a0cf675ef7e Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Mon, 25 Aug 2025 04:09:02 +0800 Subject: tools/power/x86/amd_pstate_tracer: Fix python gnuplot package names The prerequisites section listed non-existent packages "phython-gnuplot" and "phython3-gnuplot", which may mislead users and cause installation failures. Update the names to the correct distribution package names "python-gnuplot" and "python3-gnuplot", helping users avoid confusion and saving time for those following the instructions. Signed-off-by: Kuan-Wei Chiu Reviewed-by: Mario Limonciello Signed-off-by: Mario Limonciello (AMD) --- tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py index feb9f9421c7b..875b086550d1 100755 --- a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py +++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py @@ -11,7 +11,7 @@ Prerequisites: gnuplot 5.0 or higher gnuplot-py 1.8 or higher (Most of the distributions have these required packages. They may be called - gnuplot-py, phython-gnuplot or phython3-gnuplot, gnuplot-nox, ... ) + gnuplot-py, python-gnuplot or python3-gnuplot, gnuplot-nox, ... ) Kernel config for Linux trace is enabled -- cgit v1.2.3 From 23199d2aa6dcaf6dd2da772f93d2c94317d71459 Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Wed, 17 Sep 2025 10:38:20 +0530 Subject: tools/cpupower: Fix incorrect size in cpuidle_state_disable() Fix incorrect size parameter passed to cpuidle_state_write_file() in cpuidle_state_disable(). The function was incorrectly using sizeof(disable) which returns the size of the unsigned int variable (4 bytes) instead of the actual length of the string stored in the 'value' buffer. Since 'value' is populated with snprintf() to contain the string representation of the disable value, we should use the length returned by snprintf() to get the correct string length for writing to the sysfs file. This ensures the correct number of bytes is written to the cpuidle state disable file in sysfs. 
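The bug class is worth restating outside the cpupower sources: sizeof applied to an integer variable yields the size of its type, not the length of the formatted string, whereas snprintf() returns the number of characters it produced. A standalone demonstration (the buffer size and values are illustrative assumptions):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char value[32];
	unsigned int disable = 1;
	int len = snprintf(value, sizeof(value), "%u", disable);

	printf("sizeof(disable) = %zu\n", sizeof(disable));	/* 4 on common ABIs */
	printf("snprintf length = %d\n", len);			/* 1 */
	printf("strlen(value)   = %zu\n", strlen(value));	/* 1 */
	return 0;
}
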
Link: https://lore.kernel.org/r/20250917050820.1785377-1-kaushlendra.kumar@intel.com Signed-off-by: Kaushlendra Kumar Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/cpuidle.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c index 0ecac009273c..f2c1139adf71 100644 --- a/tools/power/cpupower/lib/cpuidle.c +++ b/tools/power/cpupower/lib/cpuidle.c @@ -233,6 +233,7 @@ int cpuidle_state_disable(unsigned int cpu, { char value[SYSFS_PATH_MAX]; int bytes_written; + int len; if (cpuidle_state_count(cpu) <= idlestate) return -1; @@ -241,10 +242,10 @@ int cpuidle_state_disable(unsigned int cpu, idlestate_value_files[IDLESTATE_DISABLE])) return -2; - snprintf(value, SYSFS_PATH_MAX, "%u", disable); + len = snprintf(value, SYSFS_PATH_MAX, "%u", disable); bytes_written = cpuidle_state_write_file(cpu, idlestate, "disable", - value, sizeof(disable)); + value, len); if (bytes_written) return 0; return -3; -- cgit v1.2.3 From 469d80a3712c66a00b5bb888e62e809db8887ba7 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 25 Sep 2025 13:51:06 -0500 Subject: PM: hibernate: Fix hybrid-sleep Hybrid sleep will hibernate the system followed by running through the suspend routine. Since both the hibernate and the suspend routine will call pm_restrict_gfp_mask(), pm_restore_gfp_mask() must be called before starting the suspend sequence. Add an explicit call to pm_restore_gfp_mask() to power_down() before the suspend sequence starts. Add an extra call to pm_restrict_gfp_mask() when exiting suspend so that the pm_restore_gfp_mask() call in hibernate() is balanced. Reported-by: Ionut Nechita Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4573 Tested-by: Ionut Nechita Fixes: 12ffc3b1513eb ("PM: Restrict swap use to later in the suspend sequence") Tested-by: Kenneth Crudup Acked-by: Alex Deucher Signed-off-by: Mario Limonciello (AMD) Link: https://patch.msgid.link/20250925185108.2968494-2-superm1@kernel.org [ rjw: Add comment explaining the new pm_restrict_gfp_mask() call purpose ] Cc: 6.16+ # 6.16+ Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 2f66ab453823..e0cdb8d9fd45 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -695,12 +695,16 @@ static void power_down(void) #ifdef CONFIG_SUSPEND if (hibernation_mode == HIBERNATION_SUSPEND) { + pm_restore_gfp_mask(); error = suspend_devices_and_enter(mem_sleep_current); if (error) { hibernation_mode = hibernation_ops ? HIBERNATION_PLATFORM : HIBERNATION_SHUTDOWN; } else { + /* Match pm_restore_gfp_mask() call in hibernate() */ + pm_restrict_gfp_mask(); + /* Restore swap signature. */ error = swsusp_unmark(); if (error) -- cgit v1.2.3 From 495c8d35035edb66e3284113bef01f3b1b843832 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 25 Sep 2025 13:51:07 -0500 Subject: PM: hibernate: Add pm_hibernation_mode_is_suspend() Some drivers have different flows for hibernation and suspend. If a driver will opportunistically skip thaw(), it needs a hint to know what is happening after the hibernation step. Introduce a new symbol pm_hibernation_mode_is_suspend() that drivers can call to determine whether the system is being suspended for this purpose. Tested-by: Ionut Nechita Tested-by: Kenneth Crudup Acked-by: Alex Deucher Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Rafael J. 
Wysocki --- include/linux/suspend.h | 2 ++ kernel/power/hibernate.c | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 317ae31e89b3..0664c685f0b2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -276,6 +276,7 @@ extern void arch_suspend_enable_irqs(void); extern int pm_suspend(suspend_state_t state); extern bool sync_on_suspend_enabled; +bool pm_hibernation_mode_is_suspend(void); #else /* !CONFIG_SUSPEND */ #define suspend_valid_only_mem NULL @@ -288,6 +289,7 @@ static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } static inline bool pm_suspend_no_platform(void) { return false; } static inline bool pm_suspend_default_s2idle(void) { return false; } +static inline bool pm_hibernation_mode_is_suspend(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e0cdb8d9fd45..5d146218cae8 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -80,6 +80,17 @@ static const struct platform_hibernation_ops *hibernation_ops; static atomic_t hibernate_atomic = ATOMIC_INIT(1); +#ifdef CONFIG_SUSPEND +/** + * pm_hibernation_mode_is_suspend - Check if hibernation has been set to suspend + */ +bool pm_hibernation_mode_is_suspend(void) +{ + return hibernation_mode == HIBERNATION_SUSPEND; +} +EXPORT_SYMBOL_GPL(pm_hibernation_mode_is_suspend); +#endif + bool hibernate_acquire(void) { return atomic_add_unless(&hibernate_atomic, -1, 0); -- cgit v1.2.3 From 0a6e9e098fcc318fec0f45a05a5c4743a81a60d9 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 25 Sep 2025 13:51:08 -0500 Subject: drm/amd: Fix hybrid sleep [Why] commit 530694f54dd5e ("drm/amdgpu: do not resume device in thaw for normal hibernation") optimized the flow for systems that are going into S4 where the power would be turned off. Basically the thaw() callback wouldn't resume the device if the hibernation image was successfully created since the system would be powered off. This however isn't the correct flow for a system entering into s0i3 after the hibernation image is created. Some of the amdgpu callbacks have different behavior depending upon the intended state of the suspend. [How] Use pm_hibernation_mode_is_suspend() as an input to decide whether to run resume during thaw() callback. Reported-by: Ionut Nechita Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4573 Tested-by: Ionut Nechita Fixes: 530694f54dd5e ("drm/amdgpu: do not resume device in thaw for normal hibernation") Acked-by: Alex Deucher Tested-by: Kenneth Crudup Signed-off-by: Mario Limonciello (AMD) Cc: 6.17+ # 6.17+: 495c8d35035e: PM: hibernate: Add pm_hibernation_mode_is_suspend() Signed-off-by: Rafael J. 
Wysocki --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 395c6be901ce..dcea66aadfa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2665,7 +2665,7 @@ static int amdgpu_pmops_thaw(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); /* do not resume device if it's normal hibernation */ - if (!pm_hibernate_is_recovering()) + if (!pm_hibernate_is_recovering() && !pm_hibernation_mode_is_suspend()) return 0; return amdgpu_device_resume(drm_dev, true); -- cgit v1.2.3 From cafb47be3f38ad81306bf894e743bebc2ccf66ab Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Sat, 9 Aug 2025 10:35:15 +0530 Subject: tools/power turbostat: Fix incorrect sorting of PMT telemetry The pmt_telemdir_sort() comparison function was returning a boolean value (0 or 1) instead of the required negative, zero, or positive value for proper sorting. This caused unpredictable and incorrect ordering of telemetry directories named telem0, telem1, ..., telemN. Update the comparison logic to return -1, 0, or 1 based on the numerical value extracted from the directory name, ensuring correct numerical ordering when using scandir. This change improves stability and correctness when iterating PMT telemetry directories. Signed-off-by: Kaushlendra Kumar Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5230e072e414..b391196c3e90 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1649,7 +1649,7 @@ int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b) sscanf((*a)->d_name, "telem%u", &aidx); sscanf((*b)->d_name, "telem%u", &bidx); - return aidx >= bidx; + return (aidx > bidx) ? 1 : (aidx < bidx) ? -1 : 0; } const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter) -- cgit v1.2.3 From 62127655b7ab7b8c2997041aca48a81bf5c6da0c Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Wed, 13 Aug 2025 12:32:08 +0530 Subject: tools/power x86_energy_perf_policy: Fix incorrect fopen mode usage The fopen_or_die() function was previously hardcoded to open files in read-only mode ("r"), ignoring the mode parameter passed to it. This patch corrects fopen_or_die() to use the provided mode argument, allowing for flexible file access as intended. Additionally, the call to fopen_or_die() in err_on_hypervisor() incorrectly used the mode "ro", which is not a valid fopen mode. This is fixed to use the correct "r" mode. 
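Condensed into a standalone program, the corrected wrapper simply forwards its mode argument (the file path and write test below are illustrative assumptions, not part of the patch):

#include <err.h>
#include <stdio.h>

/* Mirrors the fixed behaviour: honour 'mode' instead of hardcoding "r". */
static FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *filep = fopen(path, mode);

	if (!filep)
		err(1, "%s: open failed", path);
	return filep;
}

int main(void)
{
	FILE *fp = fopen_or_die("/tmp/example.txt", "w");	/* would fail pre-fix */

	fputs("hello\n", fp);
	fclose(fp);
	return 0;
}
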
Signed-off-by: Kaushlendra Kumar Signed-off-by: Len Brown --- tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index ebda9c366b2b..c883f211dbcc 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -630,7 +630,7 @@ void cmdline(int argc, char **argv) */ FILE *fopen_or_die(const char *path, const char *mode) { - FILE *filep = fopen(path, "r"); + FILE *filep = fopen(path, mode); if (!filep) err(1, "%s: open failed", path); @@ -644,7 +644,7 @@ void err_on_hypervisor(void) char *buffer; /* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */ - cpuinfo = fopen_or_die("/proc/cpuinfo", "ro"); + cpuinfo = fopen_or_die("/proc/cpuinfo", "r"); buffer = malloc(4096); if (!buffer) { -- cgit v1.2.3 From b6b42a6051b203bb1d78bf6518007e5dc8b62fc4 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 19 Sep 2025 13:46:55 -0400 Subject: tools/power x86_energy_perf_policy: Enhance HWP enabled check Verify that all CPUs have HWP enabled, not just cpu0. Signed-off-by: Len Brown --- .../x86_energy_perf_policy.c | 44 ++++++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index c883f211dbcc..2bf0f878441e 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -4,7 +4,7 @@ * policy preference bias on recent X86 processors. */ /* - * Copyright (c) 2010 - 2017 Intel Corporation. + * Copyright (c) 2010 - 2025 Intel Corporation. * Len Brown */ @@ -517,7 +517,7 @@ void for_packages(unsigned long long pkg_set, int (func)(int)) void print_version(void) { - printf("x86_energy_perf_policy 17.05.11 (C) Len Brown \n"); + printf("x86_energy_perf_policy 2025.9.19 Len Brown \n"); } void cmdline(int argc, char **argv) @@ -1312,6 +1312,17 @@ void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int)) if (CPU_ISSET_S(cpu_num, set_size, cpu_set)) func(cpu_num); } +int for_all_cpus_in_set_and(size_t set_size, cpu_set_t *cpu_set, int (func)(int)) +{ + int cpu_num; + int retval = 1; + + for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num) + if (CPU_ISSET_S(cpu_num, set_size, cpu_set)) + retval &= func(cpu_num); + + return retval; +} void init_data_structures(void) { @@ -1326,21 +1337,38 @@ void init_data_structures(void) for_all_proc_cpus(mark_cpu_present); } -/* clear has_hwp if it is not enable (or being enabled) */ +int is_hwp_enabled_on_cpu(int cpu_num) +{ + unsigned long long msr; + int retval; + + /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ + get_msr(cpu_num, MSR_PM_ENABLE, &msr); + retval = (msr & 1); + if (verbose) + fprintf(stderr, "cpu%d: %sHWP\n", cpu_num, retval ? 
"" : "No-"); + + return retval; +} + +/* + * verify_hwp_is_enabled() + * + * Set (has_hwp=0) if no HWP feature or any of selected CPU set does not have HWP enabled + */ void verify_hwp_is_enabled(void) { - unsigned long long msr; + int retval; if (!has_hwp) /* set in early_cpuid() */ return; - /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ - get_msr(base_cpu, MSR_PM_ENABLE, &msr); - if ((msr & 1) == 0) { + retval = for_all_cpus_in_set_and(cpu_setsize, cpu_selected_set, is_hwp_enabled_on_cpu); + + if (retval == 0) { fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n"); has_hwp = 0; - return; } } -- cgit v1.2.3 From c97c057d357c4b39b153e9e430bbf8976e05bd4e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 19 Sep 2025 14:07:02 -0400 Subject: tools/power x86_energy_perf_policy: Enhance HWP enable On enabling HWP, preserve the reserved bits in MSR_PM_ENABLE. Also, skip writing the MSR_PM_ENABLE if HWP is already enabled. Signed-off-by: Len Brown --- .../x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 2bf0f878441e..70d7cdf0b07f 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -1166,13 +1166,18 @@ int update_hwp_request_pkg(int pkg) int enable_hwp_on_cpu(int cpu) { - unsigned long long msr; + unsigned long long old_msr, new_msr; + + get_msr(cpu, MSR_PM_ENABLE, &old_msr); + + if (old_msr & 1) + return 0; /* already enabled */ - get_msr(cpu, MSR_PM_ENABLE, &msr); - put_msr(cpu, MSR_PM_ENABLE, 1); + new_msr = old_msr | 1; + put_msr(cpu, MSR_PM_ENABLE, new_msr); if (verbose) - printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1); + printf("cpu%d: MSR_PM_ENABLE old: %llX new: %llX\n", cpu, old_msr, new_msr); return 0; } -- cgit v1.2.3 From 8ef8fa829f8ad313f18d694c15874f85d693215d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 19 Sep 2025 15:05:48 -0400 Subject: tools/power x86_energy_perf_policy: Prepare for MSR/sysfs refactoring Rename routines to make "_msr" and "_sysfs" access methods clear No functional change Signed-off-by: Len Brown --- .../x86_energy_perf_policy.c | 38 +++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 70d7cdf0b07f..ce6fb8080105 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -809,7 +809,7 @@ void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str) h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp, h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7); } -void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +void read_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) { unsigned long long msr; @@ -823,7 +823,7 @@ void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1); } -void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +void write_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) { unsigned long long msr 
= 0; @@ -843,7 +843,7 @@ void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int ms put_msr(cpu, msr_offset, msr); } -static int get_epb(int cpu) +static int get_epb_sysfs(int cpu) { char path[SYSFS_PATH_MAX]; char linebuf[3]; @@ -865,7 +865,7 @@ static int get_epb(int cpu) return (int)val; } -static int set_epb(int cpu, int val) +static int set_epb_sysfs(int cpu, int val) { char path[SYSFS_PATH_MAX]; char linebuf[3]; @@ -895,14 +895,14 @@ int print_cpu_msrs(int cpu) struct msr_hwp_cap cap; int epb; - epb = get_epb(cpu); + epb = get_epb_sysfs(cpu); if (epb >= 0) printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb); if (!has_hwp) return 0; - read_hwp_request(cpu, &req, MSR_HWP_REQUEST); + read_hwp_request_msr(cpu, &req, MSR_HWP_REQUEST); print_hwp_request(cpu, &req, ""); read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); @@ -919,7 +919,7 @@ int print_pkg_msrs(int pkg) if (!has_hwp) return 0; - read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG); + read_hwp_request_msr(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG); print_hwp_request_pkg(pkg, &req, ""); if (has_hwp_notify) { @@ -1074,14 +1074,14 @@ int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, s return 0; } -int update_hwp_request(int cpu) +int update_hwp_request_msr(int cpu) { struct msr_hwp_request req; struct msr_hwp_cap cap; int msr_offset = MSR_HWP_REQUEST; - read_hwp_request(cpu, &req, msr_offset); + read_hwp_request_msr(cpu, &req, msr_offset); if (debug) print_hwp_request(cpu, &req, "old: "); @@ -1111,15 +1111,15 @@ int update_hwp_request(int cpu) verify_hwp_req_self_consistency(cpu, &req); - write_hwp_request(cpu, &req, msr_offset); + write_hwp_request_msr(cpu, &req, msr_offset); if (debug) { - read_hwp_request(cpu, &req, msr_offset); + read_hwp_request_msr(cpu, &req, msr_offset); print_hwp_request(cpu, &req, "new: "); } return 0; } -int update_hwp_request_pkg(int pkg) +int update_hwp_request_pkg_msr(int pkg) { struct msr_hwp_request req; struct msr_hwp_cap cap; @@ -1127,7 +1127,7 @@ int update_hwp_request_pkg(int pkg) int msr_offset = MSR_HWP_REQUEST_PKG; - read_hwp_request(cpu, &req, msr_offset); + read_hwp_request_msr(cpu, &req, msr_offset); if (debug) print_hwp_request_pkg(pkg, &req, "old: "); @@ -1155,10 +1155,10 @@ int update_hwp_request_pkg(int pkg) verify_hwp_req_self_consistency(cpu, &req); - write_hwp_request(cpu, &req, msr_offset); + write_hwp_request_msr(cpu, &req, msr_offset); if (debug) { - read_hwp_request(cpu, &req, msr_offset); + read_hwp_request_msr(cpu, &req, msr_offset); print_hwp_request_pkg(pkg, &req, "new: "); } return 0; @@ -1188,8 +1188,8 @@ int update_cpu_msrs(int cpu) int epb; if (update_epb) { - epb = get_epb(cpu); - set_epb(cpu, new_epb); + epb = get_epb_sysfs(cpu); + set_epb_sysfs(cpu, new_epb); if (verbose) printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n", @@ -1229,7 +1229,7 @@ int update_cpu_msrs(int cpu) if (!hwp_update_enabled()) return 0; - update_hwp_request(cpu); + update_hwp_request_msr(cpu); return 0; } @@ -1587,7 +1587,7 @@ int main(int argc, char **argv) for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs); for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs); } else if (pkg_selected_set) - for_packages(pkg_selected_set, update_hwp_request_pkg); + for_packages(pkg_selected_set, update_hwp_request_pkg_msr); return 0; } -- cgit v1.2.3 From f8241f5426eb9feb46a007341edd221ceaf73073 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 19 Sep 2025 15:35:07 -0400 Subject: tools/power x86_energy_perf_policy: 
EPB access is only via sysfs Comprehend that EPB writes are now only via sysfs by moving it out of the _msr specific path. No functional change. Signed-off-by: Len Brown --- .../x86_energy_perf_policy.c | 25 ++++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index ce6fb8080105..fe10057bf9b7 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -1182,19 +1182,23 @@ int enable_hwp_on_cpu(int cpu) return 0; } -int update_cpu_msrs(int cpu) +int update_cpu_epb_sysfs(int cpu) { - unsigned long long msr; int epb; - if (update_epb) { - epb = get_epb_sysfs(cpu); - set_epb_sysfs(cpu, new_epb); + epb = get_epb_sysfs(cpu); + set_epb_sysfs(cpu, new_epb); - if (verbose) - printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n", - cpu, epb, (unsigned int) new_epb); - } + if (verbose) + printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n", + cpu, epb, (unsigned int) new_epb); + + return 0; +} + +int update_cpu_msrs(int cpu) +{ + unsigned long long msr; if (update_turbo) { int turbo_is_present_and_disabled; @@ -1584,8 +1588,11 @@ int main(int argc, char **argv) /* update CPU set */ if (cpu_selected_set) { + if (update_epb) + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_epb_sysfs); for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs); for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs); + } else if (pkg_selected_set) for_packages(pkg_selected_set, update_hwp_request_pkg_msr); -- cgit v1.2.3 From 2734fdbc9bb8a3aeb309ba0d62212d7f53f30bc7 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 19 Sep 2025 15:56:46 -0400 Subject: tools/power x86_energy_perf_policy: Prefer driver HWP limits When we are successful in using cpufreq min/max limits, skip setting the raw MSR limits entirely. This is necessary to avoid undoing any modification that the cpufreq driver makes to our sysfs request. eg. intel_pstate may take our request for a limit that is valid according to HWP.CAP.MIN/MAX and clip it to be within the range available in PLATFORM_INFO. Signed-off-by: Len Brown --- .../x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index fe10057bf9b7..884a4c746f32 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -62,6 +62,7 @@ unsigned char turbo_update_value; unsigned char update_hwp_epp; unsigned char update_hwp_min; unsigned char update_hwp_max; +unsigned char hwp_limits_done_via_sysfs; unsigned char update_hwp_desired; unsigned char update_hwp_window; unsigned char update_hwp_use_pkg; @@ -951,8 +952,10 @@ int ratio_2_sysfs_khz(int ratio) } /* * If HWP is enabled and cpufreq sysfs attribtes are present, - * then update sysfs, so that it will not become - * stale when we write to MSRs. + * then update via sysfs. The intel_pstate driver may modify (clip) + * this request, say, when HWP_CAP is outside of PLATFORM_INFO limits, + * and the driver-chosen value takes precidence. + * * (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq, * so we don't have to touch that.) 
*/ @@ -1007,6 +1010,8 @@ int update_sysfs(int cpu) if (update_hwp_max) update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max); + hwp_limits_done_via_sysfs = 1; + return 0; } @@ -1085,10 +1090,10 @@ int update_hwp_request_msr(int cpu) if (debug) print_hwp_request(cpu, &req, "old: "); - if (update_hwp_min) + if (update_hwp_min && !hwp_limits_done_via_sysfs) req.hwp_min = req_update.hwp_min; - if (update_hwp_max) + if (update_hwp_max && !hwp_limits_done_via_sysfs) req.hwp_max = req_update.hwp_max; if (update_hwp_desired) -- cgit v1.2.3 From a648e0892ccd8c2168c2deae7e12e18a8d596730 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 19 Sep 2025 22:30:18 -0400 Subject: tools/power x86_energy_perf_policy: Add make snapshot target $ make snapshot creates x86_energy_perf_policy-$(DATE).tar.gz Which can be transported to a target machine without needing a kernel tree to build on the target. Useful for creating debug versions. Signed-off-by: Len Brown --- tools/power/x86/x86_energy_perf_policy/Makefile | 29 ++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index 666b325a62a2..d18284667400 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -1,8 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 CC = $(CROSS_COMPILE)gcc -BUILD_OUTPUT := $(CURDIR) +BUILD_OUTPUT := $(CURDIR) PREFIX := /usr DESTDIR := +DAY := $(shell date +%Y.%m.%d) +SNAPSHOT = x86_energy_perf_policy-$(DAY) + + ifeq ("$(origin O)", "command line") BUILD_OUTPUT := $(O) @@ -27,3 +31,26 @@ install : x86_energy_perf_policy install -d $(DESTDIR)$(PREFIX)/share/man/man8 install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 +snapshot: x86_energy_perf_policy + @rm -rf $(SNAPSHOT) + @mkdir $(SNAPSHOT) + @cp x86_energy_perf_policy Makefile x86_energy_perf_policy.c x86_energy_perf_policy.8 $(SNAPSHOT) + + @sed -e 's/^#include /#include "bits.h"/' -e 's/u64/unsigned long long/' ../../../../arch/x86/include/asm/msr-index.h > $(SNAPSHOT)/msr-index.h + @echo '#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))' >> $(SNAPSHOT)/msr-index.h + @echo "#define BIT(x) (1 << (x))" > $(SNAPSHOT)/bits.h + @echo "#define BIT_ULL(nr) (1ULL << (nr))" >> $(SNAPSHOT)/bits.h + @echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h + @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h + + @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h + @echo '#define __must_be_array(arr) 0' >> $(SNAPSHOT)/build_bug.h + + @echo PWD=. > $(SNAPSHOT)/Makefile + @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile + @echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile + @sed -e's/.*MSRHEADER.*//' Makefile >> $(SNAPSHOT)/Makefile + + @rm -f $(SNAPSHOT).tar.gz + tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT) + -- cgit v1.2.3 From 66f430522452fe1a8a0fd2198cf9f335125acbfc Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 24 Sep 2025 11:50:29 -0400 Subject: tools/power x86_energy_perf_policy.8: Emphasize preference for SW interfaces This tool was originally written when Linux had no standard interface for EPB, or HWP support. 
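(Such interfaces exist now; for instance, the EPB sysfs attribute listed in the updated FILES section can be read without any MSR access. A minimal illustrative reader, not part of the patch:)

#include <stdio.h>

int main(void)
{
	/* Path taken from the man page's updated FILES section. */
	FILE *fp = fopen("/sys/devices/system/cpu/cpu0/power/energy_perf_bias", "r");
	int epb;

	if (!fp) {
		perror("energy_perf_bias");
		return 1;
	}
	if (fscanf(fp, "%d", &epb) == 1)
		printf("cpu0 EPB = %d (0 = performance ... 15 = powersave)\n", epb);
	fclose(fp);
	return 0;
}
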
Retain the capability to manage a system w/o any kernel PM support, but prefer the standard kernel interfaces, when available. (Not doing so led to a confusing conflict between a p-state limit request made via cpufreq and modified by the intel_pstate driver, versus the raw MSR write made by this utility.) Signed-off-by: Len Brown --- .../x86/x86_energy_perf_policy/x86_energy_perf_policy.8 | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 index 78c6361898b1..0aa981c18e56 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 @@ -2,7 +2,7 @@ .\" Distributed under the GPL, Copyleft 1994. .TH X86_ENERGY_PERF_POLICY 8 .SH NAME -x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers +x86_energy_perf_policy \- Manage Energy vs. Performance Policy .SH SYNOPSIS .B x86_energy_perf_policy .RB "[ options ] [ scope ] [field \ value]" @@ -19,9 +19,14 @@ x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Spe .SH DESCRIPTION \fBx86_energy_perf_policy\fP displays and updates energy-performance policy settings specific to -Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR) -updates, no matter if the Linux cpufreq sub-system is enabled or not. +Intel Architecture Processors. It summarizes settings available +in standard Linux interfaces (eg. cpufreq), +and also decodes underlying Model Specific Register (MSRs). +While \fBx86_energy_perf_policy\fP can manage energy-performance policy +using only MSR access, it prefers standard +Linux kernel interfaces, when they are available. +.SH BACKGROUND Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB) may affect a wide range of hardware decisions, such as how aggressively the hardware enters and exits CPU idle states (C-states) @@ -200,7 +205,9 @@ runs only as root. .SH FILES .ta .nf -/dev/cpu/*/msr +EPB: /sys/devices/system/cpu/cpu*/power/energy_perf_bias +EPP: /sys/devices/system/cpu/cpu*/cpufreq/energy_performance_preference +MSR: /dev/cpu/*/msr .fi .SH "SEE ALSO" .nf -- cgit v1.2.3 From 927f3e85015285ff68789bd00aa66b071016b27f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 23 Sep 2025 15:06:26 -0700 Subject: PM: runtime: Documentation: ABI: Document time units for *_time Many .../power/... time-related attributes have an "_ms" suffix and also include language in their ABI description to make clear that their time is measured in milliseconds. However, 'runtime_suspended_time' and 'runtime_active_time' have neither, and it takes me a nontrivial amount of time to poke through the source to confirm that they are also measured in milliseconds. Update the doc with "millisecond" units. Signed-off-by: Brian Norris Reviewed-by: Dhruva Gole Link: https://patch.msgid.link/20250923150625.1.If11a14e33d578369db48d678395d0323bdb01915@changeid [ rjw: Subject edits, changelog tweak ] Signed-off-by: Rafael J. 
Wysocki --- Documentation/ABI/testing/sysfs-devices-power | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index e4ec5de9a5dd..9bf7c8a267c5 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -274,15 +274,15 @@ What: /sys/devices/.../power/runtime_active_time Date: Jul 2010 Contact: Arjan van de Ven Description: - Reports the total time that the device has been active. - Used for runtime PM statistics. + Reports the total time that the device has been active, in + milliseconds. Used for runtime PM statistics. What: /sys/devices/.../power/runtime_suspended_time Date: Jul 2010 Contact: Arjan van de Ven Description: - Reports total time that the device has been suspended. - Used for runtime PM statistics. + Reports total time that the device has been suspended, in + milliseconds. Used for runtime PM statistics. What: /sys/devices/.../power/runtime_usage Date: Apr 2010 -- cgit v1.2.3 From bbfe987c5a2854705393ad79813074e5eadcbde6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 13:10:22 +0200 Subject: PM: hibernate: Fix pm_hibernation_mode_is_suspend() build breakage Commit 495c8d35035e ("PM: hibernate: Add pm_hibernation_mode_is_suspend()") that introduced pm_hibernation_mode_is_suspend() did not define it in the case when CONFIG_HIBERNATION is unset, but CONFIG_SUSPEND is set. Subsequent commit 0a6e9e098fcc ("drm/amd: Fix hybrid sleep") made the amdgpu driver use that function which led to kernel build breakage in the case mentioned above [1]. Address this by using appropriate #ifdeffery around the definition of pm_hibernation_mode_is_suspend(). Fixes: 0a6e9e098fcc ("drm/amd: Fix hybrid sleep") Reported-by: KernelCI bot Closes: https://groups.io/g/kernelci-results/topic/regression_pm_testing/115439919 [1] Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello (AMD) --- include/linux/suspend.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0664c685f0b2..b02876f1ae38 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -276,7 +276,6 @@ extern void arch_suspend_enable_irqs(void); extern int pm_suspend(suspend_state_t state); extern bool sync_on_suspend_enabled; -bool pm_hibernation_mode_is_suspend(void); #else /* !CONFIG_SUSPEND */ #define suspend_valid_only_mem NULL @@ -289,7 +288,6 @@ static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } static inline bool pm_suspend_no_platform(void) { return false; } static inline bool pm_suspend_default_s2idle(void) { return false; } -static inline bool pm_hibernation_mode_is_suspend(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } @@ -420,6 +418,12 @@ static inline int hibernate_quiet_exec(int (*func)(void *data), void *data) { } #endif /* CONFIG_HIBERNATION */ +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_SUSPEND) +bool pm_hibernation_mode_is_suspend(void); +#else +static inline bool pm_hibernation_mode_is_suspend(void) { return false; } +#endif + int arch_resume_nosmt(void); #ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV -- cgit v1.2.3 From 6f4c6f9ed4ce65303f6bb153e2afc71bc33c8ded Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 26 Sep 2025 18:40:25 +0200 Subject: PM: hibernate: Restrict GFP mask in power_down() Commit 12ffc3b1513e ("PM: Restrict swap use to later in the suspend sequence") caused hibernation_platform_enter() to call pm_restore_gfp_mask() via dpm_resume_end(), so when power_down() returns after aborting hibernation_platform_enter(), it needs to match the pm_restore_gfp_mask() call in hibernate() that will occur subsequently. Address this by adding a pm_restrict_gfp_mask() call to the relevant error path in power_down(). Fixes: 12ffc3b1513e ("PM: Restrict swap use to later in the suspend sequence") Cc: 6.16+ # 6.16+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello (AMD) --- kernel/power/hibernate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 5d146218cae8..728328c51b64 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -733,6 +733,8 @@ static void power_down(void) case HIBERNATION_PLATFORM: error = hibernation_platform_enter(); if (error == -EAGAIN || error == -EBUSY) { + /* Match pm_restore_gfp_mask() in hibernate(). */ + pm_restrict_gfp_mask(); swsusp_unmark(); events_check_enabled = false; pr_info("Wakeup event detected during hibernation, rolling back.\n"); -- cgit v1.2.3 From 1f5bcfe91ffce71bdd1022648b9d501d46d20c09 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 18:41:21 +0200 Subject: PM: hibernate: Combine return paths in power_down() To avoid code duplication and improve clarity, combine the code paths in power_down() leading to a return from that function. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello (AMD) Link: https://patch.msgid.link/3571055.QJadu78ljV@rafael.j.wysocki [ rjw: Changed the new label name to "exit" ] Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 728328c51b64..14e85ff23551 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -708,21 +708,11 @@ static void power_down(void) if (hibernation_mode == HIBERNATION_SUSPEND) { pm_restore_gfp_mask(); error = suspend_devices_and_enter(mem_sleep_current); - if (error) { - hibernation_mode = hibernation_ops ? - HIBERNATION_PLATFORM : - HIBERNATION_SHUTDOWN; - } else { - /* Match pm_restore_gfp_mask() call in hibernate() */ - pm_restrict_gfp_mask(); - - /* Restore swap signature. */ - error = swsusp_unmark(); - if (error) - pr_err("Swap will be unusable! Try swapon -a.\n"); + if (!error) + goto exit; - return; - } + hibernation_mode = hibernation_ops ? HIBERNATION_PLATFORM : + HIBERNATION_SHUTDOWN; } #endif @@ -733,12 +723,9 @@ static void power_down(void) case HIBERNATION_PLATFORM: error = hibernation_platform_enter(); if (error == -EAGAIN || error == -EBUSY) { - /* Match pm_restore_gfp_mask() in hibernate(). */ - pm_restrict_gfp_mask(); - swsusp_unmark(); events_check_enabled = false; pr_info("Wakeup event detected during hibernation, rolling back.\n"); - return; + goto exit; } fallthrough; case HIBERNATION_SHUTDOWN: @@ -757,6 +744,15 @@ static void power_down(void) pr_crit("Power down manually\n"); while (1) cpu_relax(); + +exit: + /* Match the pm_restore_gfp_mask() call in hibernate(). */ + pm_restrict_gfp_mask(); + + /* Restore swap signature. */ + error = swsusp_unmark(); + if (error) + pr_err("Swap will be unusable! 
Try swapon -a.\n"); } static int load_image_and_restore(void) -- cgit v1.2.3
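As a closing illustration of the power_down() consolidation above: the single-exit "goto" style it adopts is a common C cleanup pattern. A toy standalone version (illustrative only; the functions are hypothetical, not kernel code):

#include <stdio.h>
#include <stdlib.h>

/* Both the success path and the early-error path funnel through one
 * label so the shared cleanup runs exactly once. */
static int do_work(int fail_early)
{
	char *buf = malloc(64);
	int ret = 0;

	if (!buf)
		return -1;

	if (fail_early) {
		ret = -2;
		goto exit;
	}

	snprintf(buf, 64, "worked");
	puts(buf);

exit:
	free(buf);	/* analogous to the shared gfp-mask/swap-signature tail */
	return ret;
}

int main(void)
{
	printf("%d\n", do_work(0));
	printf("%d\n", do_work(1));
	return 0;
}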