-rw-r--r-- | Documentation/devicetree/bindings/power/power_domain.txt    |  19
-rw-r--r-- | drivers/base/dd.c                                           |   3
-rw-r--r-- | drivers/base/power/common.c                                 |  43
-rw-r--r-- | drivers/base/power/domain.c                                 | 134
-rw-r--r-- | drivers/base/power/runtime.c                                |  27
-rw-r--r-- | drivers/base/power/sysfs.c                                  |   2
-rw-r--r-- | drivers/cpufreq/Kconfig.arm                                 |   2
-rw-r--r-- | drivers/cpufreq/acpi-cpufreq.c                              |   4
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.c                          |  12
-rw-r--r-- | drivers/cpufreq/imx6q-cpufreq.c                             |  29
-rw-r--r-- | drivers/cpufreq/intel_pstate.c                              | 179
-rw-r--r-- | drivers/cpufreq/ti-cpufreq.c                                |   7
-rw-r--r-- | include/linux/pm_domain.h                                   |  15
-rw-r--r-- | include/linux/pm_runtime.h                                  |   6
-rw-r--r-- | tools/power/cpupower/bench/parse.c                          |   2
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c     |  15
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c  |  35
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h  |   9
18 files changed, 468 insertions(+), 75 deletions(-)
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 4733f76cbe48..9b387f861aed 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -111,8 +111,8 @@ Example 3:
 ==PM domain consumers==
 
 Required properties:
- - power-domains : A phandle and PM domain specifier as defined by bindings of
-                   the power controller specified by phandle.
+ - power-domains : A list of PM domain specifiers, as defined by bindings of
+                   the power controller that is the PM domain provider.
 
 Example:
 
@@ -122,9 +122,18 @@ Example:
        power-domains = <&power 0>;
    };
 
-The node above defines a typical PM domain consumer device, which is located
-inside a PM domain with index 0 of a power controller represented by a node
-with the label "power".
+   leaky-device@12351000 {
+       compatible = "foo,i-leak-current";
+       reg = <0x12351000 0x1000>;
+       power-domains = <&power 0>, <&power 1>;
+   };
+
+The first example above defines a typical PM domain consumer device, which is
+located inside a PM domain with index 0 of a power controller represented by a
+node with the label "power".
+In the second example the consumer device is partitioned across two PM domains,
+the first with index 0 and the second with index 1, of a power controller that
+is represented by a node with the label "power".
 
 Optional properties:
 - required-opps: This contains phandle to an OPP node in another device's OPP
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index fb4e2df68d95..1435d7281c66 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -580,7 +580,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
    pr_debug("bus: '%s': %s: matched device %s with driver %s\n",
         drv->bus->name, __func__, dev_name(dev), drv->name);
 
-   pm_runtime_resume_suppliers(dev);
+   pm_runtime_get_suppliers(dev);
    if (dev->parent)
        pm_runtime_get_sync(dev->parent);
 
@@ -591,6 +591,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
    if (dev->parent)
        pm_runtime_put(dev->parent);
 
+   pm_runtime_put_suppliers(dev);
    return ret;
 }
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index 7ae62b6355b8..df41b4780b3b 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -117,13 +117,50 @@ int dev_pm_domain_attach(struct device *dev, bool power_on)
 EXPORT_SYMBOL_GPL(dev_pm_domain_attach);
 
 /**
+ * dev_pm_domain_attach_by_id - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @index: The index of the PM domain.
+ *
+ * As @dev may only be attached to a single PM domain, the backend PM domain
+ * provider creates a virtual device to attach instead. If attachment succeeds,
+ * the ->detach() callback in the struct dev_pm_domain is assigned by the
+ * corresponding backend attach function, so as to deal with detaching of the
+ * created virtual device.
+ *
+ * This function should typically be invoked by a driver during the probe phase,
+ * in case its device requires power management through multiple PM domains. The
+ * driver may benefit from using the received device, to configure device-links
+ * towards its original device. Depending on the use-case and if needed, the
+ * links may be dynamically changed by the driver, which allows it to control
+ * the power to the PM domains independently from each other.
+ *
+ * Callers must ensure proper synchronization of this function with power
+ * management callbacks.
+ *
+ * Returns the virtual created device when successfully attached to its PM
+ * domain, NULL in case @dev doesn't need a PM domain, else an ERR_PTR().
+ * Note that, to detach the returned virtual device, the driver shall call
+ * dev_pm_domain_detach() on it, typically during the remove phase.
+ */
+struct device *dev_pm_domain_attach_by_id(struct device *dev,
+                                          unsigned int index)
+{
+   if (dev->pm_domain)
+       return ERR_PTR(-EEXIST);
+
+   return genpd_dev_pm_attach_by_id(dev, index);
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id);
+
+/**
  * dev_pm_domain_detach - Detach a device from its PM domain.
  * @dev: Device to detach.
  * @power_off: Used to indicate whether we should power off the device.
  *
- * This functions will reverse the actions from dev_pm_domain_attach() and thus
- * try to detach the @dev from its PM domain. Typically it should be invoked
- * from subsystem level code during the remove phase.
+ * This function will reverse the actions from dev_pm_domain_attach() and
+ * dev_pm_domain_attach_by_id(), thus it detaches @dev from its PM domain.
+ * Typically it should be invoked during the remove phase, either from
+ * subsystem level code or from drivers.
  *
  * Callers must ensure proper synchronization of this function with power
  * management callbacks.
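The new dev_pm_domain_attach_by_id() is meant to be called from driver probe. As a rough illustration of the intended calling convention — not taken from this series; foo_probe() and the two-domain layout from the DT example above are assumptions — a consumer could attach both of its domains and link them to the real device:

/*
 * Hypothetical consumer sketch: attach both PM domains from the second DT
 * example and link the returned virtual devices as suppliers, so runtime PM
 * of the real device powers both domains. A real driver would also keep
 * pd0/pd1 around for dev_pm_domain_detach() in its remove path.
 */
#include <linux/device.h>
#include <linux/err.h>
#include <linux/platform_device.h>
#include <linux/pm_domain.h>

static int foo_probe(struct platform_device *pdev)
{
    struct device *dev = &pdev->dev;
    struct device *pd0, *pd1;

    pd0 = dev_pm_domain_attach_by_id(dev, 0);
    if (IS_ERR(pd0))
        return PTR_ERR(pd0);    /* may be -EPROBE_DEFER */

    pd1 = dev_pm_domain_attach_by_id(dev, 1);
    if (IS_ERR(pd1)) {
        dev_pm_domain_detach(pd0, false);
        return PTR_ERR(pd1);
    }

    /* NULL returns (no multi-domain setup) are not expected here. */
    device_link_add(dev, pd0, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
    device_link_add(dev, pd1, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);

    return 0;
}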
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 6f403d6fccb2..4925af5c4cf0 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2171,6 +2171,15 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 }
 EXPORT_SYMBOL_GPL(of_genpd_remove_last);
 
+static void genpd_release_dev(struct device *dev)
+{
+   kfree(dev);
+}
+
+static struct bus_type genpd_bus_type = {
+   .name = "genpd",
+};
+
 /**
  * genpd_dev_pm_detach - Detach a device from its PM domain.
  * @dev: Device to detach.
@@ -2208,6 +2217,10 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off)
 
    /* Check if PM domain can be powered off after removing this device. */
    genpd_queue_power_off_work(pd);
+
+   /* Unregister the device if it was created by genpd. */
+   if (dev->bus == &genpd_bus_type)
+       device_unregister(dev);
 }
 
 static void genpd_dev_pm_sync(struct device *dev)
@@ -2221,32 +2234,17 @@ static void genpd_dev_pm_sync(struct device *dev)
    genpd_queue_power_off_work(pd);
 }
 
-/**
- * genpd_dev_pm_attach - Attach a device to its PM domain using DT.
- * @dev: Device to attach.
- *
- * Parse device's OF node to find a PM domain specifier. If such is found,
- * attaches the device to retrieved pm_domain ops.
- *
- * Returns 1 on successfully attached PM domain, 0 when the device don't need a
- * PM domain or a negative error code in case of failures. Note that if a
- * power-domain exists for the device, but it cannot be found or turned on,
- * then return -EPROBE_DEFER to ensure that the device is not probed and to
- * re-try again later.
- */
-int genpd_dev_pm_attach(struct device *dev)
+static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np,
+                                 unsigned int index)
 {
    struct of_phandle_args pd_args;
    struct generic_pm_domain *pd;
    int ret;
 
-   if (!dev->of_node)
-       return 0;
-
-   ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
-                                    "#power-domain-cells", 0, &pd_args);
+   ret = of_parse_phandle_with_args(np, "power-domains",
+                                    "#power-domain-cells", index, &pd_args);
    if (ret < 0)
-       return 0;
+       return ret;
 
    mutex_lock(&gpd_list_lock);
    pd = genpd_get_from_provider(&pd_args);
@@ -2282,8 +2280,98 @@ int genpd_dev_pm_attach(struct device *dev)
 
    return ret ? -EPROBE_DEFER : 1;
 }
+
+/**
+ * genpd_dev_pm_attach - Attach a device to its PM domain using DT.
+ * @dev: Device to attach.
+ *
+ * Parse device's OF node to find a PM domain specifier. If such is found,
+ * attaches the device to retrieved pm_domain ops.
+ *
+ * Returns 1 on successfully attached PM domain, 0 when the device doesn't
+ * need a PM domain or when multiple power-domains exist for it, else a
+ * negative error code. Note that if a power-domain exists for the device, but
+ * it cannot be found or turned on, then return -EPROBE_DEFER to ensure that
+ * the device is not probed and to re-try again later.
+ */
+int genpd_dev_pm_attach(struct device *dev)
+{
+   if (!dev->of_node)
+       return 0;
+
+   /*
+    * Devices with multiple PM domains must be attached separately, as we
+    * can only attach one PM domain per device.
+    */
+   if (of_count_phandle_with_args(dev->of_node, "power-domains",
+                                  "#power-domain-cells") != 1)
+       return 0;
+
+   return __genpd_dev_pm_attach(dev, dev->of_node, 0);
+}
 EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
 
+/**
+ * genpd_dev_pm_attach_by_id - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @index: The index of the PM domain.
+ *
+ * Parse device's OF node to find a PM domain specifier at the provided @index.
+ * If such is found, creates a virtual device and attaches it to the retrieved
+ * pm_domain ops. To deal with detaching of the virtual device, the ->detach()
+ * callback in the struct dev_pm_domain is assigned to genpd_dev_pm_detach().
+ *
+ * Returns the created virtual device if successfully attached to its PM
+ * domain, NULL when the device doesn't need a PM domain, else an ERR_PTR()
+ * in case of failures. If a power-domain exists for the device, but cannot be
+ * found or turned on, then ERR_PTR(-EPROBE_DEFER) is returned to ensure that
+ * the device is not probed and to re-try again later.
+ */
+struct device *genpd_dev_pm_attach_by_id(struct device *dev,
+                                         unsigned int index)
+{
+   struct device *genpd_dev;
+   int num_domains;
+   int ret;
+
+   if (!dev->of_node)
+       return NULL;
+
+   /* Deal only with devices using multiple PM domains. */
+   num_domains = of_count_phandle_with_args(dev->of_node, "power-domains",
+                                            "#power-domain-cells");
+   if (num_domains < 2 || index >= num_domains)
+       return NULL;
+
+   /* Allocate and register device on the genpd bus. */
+   genpd_dev = kzalloc(sizeof(*genpd_dev), GFP_KERNEL);
+   if (!genpd_dev)
+       return ERR_PTR(-ENOMEM);
+
+   dev_set_name(genpd_dev, "genpd:%u:%s", index, dev_name(dev));
+   genpd_dev->bus = &genpd_bus_type;
+   genpd_dev->release = genpd_release_dev;
+
+   ret = device_register(genpd_dev);
+   if (ret) {
+       kfree(genpd_dev);
+       return ERR_PTR(ret);
+   }
+
+   /* Try to attach the device to the PM domain at the specified index. */
+   ret = __genpd_dev_pm_attach(genpd_dev, dev->of_node, index);
+   if (ret < 1) {
+       device_unregister(genpd_dev);
+       return ret ? ERR_PTR(ret) : NULL;
+   }
+
+   pm_runtime_set_active(genpd_dev);
+   pm_runtime_enable(genpd_dev);
+
+   return genpd_dev;
+}
+EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id);
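Because genpd_dev_pm_attach_by_id() leaves the virtual device runtime-PM enabled and active, a driver that skips the device links (or changes them dynamically) can gate each domain individually through runtime PM on the returned device. A minimal sketch with made-up names; no claim that any in-tree driver does exactly this:

/*
 * Illustrative only: virt_pd1 is the device returned by
 * dev_pm_domain_attach_by_id(dev, 1). Runtime PM on it powers just the
 * genpd it is attached to, independently of the device's other domain.
 */
#include <linux/pm_runtime.h>

static int foo_enable_domain1(struct device *virt_pd1)
{
    return pm_runtime_get_sync(virt_pd1);
}

static void foo_disable_domain1(struct device *virt_pd1)
{
    pm_runtime_put(virt_pd1);
}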
+
 static const struct of_device_id idle_state_match[] = {
    { .compatible = "domain-idle-state", },
    { }
@@ -2443,6 +2531,12 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(of_genpd_opp_to_performance_state);
 
+static int __init genpd_bus_init(void)
+{
+   return bus_register(&genpd_bus_type);
+}
+core_initcall(genpd_bus_init);
+
 #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index c6030f100c08..beb85c31f3fa 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1563,16 +1563,37 @@ void pm_runtime_clean_up_links(struct device *dev)
 }
 
 /**
- * pm_runtime_resume_suppliers - Resume supplier devices.
+ * pm_runtime_get_suppliers - Resume and reference-count supplier devices.
  * @dev: Consumer device.
  */
-void pm_runtime_resume_suppliers(struct device *dev)
+void pm_runtime_get_suppliers(struct device *dev)
 {
+   struct device_link *link;
    int idx;
 
    idx = device_links_read_lock();
 
-   rpm_get_suppliers(dev);
+   list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+       if (link->flags & DL_FLAG_PM_RUNTIME)
+           pm_runtime_get_sync(link->supplier);
+
+   device_links_read_unlock(idx);
+}
+
+/**
+ * pm_runtime_put_suppliers - Drop references to supplier devices.
+ * @dev: Consumer device.
+ */
+void pm_runtime_put_suppliers(struct device *dev)
+{
+   struct device_link *link;
+   int idx;
+
+   idx = device_links_read_lock();
+
+   list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+       if (link->flags & DL_FLAG_PM_RUNTIME)
+           pm_runtime_put(link->supplier);
 
    device_links_read_unlock(idx);
 }
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 0f651efc58a1..d713738ce796 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -353,7 +353,7 @@ static ssize_t wakeup_count_show(struct device *dev,
 
    spin_lock_irq(&dev->power.lock);
    if (dev->power.wakeup) {
-       count = dev->power.wakeup->event_count;
+       count = dev->power.wakeup->wakeup_count;
        enabled = true;
    }
    spin_unlock_irq(&dev->power.lock);
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index c7ce928fbf1f..52f5f1a2040c 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -125,7 +125,7 @@ config ARM_OMAP2PLUS_CPUFREQ
    default ARCH_OMAP2PLUS
 
 config ARM_QCOM_CPUFREQ_KRYO
-   bool "Qualcomm Kryo based CPUFreq"
+   tristate "Qualcomm Kryo based CPUFreq"
    depends on ARM64
    depends on QCOM_QFPROM
    depends on QCOM_SMEM
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 8ff1c9123834..b61f4ec43e06 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -465,8 +465,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
    return result;
 }
 
-unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
-                                     unsigned int target_freq)
+static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
+                                            unsigned int target_freq)
 {
    struct acpi_cpufreq_data *data = policy->driver_data;
    struct acpi_processor_performance *perf;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 871bf9cf55cf..1d50e97d49f1 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -165,7 +165,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
             * calls, so the previous load value can be used then.
             */
            load = j_cdbs->prev_load;
-       } else if (unlikely(time_elapsed > 2 * sampling_rate &&
+       } else if (unlikely((int)idle_time > 2 * sampling_rate &&
                    j_cdbs->prev_load)) {
            /*
             * If the CPU had gone completely idle and a task has
@@ -185,10 +185,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
             * clear prev_load to guarantee that the load will be
             * computed again next time.
             *
-            * Detecting this situation is easy: the governor's
-            * utilization update handler would not have run during
-            * CPU-idle periods. Hence, an unusually large
-            * 'time_elapsed' (as compared to the sampling rate)
+            * Detecting this situation is easy: an unusually large
+            * 'idle_time' (as compared to the sampling rate)
             * indicates this scenario.
             */
            load = j_cdbs->prev_load;
@@ -217,8 +215,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
            j_cdbs->prev_load = load;
        }
 
-       if (time_elapsed > 2 * sampling_rate) {
-           unsigned int periods = time_elapsed / sampling_rate;
+       if (unlikely((int)idle_time > 2 * sampling_rate)) {
+           unsigned int periods = idle_time / sampling_rate;
 
            if (periods < idle_periods)
                idle_periods = periods;
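For reference, dbs_update() derives load as the busy share of the elapsed window, load = 100 * (time_elapsed - idle_time) / time_elapsed, which is why an idle_time far above the sampling rate marks a long-idle window. A standalone model of that arithmetic, with illustrative constants only:

/* Compile with: cc -o load load.c */
#include <stdio.h>

static unsigned int compute_load(unsigned int time_elapsed,
                                 unsigned int idle_time,
                                 unsigned int sampling_rate)
{
    /* Long-idle case: the governor counts idle periods instead. */
    if ((int)idle_time > 2 * (int)sampling_rate)
        return 0;

    return 100 * (time_elapsed - idle_time) / time_elapsed;
}

int main(void)
{
    /* 20 ms window, 4 ms idle, 10 ms sampling rate: 80% load */
    printf("load = %u%%\n", compute_load(20000, 4000, 10000));
    /* 100 ms window, 95 ms idle: idle_time >> sampling rate */
    printf("load = %u%%\n", compute_load(100000, 95000, 10000));
    return 0;
}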
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 70912104a199..8b3c2a79ad6c 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -266,6 +266,8 @@ put_node:
 }
 
 #define OCOTP_CFG3_6UL_SPEED_696MHZ    0x2
+#define OCOTP_CFG3_6ULL_SPEED_792MHZ   0x2
+#define OCOTP_CFG3_6ULL_SPEED_900MHZ   0x3
 
 static void imx6ul_opp_check_speed_grading(struct device *dev)
 {
@@ -287,16 +289,30 @@ static void imx6ul_opp_check_speed_grading(struct device *dev)
     * Speed GRADING[1:0] defines the max speed of ARM:
     * 2b'00: Reserved;
     * 2b'01: 528000000Hz;
-    * 2b'10: 696000000Hz;
-    * 2b'11: Reserved;
+    * 2b'10: 696000000Hz on i.MX6UL, 792000000Hz on i.MX6ULL;
+    * 2b'11: 900000000Hz on i.MX6ULL only;
     * We need to set the max speed of ARM according to fuse map.
     */
    val = readl_relaxed(base + OCOTP_CFG3);
    val >>= OCOTP_CFG3_SPEED_SHIFT;
    val &= 0x3;
-   if (val != OCOTP_CFG3_6UL_SPEED_696MHZ)
-       if (dev_pm_opp_disable(dev, 696000000))
-           dev_warn(dev, "failed to disable 696MHz OPP\n");
+
+   if (of_machine_is_compatible("fsl,imx6ul")) {
+       if (val != OCOTP_CFG3_6UL_SPEED_696MHZ)
+           if (dev_pm_opp_disable(dev, 696000000))
+               dev_warn(dev, "failed to disable 696MHz OPP\n");
+   }
+
+   if (of_machine_is_compatible("fsl,imx6ull")) {
+       if (val != OCOTP_CFG3_6ULL_SPEED_792MHZ)
+           if (dev_pm_opp_disable(dev, 792000000))
+               dev_warn(dev, "failed to disable 792MHz OPP\n");
+
+       if (val != OCOTP_CFG3_6ULL_SPEED_900MHZ)
+           if (dev_pm_opp_disable(dev, 900000000))
+               dev_warn(dev, "failed to disable 900MHz OPP\n");
+   }
+
    iounmap(base);
 put_node:
    of_node_put(np);
@@ -356,7 +372,8 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
        goto put_reg;
    }
 
-   if (of_machine_is_compatible("fsl,imx6ul"))
+   if (of_machine_is_compatible("fsl,imx6ul") ||
+       of_machine_is_compatible("fsl,imx6ull"))
        imx6ul_opp_check_speed_grading(cpu_dev);
    else
        imx6q_opp_check_speed_grading(cpu_dev);
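The fuse decode itself is only a shift and mask. Below is a userspace sketch of the table from the comment above; the shift value (OCOTP_CFG3_SPEED_SHIFT is 16 in the driver) and the sample fuse word are assumptions made for the illustration:

#include <stdio.h>
#include <stdint.h>

#define OCOTP_CFG3_SPEED_SHIFT 16

/* Decode GRADING[1:0] from an OCOTP_CFG3 value, i.MX6ULL view. */
static const char *imx6ull_max_freq(uint32_t cfg3)
{
    switch ((cfg3 >> OCOTP_CFG3_SPEED_SHIFT) & 0x3) {
    case 0x1: return "528 MHz";
    case 0x2: return "792 MHz";  /* 696 MHz on i.MX6UL */
    case 0x3: return "900 MHz";  /* i.MX6ULL only */
    default:  return "reserved";
    }
}

int main(void)
{
    uint32_t cfg3 = 0x00030000;  /* example fuse word: grade 2b'11 */

    printf("max ARM clock: %s\n", imx6ull_max_freq(cfg3));
    return 0;
}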
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b6575408f279..1de5ec8d5ea3 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -221,6 +221,11 @@ struct global_params {
  *         preference/bias
  * @epp_saved:     Saved EPP/EPB during system suspend or CPU offline
  *         operation
+ * @hwp_req_cached:    Cached value of the last HWP Request MSR
+ * @hwp_cap_cached:    Cached value of the last HWP Capabilities MSR
+ * @last_io_update:    Last time when IO wake flag was set
+ * @sched_flags:       Store scheduler flags for possible cross CPU update
+ * @hwp_boost_min:     Last HWP boosted min performance
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -253,6 +258,11 @@ struct cpudata {
    s16 epp_policy;
    s16 epp_default;
    s16 epp_saved;
+   u64 hwp_req_cached;
+   u64 hwp_cap_cached;
+   u64 last_io_update;
+   unsigned int sched_flags;
+   u32 hwp_boost_min;
 };
 
 static struct cpudata **all_cpu_data;
@@ -285,6 +295,7 @@ static struct pstate_funcs pstate_funcs __read_mostly;
 
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
+static bool hwp_boost __read_mostly;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
@@ -689,6 +700,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
    u64 cap;
 
    rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+   WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
    if (global.no_turbo)
        *current_max = HWP_GUARANTEED_PERF(cap);
    else
@@ -763,6 +775,7 @@ update_epp:
        intel_pstate_set_epb(cpu, epp);
    }
 skip_epp:
+   WRITE_ONCE(cpu_data->hwp_req_cached, value);
    wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
@@ -1020,6 +1033,30 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
    return count;
 }
 
+static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
+               struct attribute *attr, char *buf)
+{
+   return sprintf(buf, "%u\n", hwp_boost);
+}
+
+static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b,
+                                      const char *buf, size_t count)
+{
+   unsigned int input;
+   int ret;
+
+   ret = kstrtouint(buf, 10, &input);
+   if (ret)
+       return ret;
+
+   mutex_lock(&intel_pstate_driver_lock);
+   hwp_boost = !!input;
+   intel_pstate_update_policies();
+   mutex_unlock(&intel_pstate_driver_lock);
+
+   return count;
+}
+
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
@@ -1029,6 +1066,7 @@ define_one_global_rw(max_perf_pct);
 define_one_global_rw(min_perf_pct);
 define_one_global_ro(turbo_pct);
 define_one_global_ro(num_pstates);
+define_one_global_rw(hwp_dynamic_boost);
 
 static struct attribute *intel_pstate_attributes[] = {
    &status.attr,
@@ -1069,6 +1107,11 @@ static void __init intel_pstate_sysfs_expose_params(void)
    rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
    WARN_ON(rc);
 
+   if (hwp_active) {
+       rc = sysfs_create_file(intel_pstate_kobject,
+                              &hwp_dynamic_boost.attr);
+       WARN_ON(rc);
+   }
 }
 /************************** sysfs end ************************/
 
@@ -1381,6 +1424,116 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
    intel_pstate_set_min_pstate(cpu);
 }
 
+/*
+ * A long hold time keeps high perf limits for a long time, which
+ * negatively impacts perf/watt for some workloads, like specpower.
+ * 3ms is based on experiments on some workloads.
+ */
+static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
+
+static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
+{
+   u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
+   u32 max_limit = (hwp_req & 0xff00) >> 8;
+   u32 min_limit = (hwp_req & 0xff);
+   u32 boost_level1;
+
+   /*
+    * Cases to consider (User changes via sysfs or boot time):
+    * If, P0 (Turbo max) = P1 (Guaranteed max) = min:
+    *  No boost, return.
+    * If, P0 (Turbo max) > P1 (Guaranteed max) = min:
+    *  Should result in one level boost only for P0.
+    * If, P0 (Turbo max) = P1 (Guaranteed max) > min:
+    *  Should result in two level boost:
+    *      (min + p1)/2 and P1.
+    * If, P0 (Turbo max) > P1 (Guaranteed max) > min:
+    *  Should result in three level boost:
+    *      (min + p1)/2, P1 and P0.
+    */
+
+   /* If max and min are equal or already at max, nothing to boost */
+   if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
+       return;
+
+   if (!cpu->hwp_boost_min)
+       cpu->hwp_boost_min = min_limit;
+
+   /* level at half way mark between min and guaranteed */
+   boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
+
+   if (cpu->hwp_boost_min < boost_level1)
+       cpu->hwp_boost_min = boost_level1;
+   else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+       cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
+   else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
+            max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+       cpu->hwp_boost_min = max_limit;
+   else
+       return;
+
+   hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
+   wrmsrl(MSR_HWP_REQUEST, hwp_req);
+   cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
+{
+   if (cpu->hwp_boost_min) {
+       bool expired;
+
+       /* Check if we are idle for hold time to boost down */
+       expired = time_after64(cpu->sample.time, cpu->last_update +
+                              hwp_boost_hold_time_ns);
+       if (expired) {
+           wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
+           cpu->hwp_boost_min = 0;
+       }
+   }
+   cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
+                                                     u64 time)
+{
+   cpu->sample.time = time;
+
+   if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
+       bool do_io = false;
+
+       cpu->sched_flags = 0;
+       /*
+        * Set iowait_boost flag and update time. Since IO WAIT flag
+        * is set all the time, we can't just conclude that there is
+        * some IO-bound activity scheduled on this CPU with just
+        * one occurrence. If we receive at least two in two
+        * consecutive ticks, then we treat as boost candidate.
+        */
+       if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
+           do_io = true;
+
+       cpu->last_io_update = time;
+
+       if (do_io)
+           intel_pstate_hwp_boost_up(cpu);
+
+   } else {
+       intel_pstate_hwp_boost_down(cpu);
+   }
+}
+
+static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
+                                               u64 time, unsigned int flags)
+{
+   struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+
+   cpu->sched_flags |= flags;
+
+   if (smp_processor_id() == cpu->cpu)
+       intel_pstate_update_util_hwp_local(cpu, time);
+}
+
 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
 {
    struct sample *sample = &cpu->sample;
@@ -1641,6 +1794,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
    {}
 };
 
+static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
+   ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
+   ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
+   {}
+};
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
    struct cpudata *cpu;
@@ -1671,6 +1830,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
            intel_pstate_disable_ee(cpunum);
 
        intel_pstate_hwp_enable(cpu);
+
+       id = x86_match_cpu(intel_pstate_hwp_boost_ids);
+       if (id)
+           hwp_boost = true;
    }
 
    intel_pstate_get_cpu_pstates(cpu);
@@ -1684,7 +1847,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
    struct cpudata *cpu = all_cpu_data[cpu_num];
 
-   if (hwp_active)
+   if (hwp_active && !hwp_boost)
        return;
 
    if (cpu->update_util_set)
@@ -1693,7 +1856,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
    /* Prevent intel_pstate_update_util() from using stale data. */
    cpu->sample.time = 0;
    cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-                                intel_pstate_update_util);
+                                (hwp_active ?
+                                 intel_pstate_update_util_hwp :
+                                 intel_pstate_update_util));
    cpu->update_util_set = true;
 }
 
@@ -1805,8 +1970,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
        intel_pstate_set_update_util_hook(policy->cpu);
    }
 
-   if (hwp_active)
+   if (hwp_active) {
+       /*
+        * If hwp_boost was active before and has been dynamically
+        * turned off, we need to clear the update util hook.
+        */
+       if (!hwp_boost)
+           intel_pstate_clear_update_util_hook(policy->cpu);
        intel_pstate_hwp_set(policy->cpu);
+   }
 
    mutex_unlock(&intel_pstate_limits_lock);
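The boost ladder in intel_pstate_hwp_boost_up() can be modeled outside the kernel: each qualifying IO-wait tick lifts the effective minimum one step along min -> (min + P1)/2 -> P1 -> P0. A small standalone model with made-up HWP performance levels:

#include <stdio.h>
#include <stdint.h>

/* One boost step: cur is the current boosted min (0 = not boosted). */
static uint32_t next_boost_min(uint32_t cur, uint32_t min,
                               uint32_t guaranteed, uint32_t max)
{
    uint32_t level1 = (guaranteed + min) / 2;

    if (cur == 0)
        cur = min;
    if (cur < level1)
        return level1;
    if (cur < guaranteed)
        return guaranteed;
    if (cur == guaranteed && max != guaranteed)
        return max;
    return cur;  /* already fully boosted */
}

int main(void)
{
    uint32_t min = 10, p1 = 24, p0 = 38, boost = 0;

    for (int i = 0; i < 4; i++) {
        boost = next_boost_min(boost, min, p1, p0);
        printf("boost step %d: min perf = %u\n", i, boost);
    }
    return 0;  /* prints 17, 24, 38, 38 */
}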
diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index 6ba709b6f095..3f0e2a14895a 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -217,7 +217,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
    if (!match)
        return -ENODEV;
 
-   opp_data = kzalloc(sizeof(*opp_data), GFP_KERNEL);
+   opp_data = devm_kzalloc(&pdev->dev, sizeof(*opp_data), GFP_KERNEL);
    if (!opp_data)
        return -ENOMEM;
 
@@ -226,8 +226,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
    opp_data->cpu_dev = get_cpu_device(0);
    if (!opp_data->cpu_dev) {
        pr_err("%s: Failed to get device for CPU0\n", __func__);
-       ret = ENODEV;
-       goto free_opp_data;
+       return -ENODEV;
    }
 
    opp_data->opp_node = dev_pm_opp_of_get_opp_desc_node(opp_data->cpu_dev);
@@ -285,8 +284,6 @@ register_cpufreq_dt:
 
 fail_put_node:
    of_node_put(opp_data->opp_node);
-free_opp_data:
-   kfree(opp_data);
 
    return ret;
 }
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 42e0d649e653..9206a4fef9ac 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -237,6 +237,8 @@ unsigned int of_genpd_opp_to_performance_state(struct device *dev,
                                               struct device_node *opp_node);
 
 int genpd_dev_pm_attach(struct device *dev);
+struct device *genpd_dev_pm_attach_by_id(struct device *dev,
+                                         unsigned int index);
 #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
 static inline int of_genpd_add_provider_simple(struct device_node *np,
                                               struct generic_pm_domain *genpd)
@@ -282,6 +284,12 @@ static inline int genpd_dev_pm_attach(struct device *dev)
    return 0;
 }
 
+static inline struct device *genpd_dev_pm_attach_by_id(struct device *dev,
+                                                      unsigned int index)
+{
+   return NULL;
+}
+
 static inline
 struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 {
@@ -291,6 +299,8 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 
 #ifdef CONFIG_PM
 int dev_pm_domain_attach(struct device *dev, bool power_on);
+struct device *dev_pm_domain_attach_by_id(struct device *dev,
+                                          unsigned int index);
 void dev_pm_domain_detach(struct device *dev, bool power_off);
 void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
 #else
@@ -298,6 +308,11 @@ static inline int dev_pm_domain_attach(struct device *dev, bool power_on)
 {
    return 0;
 }
+static inline struct device *dev_pm_domain_attach_by_id(struct device *dev,
+                                                       unsigned int index)
+{
+   return NULL;
+}
 static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
 static inline void dev_pm_domain_set(struct device *dev,
                                     struct dev_pm_domain *pd) {}
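The stubs keep one caller-side contract across configurations: NULL means there is no PM domain to manage (including !CONFIG_PM builds), while an ERR_PTR() is a real failure that may be -EPROBE_DEFER. A hedged sketch of how a driver might normalize that; bar_attach_domain() is invented for illustration:

#include <linux/err.h>
#include <linux/pm_domain.h>

static int bar_attach_domain(struct device *dev, unsigned int index,
                             struct device **out)
{
    struct device *pd = dev_pm_domain_attach_by_id(dev, index);

    if (IS_ERR(pd))
        return PTR_ERR(pd);  /* may be -EPROBE_DEFER */

    *out = pd;  /* may be NULL: nothing to manage, also fine */
    return 0;
}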
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index db5dbbf7a48d..f0fc4700b6ff 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -56,7 +56,8 @@ extern void pm_runtime_update_max_time_suspended(struct device *dev,
                                                 s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
 extern void pm_runtime_clean_up_links(struct device *dev);
-extern void pm_runtime_resume_suppliers(struct device *dev);
+extern void pm_runtime_get_suppliers(struct device *dev);
+extern void pm_runtime_put_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
 extern void pm_runtime_drop_link(struct device *dev);
 
@@ -172,7 +173,8 @@ static inline unsigned long pm_runtime_autosuspend_expiration(
 static inline void pm_runtime_set_memalloc_noio(struct device *dev,
                                                bool enable){}
 static inline void pm_runtime_clean_up_links(struct device *dev) {}
-static inline void pm_runtime_resume_suppliers(struct device *dev) {}
+static inline void pm_runtime_get_suppliers(struct device *dev) {}
+static inline void pm_runtime_put_suppliers(struct device *dev) {}
 static inline void pm_runtime_new_link(struct device *dev) {}
 static inline void pm_runtime_drop_link(struct device *dev) {}
 
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 9b65f052081f..9ba8a44ad2a7 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -104,7 +104,7 @@ FILE *prepare_output(const char *dirname)
             dirname, time(NULL));
    }
 
-   dprintf("logilename: %s\n", filename);
+   dprintf("logfilename: %s\n", filename);
 
    output = fopen(filename, "w+");
    if (output == NULL) {
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index 5b3205f16217..5b8c4956ff9a 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -126,6 +126,20 @@ void fix_up_intel_idle_driver_name(char *tmp, int num)
    }
 }
 
+#ifdef __powerpc__
+void map_power_idle_state_name(char *tmp)
+{
+   if (!strncmp(tmp, "stop0_lite", CSTATE_NAME_LEN))
+       strcpy(tmp, "stop0L");
+   else if (!strncmp(tmp, "stop1_lite", CSTATE_NAME_LEN))
+       strcpy(tmp, "stop1L");
+   else if (!strncmp(tmp, "stop2_lite", CSTATE_NAME_LEN))
+       strcpy(tmp, "stop2L");
+}
+#else
+void map_power_idle_state_name(char *tmp) { }
+#endif
+
 static struct cpuidle_monitor *cpuidle_register(void)
 {
    int num;
@@ -145,6 +159,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
        if (tmp == NULL)
            continue;
 
+       map_power_idle_state_name(tmp);
        fix_up_intel_idle_driver_name(tmp, num);
        strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
        free(tmp);
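The mapping is easy to sanity-check outside the kernel tree. A minimal userspace test, with CSTATE_NAME_LEN mirroring the powerpc value introduced in cpupower-monitor.h below:

#include <stdio.h>
#include <string.h>

#define CSTATE_NAME_LEN 7

static void map_power_idle_state_name(char *tmp)
{
    if (!strncmp(tmp, "stop0_lite", CSTATE_NAME_LEN))
        strcpy(tmp, "stop0L");
    else if (!strncmp(tmp, "stop1_lite", CSTATE_NAME_LEN))
        strcpy(tmp, "stop1L");
    else if (!strncmp(tmp, "stop2_lite", CSTATE_NAME_LEN))
        strcpy(tmp, "stop2L");
}

int main(void)
{
    char name[16] = "stop1_lite";

    map_power_idle_state_name(name);
    printf("%s\n", name);  /* prints "stop1L" */
    return 0;
}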
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index 05f953f0f0a0..051da0a7c454 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -70,36 +70,43 @@ void print_n_spaces(int n)
        printf(" ");
 }
 
-/* size of s must be at least n + 1 */
+/*
+ * s is filled with left and right spaces
+ * to make its length at least n+1
+ */
 int fill_string_with_spaces(char *s, int n)
 {
+   char *temp;
    int len = strlen(s);
-   if (len > n)
+
+   if (len >= n)
        return -1;
+
+   temp = malloc(sizeof(char) * (n+1));
    for (; len < n; len++)
        s[len] = ' ';
    s[len] = '\0';
+   snprintf(temp, n+1, " %s", s);
+   strcpy(s, temp);
+   free(temp);
    return 0;
 }
 
+#define MAX_COL_WIDTH 6
 void print_header(int topology_depth)
 {
    int unsigned mon;
    int state, need_len;
    cstate_t s;
    char buf[128] = "";
-   int percent_width = 4;
 
    fill_string_with_spaces(buf, topology_depth * 5 - 1);
    printf("%s|", buf);
 
    for (mon = 0; mon < avail_monitors; mon++) {
-       need_len = monitors[mon]->hw_states_num * (percent_width + 3)
+       need_len = monitors[mon]->hw_states_num * (MAX_COL_WIDTH + 1)
            - 1;
-       if (mon != 0) {
-           printf("|| ");
-           need_len--;
-       }
+       if (mon != 0)
+           printf("||");
        sprintf(buf, "%s", monitors[mon]->name);
        fill_string_with_spaces(buf, need_len);
        printf("%s", buf);
@@ -107,23 +114,21 @@ void print_header(int topology_depth)
    printf("\n");
 
    if (topology_depth > 2)
-       printf("PKG |");
+       printf(" PKG|");
    if (topology_depth > 1)
        printf("CORE|");
    if (topology_depth > 0)
-       printf("CPU |");
+       printf(" CPU|");
    for (mon = 0; mon < avail_monitors; mon++) {
        if (mon != 0)
-           printf("|| ");
-       else
-           printf(" ");
+           printf("||");
        for (state = 0; state < monitors[mon]->hw_states_num; state++) {
            if (state != 0)
-               printf(" | ");
+               printf("|");
            s = monitors[mon]->hw_states[state];
            sprintf(buf, "%s", s.name);
-           fill_string_with_spaces(buf, percent_width);
+           fill_string_with_spaces(buf, MAX_COL_WIDTH);
            printf("%s", buf);
        }
        printf(" ");
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
index 9e43f3371fbc..2ae50b499e0a 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -15,7 +15,16 @@
 
 #define MONITORS_MAX 20
 #define MONITOR_NAME_LEN 20
+
+/*
+ * CSTATE_NAME_LEN is limited by the header field width defined
+ * in cpupower-monitor.c. The header field width is defined to be
+ * the sum of the percent width and two spaces for padding.
+ */
+#ifdef __powerpc__
+#define CSTATE_NAME_LEN 7
+#else
 #define CSTATE_NAME_LEN 5
+#endif
 #define CSTATE_DESC_LEN 60
 
 int cpu_count;
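Putting the layout math in one place: each state column is MAX_COL_WIDTH characters plus one '|' separator, so a monitor with N states gets N * (MAX_COL_WIDTH + 1) - 1 header characters, and the 6-character powerpc names ("stop0L" plus NUL, hence CSTATE_NAME_LEN 7) fit a column exactly. A trivial standalone check of that arithmetic:

#include <stdio.h>

#define MAX_COL_WIDTH 6

int main(void)
{
    int hw_states_num = 3;
    int need_len = hw_states_num * (MAX_COL_WIDTH + 1) - 1;

    /* 3 states: 3 * 7 - 1 = 20 characters for the monitor's header field */
    printf("header field width for %d states: %d\n",
           hw_states_num, need_len);
    return 0;
}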