From 0da11bf0cab9029db8b85e48d962ff05c00a4faa Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Fri, 27 May 2022 00:53:45 +0000 Subject: cpuidle: haltpoll: Add trace points for guest_halt_poll_ns grow/shrink Add trace points as are implemented in KVM host halt polling. This helps tune guest halt polling params. Signed-off-by: Eiichi Tsukata Acked-by: Marcelo Tosatti Signed-off-by: Rafael J. Wysocki --- include/trace/events/power.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index af5018aa9517..c708521e4ed5 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -500,6 +500,35 @@ DEFINE_EVENT(dev_pm_qos_request, dev_pm_qos_remove_request, TP_ARGS(name, type, new_value) ); + +TRACE_EVENT(guest_halt_poll_ns, + + TP_PROTO(bool grow, unsigned int new, unsigned int old), + + TP_ARGS(grow, new, old), + + TP_STRUCT__entry( + __field(bool, grow) + __field(unsigned int, new) + __field(unsigned int, old) + ), + + TP_fast_assign( + __entry->grow = grow; + __entry->new = new; + __entry->old = old; + ), + + TP_printk("halt_poll_ns %u (%s %u)", + __entry->new, + __entry->grow ? 
"grow" : "shrink", + __entry->old) +); + +#define trace_guest_halt_poll_ns_grow(new, old) \ + trace_guest_halt_poll_ns(true, new, old) +#define trace_guest_halt_poll_ns_shrink(new, old) \ + trace_guest_halt_poll_ns(false, new, old) #endif /* _TRACE_POWER_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 09d3154a6f0f0bb5b604832095804780f3684b96 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 6 Jun 2022 22:51:58 -0500 Subject: PM: wakeup: Unify device_init_wakeup() for PM_SLEEP and !PM_SLEEP Previously the CONFIG_PM_SLEEP and !CONFIG_PM_SLEEP device_init_wakeup() implementations differed in confusing ways: - The PM_SLEEP version checked for a NULL device pointer and returned -EINVAL, while the !PM_SLEEP version did not and would simply dereference a NULL pointer. - When called with "false", the !PM_SLEEP version cleared "capable" and "enable" in the opposite order of the PM_SLEEP version. That was harmless because for !PM_SLEEP they're simple assignments, but it's unnecessary confusion. Use a simplified version of the PM_SLEEP implementation for both cases. Signed-off-by: Bjorn Helgaas Signed-off-by: Rafael J. 
Wysocki --- include/linux/pm_wakeup.h | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 196a157456aa..77f4849e3418 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -109,7 +109,6 @@ extern struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws); extern int device_wakeup_enable(struct device *dev); extern int device_wakeup_disable(struct device *dev); extern void device_set_wakeup_capable(struct device *dev, bool capable); -extern int device_init_wakeup(struct device *dev, bool val); extern int device_set_wakeup_enable(struct device *dev, bool enable); extern void __pm_stay_awake(struct wakeup_source *ws); extern void pm_stay_awake(struct device *dev); @@ -167,13 +166,6 @@ static inline int device_set_wakeup_enable(struct device *dev, bool enable) return 0; } -static inline int device_init_wakeup(struct device *dev, bool val) -{ - device_set_wakeup_capable(dev, val); - device_set_wakeup_enable(dev, val); - return 0; -} - static inline bool device_may_wakeup(struct device *dev) { return dev->power.can_wakeup && dev->power.should_wakeup; @@ -217,4 +209,27 @@ static inline void pm_wakeup_hard_event(struct device *dev) return pm_wakeup_dev_event(dev, 0, true); } +/** + * device_init_wakeup - Device wakeup initialization. + * @dev: Device to handle. + * @enable: Whether or not to enable @dev as a wakeup device. + * + * By default, most devices should leave wakeup disabled. The exceptions are + * devices that everyone expects to be wakeup sources: keyboards, power buttons, + * possibly network interfaces, etc. Also, devices that don't generate their + * own wakeup requests but merely forward requests from one bus to another + * (like PCI bridges) should have wakeup enabled by default. 
+ */ +static inline int device_init_wakeup(struct device *dev, bool enable) +{ + if (enable) { + device_set_wakeup_capable(dev, true); + return device_wakeup_enable(dev); + } else { + device_wakeup_disable(dev); + device_set_wakeup_capable(dev, false); + return 0; + } +} + #endif /* _LINUX_PM_WAKEUP_H */ -- cgit v1.2.3 From ae6ccaa650380d243cf43d31c864c5ced2fd4612 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 7 Jul 2022 08:15:52 +0100 Subject: PM: EM: convert power field to micro-Watts precision and align drivers The milli-Watts precision causes rounding errors while calculating efficiency cost for each OPP. This is especially visible in the 'simple' Energy Model (EM), where the power for each OPP is provided from OPP framework. This can cause some OPPs to be marked inefficient, while using micro-Watts precision that might not happen. Update all EM users which access 'power' field and assume the value is in milli-Watts. Solve also an issue with potential overflow in calculation of energy estimation on 32bit machine. It's needed now since the power value (thus the 'cost' as well) are higher. Example calculation which shows the rounding error and impact: power = 'dyn-power-coeff' * volt_mV * volt_mV * freq_MHz power_a_uW = (100 * 600mV * 600mV * 500MHz) / 10^6 = 18000 power_a_mW = (100 * 600mV * 600mV * 500MHz) / 10^9 = 18 power_b_uW = (100 * 605mV * 605mV * 600MHz) / 10^6 = 21961 power_b_mW = (100 * 605mV * 605mV * 600MHz) / 10^9 = 21 max_freq = 2000MHz cost_a_mW = 18 * 2000MHz/500MHz = 72 cost_a_uW = 18000 * 2000MHz/500MHz = 72000 cost_b_mW = 21 * 2000MHz/600MHz = 70 // <- artificially better cost_b_uW = 21961 * 2000MHz/600MHz = 73203 The 'cost_b_mW' (which is based on old milli-Watts) is misleadingly better than the 'cost_b_uW' (this patch uses micro-Watts) and such would have impact on the 'inefficient OPPs' information in the Cpufreq framework. This patch set removes the rounding issue. 
Signed-off-by: Lukasz Luba Acked-by: Daniel Lezcano Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 54 +++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 8419bffb4398..b9caa01dfac4 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -62,7 +62,7 @@ struct em_perf_domain { /* * em_perf_domain flags: * - * EM_PERF_DOMAIN_MILLIWATTS: The power values are in milli-Watts or some + * EM_PERF_DOMAIN_MICROWATTS: The power values are in micro-Watts or some * other scale. * * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating @@ -71,7 +71,7 @@ struct em_perf_domain { * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be * created by platform missing real power information */ -#define EM_PERF_DOMAIN_MILLIWATTS BIT(0) +#define EM_PERF_DOMAIN_MICROWATTS BIT(0) #define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1) #define EM_PERF_DOMAIN_ARTIFICIAL BIT(2) @@ -79,22 +79,44 @@ struct em_perf_domain { #define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL) #ifdef CONFIG_ENERGY_MODEL -#define EM_MAX_POWER 0xFFFF +/* + * The max power value in micro-Watts. The limit of 64 Watts is set as + * a safety net to not overflow multiplications on 32bit platforms. The + * 32bit value limit for total Perf Domain power implies a limit of + * maximum CPUs in such domain to 64. + */ +#define EM_MAX_POWER (64000000) /* 64 Watts */ + +/* + * To avoid possible energy estimation overflow on 32bit machines add + * limits to number of CPUs in the Perf. Domain. + * We are safe on 64bit machine, thus some big number. + */ +#ifdef CONFIG_64BIT +#define EM_MAX_NUM_CPUS 4096 +#else +#define EM_MAX_NUM_CPUS 16 +#endif /* - * Increase resolution of energy estimation calculations for 64-bit - * architectures. 
The extra resolution improves decision made by EAS for the - * task placement when two Performance Domains might provide similar energy - * estimation values (w/o better resolution the values could be equal). + * To avoid an overflow on 32bit machines while calculating the energy + * use a different order in the operation. First divide by the 'cpu_scale' + * which would reduce big value stored in the 'cost' field, then multiply by + * the 'sum_util'. This would allow to handle existing platforms, which have + * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts. + * In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util' + * could be 4096, then multiplication: 'cost' * 'sum_util' would overflow. + * This reordering of operations has some limitations, we lose small + * precision in the estimation (comparing to 64bit platform w/o reordering). * - * We increase resolution only if we have enough bits to allow this increased - * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit - * are pretty high and the returns do not justify the increased costs. + * We are safe on 64bit machine. */ #ifdef CONFIG_64BIT -#define em_scale_power(p) ((p) * 1000) +#define em_estimate_energy(cost, sum_util, scale_cpu) \ + (((cost) * (sum_util)) / (scale_cpu)) #else -#define em_scale_power(p) (p) +#define em_estimate_energy(cost, sum_util, scale_cpu) \ + (((cost) / (scale_cpu)) * (sum_util)) #endif struct em_data_callback { @@ -112,7 +134,7 @@ struct em_data_callback { * and frequency. * * In case of CPUs, the power is the one of a single CPU in the domain, - * expressed in milli-Watts or an abstract scale. It is expected to + * expressed in micro-Watts or an abstract scale. It is expected to * fit in the [0, EM_MAX_POWER] range. * * Return 0 on success. 
@@ -148,7 +170,7 @@ struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, - bool milliwatts); + bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); /** @@ -273,7 +295,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * pd_nrg = ------------------------ (4) * scale_cpu */ - return ps->cost * sum_util / scale_cpu; + return em_estimate_energy(ps->cost, sum_util, scale_cpu); } /** @@ -297,7 +319,7 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, - bool milliwatts) + bool microwatts) { return -EINVAL; } -- cgit v1.2.3 From 5e0fd2026cdd474a85b3135c312912321e60f47a Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 7 Jul 2022 08:15:54 +0100 Subject: firmware: arm_scmi: Get detailed power scale from perf In SCMI v3.1 the power scale can be in micro-Watts. The upper layers, e.g. cpufreq and EM should handle received power values properly (upscale when needed). Thus, provide an interface which allows to check what is the scale for power values. The old interface allowed to distinguish between bogo-Watts and milli-Watts only (which was good for older SCMI spec). Acked-by: Sudeep Holla Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. 
Wysocki --- include/linux/scmi_protocol.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 704111f63993..a0a246310ba1 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -60,6 +60,12 @@ struct scmi_clock_info { }; }; +enum scmi_power_scale { + SCMI_POWER_BOGOWATTS, + SCMI_POWER_MILLIWATTS, + SCMI_POWER_MICROWATTS +}; + struct scmi_handle; struct scmi_device; struct scmi_protocol_handle; @@ -135,7 +141,7 @@ struct scmi_perf_proto_ops { unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, struct device *dev); - bool (*power_scale_mw_get)(const struct scmi_protocol_handle *ph); + enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); }; /** -- cgit v1.2.3