From de1df26b7cef702a32ae876ed45c1112f523df48 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Feb 2016 02:37:42 +0100 Subject: cpufreq: Clean up default and fallback governor setup The preprocessor magic used for setting the default cpufreq governor (and for using the performance governor as a fallback one for that matter) is really nasty, so replace it with __weak functions and overrides. Signed-off-by: Rafael J. Wysocki Acked-by: Saravana Kannan Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 88a4215125bc..d0bf555b6bbf 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -464,29 +464,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); -/* CPUFREQ DEFAULT GOVERNOR */ -/* - * Performance governor is fallback governor if any other gov failed to auto - * load due latency restrictions - */ -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE -extern struct cpufreq_governor cpufreq_gov_performance; -#endif -#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) -extern struct cpufreq_governor cpufreq_gov_powersave; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) -extern struct cpufreq_governor cpufreq_gov_userspace; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) -extern struct cpufreq_governor cpufreq_gov_ondemand; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) -extern struct cpufreq_governor cpufreq_gov_conservative; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) -#endif +struct cpufreq_governor *cpufreq_default_governor(void); +struct cpufreq_governor *cpufreq_fallback_governor(void); /********************************************************************* * FREQUENCY TABLE HELPERS * -- cgit v1.2.3 From 9f8ea969d5cfdd4353d2adb004e8e2286b984369 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:33 +0530 Subject: PM / OPP: get/put regulators from OPP core This allows the OPP core to request/free the regulator resource, attached to a device OPP. The regulator device is fetched using the name provided by the driver, while calling: dev_pm_opp_set_regulator(). This will work for both OPP-v1 and v2 bindings. This is a preliminary step for moving the OPP switching logic into the OPP core. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 95403d2ccaf5..c70a18ac9c8a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -60,6 +60,8 @@ int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, void dev_pm_opp_put_supported_hw(struct device *dev); int dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct device *dev); +int dev_pm_opp_set_regulator(struct device *dev, const char *name); +void dev_pm_opp_put_regulator(struct device *dev); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -151,6 +153,13 @@ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) static inline void dev_pm_opp_put_prop_name(struct device *dev) {} +static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_put_regulator(struct device *dev) {} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -- cgit v1.2.3 From 655c9df961751ce21466f6e97e8033932c27a675 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:35 +0530 Subject: PM / OPP: Introduce dev_pm_opp_get_max_volt_latency() In few use cases (like: cpufreq), it is desired to get the maximum voltage latency for changing OPPs. Add support for that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index c70a18ac9c8a..5daa43058ac1 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -34,6 +34,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); +unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev); struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, @@ -88,6 +89,11 @@ static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) return 0; } +static inline unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) +{ + return 0; +} + static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) { return NULL; -- cgit v1.2.3 From 2174344765f472895c076d703c9cdc58215e1393 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:36 +0530 Subject: PM / OPP: Introduce dev_pm_opp_get_max_transition_latency() In few use cases (like: cpufreq), it is desired to get the maximum latency for changing OPPs. Add support for that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 5daa43058ac1..59da3d9e11ea 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -35,6 +35,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev); +unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev); struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, @@ -94,6 +95,11 @@ static inline unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) return 0; } +static inline unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev) +{ + return 0; +} + static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) { return NULL; -- cgit v1.2.3 From 6a0712f6f199e737aa5913d28ec4bd3a25de9660 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:39 +0530 Subject: PM / OPP: Add dev_pm_opp_set_rate() This adds a routine, dev_pm_opp_set_rate(), responsible for configuring power-supply and clock source for an OPP. The OPP is found by matching against the target_freq passed to the routine. This shall replace similar code present in most of the OPP users and help simplify them a lot. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 59da3d9e11ea..cccaf4a29e9f 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -64,6 +64,7 @@ int dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct device *dev); int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); +int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -172,6 +173,11 @@ static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) static inline void dev_pm_opp_put_regulator(struct device *dev) {} +static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) +{ + return -EINVAL; +} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -- cgit v1.2.3 From fc5cbf0c94b6f7fd62fdddff892207290510945d Mon Sep 17 00:00:00 2001 From: Axel Haslam Date: Mon, 15 Feb 2016 11:10:51 +0100 Subject: PM / Domains: Support for multiple states Some hardware (eg. OMAP), has the ability to enter different low power modes for a given power domain. This allows for more fine grained control over the power state of the platform. As a typical example, some registers of the hardware may be implemented with retention flip-flops and be able to retain their state at lower voltages allowing for faster on/off latencies and an increased window of opportunity to enter an intermediate low power state other than "off" When trying to set a power domain to off, the genpd governor will choose the deepest state that will respect the qos constraints of all the devices and sub-domains on the power domain. The state chosen by the governor is saved in the "state_idx" field of the generic_pm_domain structure and shall be used by the power_off and power_on callbacks to perform the necessary actions to set the power domain into (and out of) the state indicated by state_idx. States must be declared in ascending order from shallowest to deepest, deepest meaning the state which takes longer to enter and exit. For platforms that don't declare any states, a single a single "off" state is used. Once all platforms are converted to use the state array, the legacy on/off latencies will be removed. [ Lina: Modified genpd state initialization and remove use of save_state_latency_ns in genpd timing data ] Suggested-by: Kevin Hilman Signed-off-by: Lina Iyer Signed-off-by: Axel Haslam Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index db21d3995f7e..1726c4ac6529 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -19,6 +19,8 @@ /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ +#define GENPD_MAX_NUM_STATES 8 /* Number of possible low power states */ + enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ GPD_STATE_POWER_OFF, /* PM domain is off */ @@ -37,6 +39,11 @@ struct gpd_dev_ops { bool (*active_wakeup)(struct device *dev); }; +struct genpd_power_state { + s64 power_off_latency_ns; + s64 power_on_latency_ns; +}; + struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ @@ -66,6 +73,10 @@ struct generic_pm_domain { void (*detach_dev)(struct generic_pm_domain *domain, struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ + struct genpd_power_state states[GENPD_MAX_NUM_STATES]; + unsigned int state_count; /* number of states */ + unsigned int state_idx; /* state that genpd will go to when off */ + }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) -- cgit v1.2.3 From 90e63452ac3a42c9ff10b12880429e8592cf39ec Mon Sep 17 00:00:00 2001 From: Axel Haslam Date: Mon, 15 Feb 2016 11:10:53 +0100 Subject: PM / Domains: remove old power on/off latencies Now that all known users have been converted to use state latencies, we can remove the latency field in the generic_pm_domain structure. Signed-off-by: Axel Haslam Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1726c4ac6529..49cd8890b873 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -61,9 +61,7 @@ struct generic_pm_domain { unsigned int prepared_count; /* Suspend counter of prepared devices */ bool suspend_power_off; /* Power status before system suspend */ int (*power_off)(struct generic_pm_domain *domain); - s64 power_off_latency_ns; int (*power_on)(struct generic_pm_domain *domain); - s64 power_on_latency_ns; struct gpd_dev_ops dev_ops; s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ bool max_off_time_changed; -- cgit v1.2.3 From 34b0870515aaac6b7ea1ffdc370516b0a8024c82 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Feb 2016 00:22:57 +0100 Subject: cpufreq: Simplify the cpufreq_for_each_valid_entry() That macro uses an internal static inline function that is first totally unnecessary and second hard to read, so simplify it and get rid of that monster. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d0bf555b6bbf..4064cfcfbffd 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -504,16 +504,6 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, } #endif -static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos) -{ - while ((*pos)->frequency != CPUFREQ_TABLE_END) - if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID) - return true; - else - (*pos)++; - return false; -} - /* * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table * @pos: the cpufreq_frequency_table * to use as a loop cursor. @@ -530,8 +520,11 @@ static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos) * @table: the cpufreq_frequency_table * to iterate over. */ -#define cpufreq_for_each_valid_entry(pos, table) \ - for (pos = table; cpufreq_next_valid(&pos); pos++) +#define cpufreq_for_each_valid_entry(pos, table) \ + for (pos = table; pos->frequency != CPUFREQ_TABLE_END; pos++) \ + if (pos->frequency == CPUFREQ_ENTRY_INVALID) \ + continue; \ + else int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); -- cgit v1.2.3 From e237a5518425155faa508a087f28269f58074b92 Mon Sep 17 00:00:00 2001 From: Chen Fan Date: Mon, 15 Feb 2016 12:52:01 +0800 Subject: x86/ACPI/PCI: Recognize that Interrupt Line 255 means "not connected" Per the x86-specific footnote to PCI spec r3.0, sec 6.2.4, the value 255 in the Interrupt Line register means "unknown" or "no connection." Previously, when we couldn't derive an IRQ from the _PRT, we fell back to using the value from Interrupt Line as an IRQ. It's questionable whether we should do that at all, but the spec clearly suggests we shouldn't do it for the value 255 on x86. Calling request_irq() with IRQ 255 may succeed, but the driver won't receive any interrupts. Or, if IRQ 255 is shared with another device, it may succeed, and the driver's ISR will be called at random times when the *other* device interrupts. Or it may fail if another device is using IRQ 255 with incompatible flags. What we *want* is for request_irq() to fail predictably so the driver can fall back to polling. On x86, assume 255 in the Interrupt Line means the INTx line is not connected. In that case, set dev->irq to IRQ_NOTCONNECTED so request_irq() will fail gracefully with -ENOTCONN. We found this problem on a system where Secure Boot firmware assigned Interrupt Line 255 to an i801_smbus device and another device was already using MSI-X IRQ 255. This was in v3.10, where i801_probe() fails if request_irq() fails: i801_smbus 0000:00:1f.3: enabling device (0140 -> 0143) i801_smbus 0000:00:1f.3: can't derive routing for PCI INT C i801_smbus 0000:00:1f.3: PCI INT C: no GSI genirq: Flags mismatch irq 255. 00000080 (i801_smbus) vs. 00000000 (megasa) CPU: 0 PID: 2487 Comm: kworker/0:1 Not tainted 3.10.0-229.el7.x86_64 #1 Hardware name: FUJITSU PRIMEQUEST 2800E2/D3736, BIOS PRIMEQUEST 2000 Serie5 Call Trace: dump_stack+0x19/0x1b __setup_irq+0x54a/0x570 request_threaded_irq+0xcc/0x170 i801_probe+0x32f/0x508 [i2c_i801] local_pci_probe+0x45/0xa0 i801_smbus 0000:00:1f.3: Failed to allocate irq 255: -16 i801_smbus: probe of 0000:00:1f.3 failed with error -16 After aeb8a3d16ae0 ("i2c: i801: Check if interrupts are disabled"), i801_probe() will fall back to polling if request_irq() fails. But we still need this patch because request_irq() may succeed or fail depending on other devices in the system. If request_irq() fails, i801_smbus will work by falling back to polling, but if it succeeds, i801_smbus won't work because it expects interrupts that it may not receive. Signed-off-by: Chen Fan Acked-by: Thomas Gleixner Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- include/linux/interrupt.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0e95fcc75b2a..358076eda364 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -125,6 +125,16 @@ struct irqaction { extern irqreturn_t no_action(int cpl, void *dev_id); +/* + * If a (PCI) device interrupt is not connected we set dev->irq to + * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we + * can distingiush that case from other error returns. + * + * 0x80000000 is guaranteed to be outside the available range of interrupts + * and easy to distinguish from other possible incorrect values. + */ +#define IRQ_NOTCONNECTED (1U << 31) + extern int __must_check request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, -- cgit v1.2.3 From 34e2c555f3e13c90e9284e23d00f03be8a6e06c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 20:20:42 +0100 Subject: cpufreq: Add mechanism for registering utilization update callbacks Introduce a mechanism by which parts of the cpufreq subsystem ("setpolicy" drivers or the core) can register callbacks to be executed from cpufreq_update_util() which is invoked by the scheduler's update_load_avg() on CPU utilization changes. This allows the "setpolicy" drivers to dispense with their timers and do all of the computations they need and frequency/voltage adjustments in the update_load_avg() code path, among other things. The update_load_avg() changes were suggested by Peter Zijlstra. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) Acked-by: Ingo Molnar --- include/linux/cpufreq.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d0bf555b6bbf..704d85bf7242 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -151,6 +151,36 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ +void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); + +/** + * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. + * @time: Current time. + * + * The way cpufreq is currently arranged requires it to evaluate the CPU + * performance state (frequency/voltage) on a regular basis to prevent it from + * being stuck in a completely inadequate performance level for too long. + * That is not guaranteed to happen if the updates are only triggered from CFS, + * though, because they may not be coming in if RT or deadline tasks are active + * all the time (or there are RT and DL tasks only). + * + * As a workaround for that issue, this function is called by the RT and DL + * sched classes to trigger extra cpufreq updates to prevent it from stalling, + * but that really is a band-aid. Going forward it should be replaced with + * solutions targeted more specifically at RT and DL tasks. + */ +static inline void cpufreq_trigger_update(u64 time) +{ + cpufreq_update_util(time, ULONG_MAX, 0); +} + +struct update_util_data { + void (*func)(struct update_util_data *data, + u64 time, unsigned long util, unsigned long max); +}; + +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); + unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -162,6 +192,10 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else +static inline void cpufreq_update_util(u64 time, unsigned long util, + unsigned long max) {} +static inline void cpufreq_trigger_update(u64 time) {} + static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; -- cgit v1.2.3 From 68e80dae09033d778b98dc88e5bfe8fdade188e5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:35 +0530 Subject: Revert "cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT" Earlier, when the struct freq-attr was used to represent governor attributes, the standard cpufreq show/store sysfs attribute callbacks were applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. That could have resulted in an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely). We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. The previous commit, "cpufreq: governor: New sysfs show/store callbacks for governor tunables", fixed the original ABBA deadlock by adding new governor specific show/store callbacks. We don't have to drop rwsem around invocations of governor event CPUFREQ_GOV_POLICY_EXIT anymore, and original fix can be reverted now. Fixes: 955ef4833574 (cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT) Signed-off-by: Viresh Kumar Reported-by: Juri Lelli Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 704d85bf7242..cac3d1ba8200 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -100,10 +100,6 @@ struct cpufreq_policy { * - Any routine that will write to the policy structure and/or may take away * the policy altogether (eg. CPU hotplug), will hold this lock in write * mode before doing so. - * - * Additional rules: - * - Lock should not be held across - * __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); */ struct rw_semaphore rwsem; -- cgit v1.2.3 From 242aa883a64d8c54cfeee47f3603b21bc705e081 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 16:36:44 +0530 Subject: cpufreq: Remove 'policy->governor_enabled' The entire sequence of events (like INIT/START or STOP/EXIT) for which cpufreq_governor() is called, is guaranteed to be protected by policy->rwsem now. The additional checks that were added earlier (as we were forced to drop policy->rwsem before calling cpufreq_governor() for EXIT event), aren't required anymore. Over that, they weren't sufficient really. They just take care of START/STOP events, but not INIT/EXIT and the state machine was never maintained properly by them. Kill the unnecessary checks and policy->governor_enabled field. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cac3d1ba8200..a50c5b2e3bf2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,7 +80,6 @@ struct cpufreq_policy { unsigned int last_policy; /* policy before unplug */ struct cpufreq_governor *governor; /* see below */ void *governor_data; - bool governor_enabled; /* governor start/stop flag */ char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ struct work_struct update; /* if update_policy() needs to be -- cgit v1.2.3 From adaf9fcd136970e480d7ca834c0cf25ce922ea74 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Mar 2016 20:44:47 +0100 Subject: cpufreq: Move scheduler-related code to the sched directory Create cpufreq.c under kernel/sched/ and move the cpufreq code related to the scheduler to that file and to sched.h. Redefine cpufreq_update_util() as a static inline function to avoid function calls at its call sites in the scheduler code (as suggested by Peter Zijlstra). Also move the definition of struct update_util_data and declaration of cpufreq_set_update_util_data() from include/linux/cpufreq.h to include/linux/sched.h. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- include/linux/cpufreq.h | 34 ---------------------------------- include/linux/sched.h | 9 +++++++++ 2 files changed, 9 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a50c5b2e3bf2..a5ea52f793f3 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -146,36 +146,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ -void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); - -/** - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. - * @time: Current time. - * - * The way cpufreq is currently arranged requires it to evaluate the CPU - * performance state (frequency/voltage) on a regular basis to prevent it from - * being stuck in a completely inadequate performance level for too long. - * That is not guaranteed to happen if the updates are only triggered from CFS, - * though, because they may not be coming in if RT or deadline tasks are active - * all the time (or there are RT and DL tasks only). - * - * As a workaround for that issue, this function is called by the RT and DL - * sched classes to trigger extra cpufreq updates to prevent it from stalling, - * but that really is a band-aid. Going forward it should be replaced with - * solutions targeted more specifically at RT and DL tasks. - */ -static inline void cpufreq_trigger_update(u64 time) -{ - cpufreq_update_util(time, ULONG_MAX, 0); -} - -struct update_util_data { - void (*func)(struct update_util_data *data, - u64 time, unsigned long util, unsigned long max); -}; - -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); - unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -187,10 +157,6 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else -static inline void cpufreq_update_util(u64 time, unsigned long util, - unsigned long max) {} -static inline void cpufreq_trigger_update(u64 time) {} - static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index a10494a94cc3..913e755ef7b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3207,4 +3207,13 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#ifdef CONFIG_CPU_FREQ +struct update_util_data { + void (*func)(struct update_util_data *data, + u64 time, unsigned long util, unsigned long max); +}; + +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); +#endif /* CONFIG_CPU_FREQ */ + #endif -- cgit v1.2.3