diff options
Diffstat (limited to 'drivers/cpufreq/cpufreq_interactive.c')
-rw-r--r-- | drivers/cpufreq/cpufreq_interactive.c | 1461 |
1 files changed, 1081 insertions, 380 deletions
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 9a6f64f56962..a376dfa8f412 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -2,7 +2,6 @@ * drivers/cpufreq/cpufreq_interactive.c * * Copyright (C) 2010 Google, Inc. - * Copyright (C) 2012-2013 Freescale Semiconductor, Inc. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -20,196 +19,474 @@ #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/cpufreq.h> -#include <linux/mutex.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/rwsem.h> #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/tick.h> #include <linux/time.h> #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/kthread.h> -#include <linux/mutex.h> +#include <linux/slab.h> #include <linux/kernel_stat.h> -#include <linux/module.h> #include <asm/cputime.h> -static atomic_t active_count = ATOMIC_INIT(0); +#define CREATE_TRACE_POINTS +#include <trace/events/cpufreq_interactive.h> struct cpufreq_interactive_cpuinfo { struct timer_list cpu_timer; - int timer_idlecancel; + struct timer_list cpu_slack_timer; + spinlock_t load_lock; /* protects the next 4 fields */ u64 time_in_idle; - u64 idle_exit_time; - u64 timer_run_time; - int idling; - u64 freq_change_time; - u64 freq_change_time_in_idle; + u64 time_in_idle_timestamp; + u64 cputime_speedadj; + u64 cputime_speedadj_timestamp; struct cpufreq_policy *policy; struct cpufreq_frequency_table *freq_table; + spinlock_t target_freq_lock; /*protects target freq */ unsigned int target_freq; + unsigned int floor_freq; + unsigned int max_freq; + u64 floor_validate_time; + u64 hispeed_validate_time; + struct rw_semaphore enable_sem; int governor_enabled; }; static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo); -/* Workqueues handle frequency scaling */ -static struct task_struct *up_task; -static struct workqueue_struct *down_wq; -static struct work_struct freq_scale_down_work; -static cpumask_t up_cpumask; -static spinlock_t up_cpumask_lock; -static cpumask_t down_cpumask; -static spinlock_t down_cpumask_lock; -static struct mutex set_speed_lock; +/* realtime thread handles frequency scaling */ +static struct task_struct *speedchange_task; +static cpumask_t speedchange_cpumask; +static spinlock_t speedchange_cpumask_lock; +static struct mutex gov_lock; + +/* Target load. Lower values result in higher CPU speeds. */ +#define DEFAULT_TARGET_LOAD 90 +static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; + +#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC) +#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE +static unsigned int default_above_hispeed_delay[] = { + DEFAULT_ABOVE_HISPEED_DELAY }; + +struct cpufreq_interactive_tunables { + int usage_count; + /* Hi speed to bump to from lo speed when load burst (default max) */ + unsigned int hispeed_freq; + /* Go to hi speed when CPU load at or above this value. */ +#define DEFAULT_GO_HISPEED_LOAD 99 + unsigned long go_hispeed_load; + /* Target load. Lower values result in higher CPU speeds. */ + spinlock_t target_loads_lock; + unsigned int *target_loads; + int ntarget_loads; + /* + * The minimum amount of time to spend at a frequency before we can ramp + * down. + */ +#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) + unsigned long min_sample_time; + /* + * The sample rate of the timer used to increase frequency + */ + unsigned long timer_rate; + /* + * Wait this long before raising speed above hispeed, by default a + * single timer interval. + */ + spinlock_t above_hispeed_delay_lock; + unsigned int *above_hispeed_delay; + int nabove_hispeed_delay; + /* Non-zero means indefinite speed boost active */ + int boost_val; + /* Duration of a boot pulse in usecs */ + int boostpulse_duration_val; + /* End time of boost pulse in ktime converted to usecs */ + u64 boostpulse_endtime; + /* + * Max additional time to wait in idle, beyond timer_rate, at speeds + * above minimum before wakeup to reduce speed, or -1 if unnecessary. + */ +#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE) + int timer_slack_val; + bool io_is_busy; +}; -/* Hi speed to bump to from lo speed when load burst (default max) */ -static u64 hispeed_freq; +/* For cases where we have single governor instance for system */ +struct cpufreq_interactive_tunables *common_tunables; -/* Go to hi speed when CPU load at or above this value. */ -#define DEFAULT_GO_HISPEED_LOAD 95 -static unsigned long go_hispeed_load; +static struct attribute_group *get_sysfs_attr(void); -/* - * The minimum amount of time to spend at a frequency before we can ramp down. +static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, + cputime64_t *wall) +{ + u64 idle_time; + u64 cur_wall_time; + u64 busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; + + idle_time = cur_wall_time - busy_time; + if (wall) + *wall = jiffies_to_usecs(cur_wall_time); + + return jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_cpu_idle_time( + unsigned int cpu, + cputime64_t *wall, + bool io_is_busy) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); + + if (idle_time == -1ULL) + idle_time = get_cpu_idle_time_jiffy(cpu, wall); + else if (!io_is_busy) + idle_time += get_cpu_iowait_time_us(cpu, wall); + + return idle_time; +} + +static void cpufreq_interactive_timer_resched( + struct cpufreq_interactive_cpuinfo *pcpu) +{ + struct cpufreq_interactive_tunables *tunables = + pcpu->policy->governor_data; + unsigned long expires; + unsigned long flags; + + spin_lock_irqsave(&pcpu->load_lock, flags); + pcpu->time_in_idle = + get_cpu_idle_time(smp_processor_id(), + &pcpu->time_in_idle_timestamp, + tunables->io_is_busy); + pcpu->cputime_speedadj = 0; + pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; + expires = jiffies + usecs_to_jiffies(tunables->timer_rate); + mod_timer_pinned(&pcpu->cpu_timer, expires); + + if (tunables->timer_slack_val >= 0 && + pcpu->target_freq > pcpu->policy->min) { + expires += usecs_to_jiffies(tunables->timer_slack_val); + mod_timer_pinned(&pcpu->cpu_slack_timer, expires); + } + + spin_unlock_irqrestore(&pcpu->load_lock, flags); +} + +/* The caller shall take enable_sem write semaphore to avoid any timer race. + * The cpu_timer and cpu_slack_timer must be deactivated when calling this + * function. */ -#define DEFAULT_MIN_SAMPLE_TIME (20 * USEC_PER_MSEC) -static unsigned long min_sample_time; +static void cpufreq_interactive_timer_start( + struct cpufreq_interactive_tunables *tunables, int cpu) +{ + struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); + unsigned long expires = jiffies + + usecs_to_jiffies(tunables->timer_rate); + unsigned long flags; + + pcpu->cpu_timer.expires = expires; + add_timer_on(&pcpu->cpu_timer, cpu); + if (tunables->timer_slack_val >= 0 && + pcpu->target_freq > pcpu->policy->min) { + expires += usecs_to_jiffies(tunables->timer_slack_val); + pcpu->cpu_slack_timer.expires = expires; + add_timer_on(&pcpu->cpu_slack_timer, cpu); + } + + spin_lock_irqsave(&pcpu->load_lock, flags); + pcpu->time_in_idle = + get_cpu_idle_time(cpu, &pcpu->time_in_idle_timestamp, + tunables->io_is_busy); + pcpu->cputime_speedadj = 0; + pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; + spin_unlock_irqrestore(&pcpu->load_lock, flags); +} + +static unsigned int freq_to_above_hispeed_delay( + struct cpufreq_interactive_tunables *tunables, + unsigned int freq) +{ + int i; + unsigned int ret; + unsigned long flags; + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + + for (i = 0; i < tunables->nabove_hispeed_delay - 1 && + freq >= tunables->above_hispeed_delay[i+1]; i += 2) + ; + + ret = tunables->above_hispeed_delay[i]; + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return ret; +} + +static unsigned int freq_to_targetload( + struct cpufreq_interactive_tunables *tunables, unsigned int freq) +{ + int i; + unsigned int ret; + unsigned long flags; + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + + for (i = 0; i < tunables->ntarget_loads - 1 && + freq >= tunables->target_loads[i+1]; i += 2) + ; + + ret = tunables->target_loads[i]; + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return ret; +} /* - * The sample rate of the timer used to increase frequency + * If increasing frequencies never map to a lower target load then + * choose_freq() will find the minimum frequency that does not exceed its + * target load given the current load. */ -#define DEFAULT_TIMER_RATE (50 * USEC_PER_MSEC) -#define CPUFREQ_IRQ_LEN 60 -#define CPUFREQ_NOTE_LEN 120 -static unsigned long timer_rate; +static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu, + unsigned int loadadjfreq) +{ + unsigned int freq = pcpu->policy->cur; + unsigned int prevfreq, freqmin, freqmax; + unsigned int tl; + int index; -static int cpufreq_governor_interactive(struct cpufreq_policy *policy, - unsigned int event); + freqmin = 0; + freqmax = UINT_MAX; -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE -static -#endif -struct cpufreq_governor cpufreq_gov_interactive = { - .name = "interactive", - .governor = cpufreq_governor_interactive, - .max_transition_latency = 10000000, - .owner = THIS_MODULE, -}; + do { + prevfreq = freq; + tl = freq_to_targetload(pcpu->policy->governor_data, freq); -static void cpufreq_interactive_timer(unsigned long data) + /* + * Find the lowest frequency where the computed load is less + * than or equal to the target load. + */ + + if (cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, loadadjfreq / tl, + CPUFREQ_RELATION_L, &index)) + break; + freq = pcpu->freq_table[index].frequency; + + if (freq > prevfreq) { + /* The previous frequency is too low. */ + freqmin = prevfreq; + + if (freq >= freqmax) { + /* + * Find the highest frequency that is less + * than freqmax. + */ + if (cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmax - 1, CPUFREQ_RELATION_H, + &index)) + break; + freq = pcpu->freq_table[index].frequency; + + if (freq == freqmin) { + /* + * The first frequency below freqmax + * has already been found to be too + * low. freqmax is the lowest speed + * we found that is fast enough. + */ + freq = freqmax; + break; + } + } + } else if (freq < prevfreq) { + /* The previous frequency is high enough. */ + freqmax = prevfreq; + + if (freq <= freqmin) { + /* + * Find the lowest frequency that is higher + * than freqmin. + */ + if (cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmin + 1, CPUFREQ_RELATION_L, + &index)) + break; + freq = pcpu->freq_table[index].frequency; + + /* + * If freqmax is the first frequency above + * freqmin then we have already found that + * this speed is fast enough. + */ + if (freq == freqmax) + break; + } + } + + /* If same frequency chosen as previous then done. */ + } while (freq != prevfreq); + + return freq; +} + +static u64 update_load(int cpu) { + struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); + struct cpufreq_interactive_tunables *tunables = + pcpu->policy->governor_data; + u64 now; + u64 now_idle; unsigned int delta_idle; unsigned int delta_time; + u64 active_time; + + now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy); + delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle); + delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp); + + if (delta_time <= delta_idle) + active_time = 0; + else + active_time = delta_time - delta_idle; + + pcpu->cputime_speedadj += active_time * pcpu->policy->cur; + + pcpu->time_in_idle = now_idle; + pcpu->time_in_idle_timestamp = now; + return now; +} + +static void cpufreq_interactive_timer(unsigned long data) +{ + u64 now; + unsigned int delta_time; + u64 cputime_speedadj; int cpu_load; - int load_since_change; - u64 time_in_idle; - u64 idle_exit_time; struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, data); - u64 now_idle; + struct cpufreq_interactive_tunables *tunables = + pcpu->policy->governor_data; unsigned int new_freq; + unsigned int loadadjfreq; unsigned int index; unsigned long flags; + bool boosted; - smp_rmb(); - + if (!down_read_trylock(&pcpu->enable_sem)) + return; if (!pcpu->governor_enabled) goto exit; - /* - * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time, - * this lets idle exit know the current idle time sample has - * been processed, and idle exit can generate a new sample and - * re-arm the timer. This prevents a concurrent idle - * exit on that CPU from writing a new set of info at the same time - * the timer function runs (the timer function can't use that info - * until more time passes). - */ - time_in_idle = pcpu->time_in_idle; - idle_exit_time = pcpu->idle_exit_time; - now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time); - smp_wmb(); - - /* If we raced with cancelling a timer, skip. */ - if (!idle_exit_time) - goto exit; - - delta_idle = (unsigned int)(now_idle - time_in_idle); - delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time); + spin_lock_irqsave(&pcpu->load_lock, flags); + now = update_load(data); + delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp); + cputime_speedadj = pcpu->cputime_speedadj; + spin_unlock_irqrestore(&pcpu->load_lock, flags); - /* - * If timer ran less than 1ms after short-term sample started, retry. - */ - if (delta_time < 1000) + if (WARN_ON_ONCE(!delta_time)) goto rearm; - if (delta_idle > delta_time) - cpu_load = 0; - else - cpu_load = 100 * (delta_time - delta_idle) / delta_time; - - delta_idle = (unsigned int)(now_idle - pcpu->freq_change_time_in_idle); - delta_time = (unsigned int)(pcpu->timer_run_time - - pcpu->freq_change_time); + spin_lock_irqsave(&pcpu->target_freq_lock, flags); + do_div(cputime_speedadj, delta_time); + loadadjfreq = (unsigned int)cputime_speedadj * 100; + cpu_load = loadadjfreq / pcpu->target_freq; + boosted = tunables->boost_val || now < tunables->boostpulse_endtime; - if ((delta_time == 0) || (delta_idle > delta_time)) - load_since_change = 0; - else - load_since_change = - 100 * (delta_time - delta_idle) / delta_time; + if (cpu_load >= tunables->go_hispeed_load || boosted) { + if (pcpu->target_freq < tunables->hispeed_freq) { + new_freq = tunables->hispeed_freq; + } else { + new_freq = choose_freq(pcpu, loadadjfreq); - /* - * Choose greater of short-term load (since last idle timer - * started or timer function re-armed itself) or long-term load - * (since last frequency change). - */ - if (load_since_change > cpu_load) - cpu_load = load_since_change; - - if (cpu_load >= go_hispeed_load) { - if (pcpu->policy->cur == pcpu->policy->min) - new_freq = hispeed_freq; - else - new_freq = pcpu->policy->max * cpu_load / 100; + if (new_freq < tunables->hispeed_freq) + new_freq = tunables->hispeed_freq; + } } else { - new_freq = pcpu->policy->cur * cpu_load / 100; + new_freq = choose_freq(pcpu, loadadjfreq); } + if (pcpu->target_freq >= tunables->hispeed_freq && + new_freq > pcpu->target_freq && + now - pcpu->hispeed_validate_time < + freq_to_above_hispeed_delay(tunables, pcpu->target_freq)) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm; + } + + pcpu->hispeed_validate_time = now; + if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table, - new_freq, CPUFREQ_RELATION_H, + new_freq, CPUFREQ_RELATION_L, &index)) { - pr_warn_once("timer %d: cpufreq_frequency_table_target error\n", - (int) data); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); goto rearm; } new_freq = pcpu->freq_table[index].frequency; - if (pcpu->target_freq == new_freq) - goto rearm_if_notmax; /* - * Do not scale down unless we have been at this frequency for the - * minimum sample time. + * Do not scale below floor_freq unless we have been at or above the + * floor frequency for the minimum sample time since last validated. */ - if (new_freq < pcpu->target_freq) { - if ((pcpu->timer_run_time - pcpu->freq_change_time) - < min_sample_time) + if (new_freq < pcpu->floor_freq) { + if (now - pcpu->floor_validate_time < + tunables->min_sample_time) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); goto rearm; + } } - if (new_freq < pcpu->target_freq) { - pcpu->target_freq = new_freq; - spin_lock_irqsave(&down_cpumask_lock, flags); - cpumask_set_cpu(data, &down_cpumask); - spin_unlock_irqrestore(&down_cpumask_lock, flags); - queue_work(down_wq, &freq_scale_down_work); - } else { - pcpu->target_freq = new_freq; - spin_lock_irqsave(&up_cpumask_lock, flags); - cpumask_set_cpu(data, &up_cpumask); - spin_unlock_irqrestore(&up_cpumask_lock, flags); - wake_up_process(up_task); + /* + * Update the timestamp for checking whether speed has been held at + * or above the selected frequency for a minimum of min_sample_time, + * if not boosted to hispeed_freq. If boosted to hispeed_freq then we + * allow the speed to drop as soon as the boostpulse duration expires + * (or the indefinite boost is turned off). + */ + + if (!boosted || new_freq > tunables->hispeed_freq) { + pcpu->floor_freq = new_freq; + pcpu->floor_validate_time = now; } + if (pcpu->target_freq == new_freq) { + trace_cpufreq_interactive_already( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm_if_notmax; + } + + trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + + pcpu->target_freq = new_freq; + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + cpumask_set_cpu(data, &speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + wake_up_process(speedchange_task); + rearm_if_notmax: /* * Already set max speed and don't see a need to change that, @@ -219,28 +496,11 @@ rearm_if_notmax: goto exit; rearm: - if (!timer_pending(&pcpu->cpu_timer)) { - /* - * If already at min: if that CPU is idle, don't set timer. - * Else cancel the timer if that CPU goes idle. We don't - * need to re-evaluate speed until the next idle exit. - */ - if (pcpu->target_freq == pcpu->policy->min) { - smp_rmb(); - - if (pcpu->idling) - goto exit; - - pcpu->timer_idlecancel = 1; - } - - pcpu->time_in_idle = get_cpu_idle_time_us( - data, &pcpu->idle_exit_time); - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); - } + if (!timer_pending(&pcpu->cpu_timer)) + cpufreq_interactive_timer_resched(pcpu); exit: + up_read(&pcpu->enable_sem); return; } @@ -250,14 +510,16 @@ static void cpufreq_interactive_idle_start(void) &per_cpu(cpuinfo, smp_processor_id()); int pending; - pcpu->idling = 1; - smp_wmb(); - if (!pcpu->governor_enabled) + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); return; + } + pending = timer_pending(&pcpu->cpu_timer); if (pcpu->target_freq != pcpu->policy->min) { -#ifdef CONFIG_SMP /* * Entering idle while not at lowest speed. On some * platforms this can hold the other CPU(s) at that speed @@ -266,33 +528,11 @@ static void cpufreq_interactive_idle_start(void) * min indefinitely. This should probably be a quirk of * the CPUFreq driver. */ - if (!pending) { - pcpu->time_in_idle = get_cpu_idle_time_us( - smp_processor_id(), &pcpu->idle_exit_time); - pcpu->timer_idlecancel = 0; - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); - } -#endif - } else { - /* - * If at min speed and entering idle after load has - * already been evaluated, and a timer has been set just in - * case the CPU suddenly goes busy, cancel that timer. The - * CPU didn't go busy; we'll recheck things upon idle exit. - */ - if (pending && pcpu->timer_idlecancel) { - del_timer(&pcpu->cpu_timer); - /* - * Ensure last timer run time is after current idle - * sample start time, so next idle exit will always - * start a new idle sampling period. - */ - pcpu->idle_exit_time = 0; - pcpu->timer_idlecancel = 0; - } + if (!pending) + cpufreq_interactive_timer_resched(pcpu); } + up_read(&pcpu->enable_sem); } static void cpufreq_interactive_idle_end(void) @@ -300,163 +540,342 @@ static void cpufreq_interactive_idle_end(void) struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, smp_processor_id()); - pcpu->idling = 0; - smp_wmb(); + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return; + } - /* - * Arm the timer for 1-2 ticks later if not already, and if the timer - * function has already processed the previous load sampling - * interval. (If the timer is not pending but has not processed - * the previous interval, it is probably racing with us on another - * CPU. Let it compute load based on the previous sample and then - * re-arm the timer for another interval when it's done, rather - * than updating the interval start time to be "now", which doesn't - * give the timer function enough time to make a decision on this - * run.) - */ - if (timer_pending(&pcpu->cpu_timer) == 0 && - pcpu->timer_run_time >= pcpu->idle_exit_time && - pcpu->governor_enabled) { - pcpu->time_in_idle = - get_cpu_idle_time_us(smp_processor_id(), - &pcpu->idle_exit_time); - pcpu->timer_idlecancel = 0; - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); + /* Arm the timer for 1-2 ticks later if not already. */ + if (!timer_pending(&pcpu->cpu_timer)) { + cpufreq_interactive_timer_resched(pcpu); + } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) { + del_timer(&pcpu->cpu_timer); + del_timer(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer(smp_processor_id()); } + up_read(&pcpu->enable_sem); } -static int cpufreq_interactive_up_task(void *data) +static int cpufreq_interactive_speedchange_task(void *data) { unsigned int cpu; + cpumask_t tmp_mask; unsigned long flags; struct cpufreq_interactive_cpuinfo *pcpu; while (1) { set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irqsave(&up_cpumask_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); - if (cpumask_empty(&up_cpumask)) { - spin_unlock_irqrestore(&up_cpumask_lock, flags); + if (cpumask_empty(&speedchange_cpumask)) { + spin_unlock_irqrestore(&speedchange_cpumask_lock, + flags); schedule(); if (kthread_should_stop()) break; - spin_lock_irqsave(&up_cpumask_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); } set_current_state(TASK_RUNNING); - cpumask_clear(&up_cpumask); - spin_unlock_irqrestore(&up_cpumask_lock, flags); + tmp_mask = speedchange_cpumask; + cpumask_clear(&speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); - for_each_online_cpu(cpu) { + for_each_cpu(cpu, &tmp_mask) { unsigned int j; unsigned int max_freq = 0; pcpu = &per_cpu(cpuinfo, cpu); - smp_rmb(); - - if (!pcpu->governor_enabled) + if (!down_read_trylock(&pcpu->enable_sem)) continue; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + continue; + } - mutex_lock(&set_speed_lock); - - for_each_online_cpu(j) { + for_each_cpu(j, pcpu->policy->cpus) { struct cpufreq_interactive_cpuinfo *pjcpu = &per_cpu(cpuinfo, j); + if (pjcpu->target_freq > max_freq) max_freq = pjcpu->target_freq; } + if (max_freq != pcpu->policy->cur) __cpufreq_driver_target(pcpu->policy, max_freq, CPUFREQ_RELATION_H); - mutex_unlock(&set_speed_lock); + trace_cpufreq_interactive_setspeed(cpu, + pcpu->target_freq, + pcpu->policy->cur); - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(cpu, - &pcpu->freq_change_time); + up_read(&pcpu->enable_sem); } } return 0; } -static void cpufreq_interactive_freq_down(struct work_struct *work) +static void cpufreq_interactive_boost(void) { - unsigned int cpu; - unsigned long flags; + int i; + int anyboost = 0; + unsigned long flags[2]; struct cpufreq_interactive_cpuinfo *pcpu; + struct cpufreq_interactive_tunables *tunables; - spin_lock_irqsave(&down_cpumask_lock, flags); - cpumask_clear(&down_cpumask); - spin_unlock_irqrestore(&down_cpumask_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); - for_each_online_cpu(cpu) { - unsigned int j; - unsigned int max_freq = 0; + for_each_online_cpu(i) { + pcpu = &per_cpu(cpuinfo, i); + tunables = pcpu->policy->governor_data; + + spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]); + if (pcpu->target_freq < tunables->hispeed_freq) { + pcpu->target_freq = tunables->hispeed_freq; + cpumask_set_cpu(i, &speedchange_cpumask); + pcpu->hispeed_validate_time = + ktime_to_us(ktime_get()); + anyboost = 1; + } - pcpu = &per_cpu(cpuinfo, cpu); - smp_rmb(); + /* + * Set floor freq and (re)start timer for when last + * validated. + */ - if (!pcpu->governor_enabled) - continue; + pcpu->floor_freq = tunables->hispeed_freq; + pcpu->floor_validate_time = ktime_to_us(ktime_get()); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]); + } - mutex_lock(&set_speed_lock); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); - for_each_online_cpu(j) { - struct cpufreq_interactive_cpuinfo *pjcpu = - &per_cpu(cpuinfo, j); + if (anyboost) + wake_up_process(speedchange_task); +} + +static int cpufreq_interactive_notifier( + struct notifier_block *nb, unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpufreq_interactive_cpuinfo *pcpu; + int cpu; + unsigned long flags; - if (pjcpu->target_freq > max_freq) - max_freq = pjcpu->target_freq; + if (val == CPUFREQ_POSTCHANGE) { + pcpu = &per_cpu(cpuinfo, freq->cpu); + if (!down_read_trylock(&pcpu->enable_sem)) + return 0; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return 0; + } + + for_each_cpu(cpu, pcpu->policy->cpus) { + struct cpufreq_interactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, cpu); + if (cpu != freq->cpu) { + if (!down_read_trylock(&pjcpu->enable_sem)) + continue; + if (!pjcpu->governor_enabled) { + up_read(&pjcpu->enable_sem); + continue; + } + } + spin_lock_irqsave(&pjcpu->load_lock, flags); + update_load(cpu); + spin_unlock_irqrestore(&pjcpu->load_lock, flags); + if (cpu != freq->cpu) + up_read(&pjcpu->enable_sem); } - if (max_freq != pcpu->policy->cur) - __cpufreq_driver_target(pcpu->policy, max_freq, - CPUFREQ_RELATION_H); + up_read(&pcpu->enable_sem); + } + return 0; +} - mutex_unlock(&set_speed_lock); - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(cpu, - &pcpu->freq_change_time); +static struct notifier_block cpufreq_notifier_block = { + .notifier_call = cpufreq_interactive_notifier, +}; + +static unsigned int *get_tokenized_data(const char *buf, int *num_tokens) +{ + const char *cp; + int i; + int ntokens = 1; + unsigned int *tokenized_data; + int err = -EINVAL; + + cp = buf; + while ((cp = strpbrk(cp + 1, " :"))) + ntokens++; + + if (!(ntokens & 0x1)) + goto err; + + tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL); + if (!tokenized_data) { + err = -ENOMEM; + goto err; } + + cp = buf; + i = 0; + while (i < ntokens) { + if (sscanf(cp, "%u", &tokenized_data[i++]) != 1) + goto err_kfree; + + cp = strpbrk(cp, " :"); + if (!cp) + break; + cp++; + } + + if (i != ntokens) + goto err_kfree; + + *num_tokens = ntokens; + return tokenized_data; + +err_kfree: + kfree(tokenized_data); +err: + return ERR_PTR(err); +} + +static ssize_t show_target_loads( + struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + int i; + ssize_t ret = 0; + unsigned long flags; + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + + for (i = 0; i < tunables->ntarget_loads; i++) + ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i], + i & 0x1 ? ":" : " "); + + sprintf(buf + ret - 1, "\n"); + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return ret; +} + +static ssize_t store_target_loads( + struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ntokens; + unsigned int *new_target_loads = NULL; + unsigned long flags; + + new_target_loads = get_tokenized_data(buf, &ntokens); + if (IS_ERR(new_target_loads)) + return PTR_RET(new_target_loads); + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + if (tunables->target_loads != default_target_loads) + kfree(tunables->target_loads); + tunables->target_loads = new_target_loads; + tunables->ntarget_loads = ntokens; + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return count; +} + +static ssize_t show_above_hispeed_delay( + struct cpufreq_interactive_tunables *tunables, char *buf) +{ + int i; + ssize_t ret = 0; + unsigned long flags; + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + + for (i = 0; i < tunables->nabove_hispeed_delay; i++) + ret += sprintf(buf + ret, "%u%s", + tunables->above_hispeed_delay[i], + i & 0x1 ? ":" : " "); + + sprintf(buf + ret - 1, "\n"); + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return ret; } -static ssize_t show_hispeed_freq(struct kobject *kobj, - struct attribute *attr, char *buf) +static ssize_t store_above_hispeed_delay( + struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) { - return sprintf(buf, "%llu\n", hispeed_freq); + int ntokens; + unsigned int *new_above_hispeed_delay = NULL; + unsigned long flags; + + new_above_hispeed_delay = get_tokenized_data(buf, &ntokens); + if (IS_ERR(new_above_hispeed_delay)) + return PTR_RET(new_above_hispeed_delay); + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + if (tunables->above_hispeed_delay != default_above_hispeed_delay) + kfree(tunables->above_hispeed_delay); + tunables->above_hispeed_delay = new_above_hispeed_delay; + tunables->nabove_hispeed_delay = ntokens; + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return count; + +} + +static ssize_t show_hispeed_freq(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%u\n", tunables->hispeed_freq); } -static ssize_t store_hispeed_freq(struct kobject *kobj, - struct attribute *attr, const char *buf, - size_t count) +static ssize_t store_hispeed_freq(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) { int ret; - u64 val; + long unsigned int val; - ret = strict_strtoull(buf, 0, &val); + ret = strict_strtoul(buf, 0, &val); if (ret < 0) return ret; - hispeed_freq = val; + tunables->hispeed_freq = val; return count; } -static struct global_attr hispeed_freq_attr = __ATTR(hispeed_freq, 0644, - show_hispeed_freq, store_hispeed_freq); +static ssize_t show_go_hispeed_load(struct cpufreq_interactive_tunables + *tunables, char *buf) +{ + return sprintf(buf, "%lu\n", tunables->go_hispeed_load); +} + +static ssize_t store_go_hispeed_load(struct cpufreq_interactive_tunables + *tunables, const char *buf, size_t count) +{ + int ret; + unsigned long val; + ret = strict_strtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->go_hispeed_load = val; + return count; +} -static ssize_t show_go_hispeed_load(struct kobject *kobj, - struct attribute *attr, char *buf) +static ssize_t show_min_sample_time(struct cpufreq_interactive_tunables + *tunables, char *buf) { - return sprintf(buf, "%lu\n", go_hispeed_load); + return sprintf(buf, "%lu\n", tunables->min_sample_time); } -static ssize_t store_go_hispeed_load(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) +static ssize_t store_min_sample_time(struct cpufreq_interactive_tunables + *tunables, const char *buf, size_t count) { int ret; unsigned long val; @@ -464,21 +883,18 @@ static ssize_t store_go_hispeed_load(struct kobject *kobj, ret = strict_strtoul(buf, 0, &val); if (ret < 0) return ret; - go_hispeed_load = val; + tunables->min_sample_time = val; return count; } -static struct global_attr go_hispeed_load_attr = __ATTR(go_hispeed_load, 0644, - show_go_hispeed_load, store_go_hispeed_load); - -static ssize_t show_min_sample_time(struct kobject *kobj, - struct attribute *attr, char *buf) +static ssize_t show_timer_rate(struct cpufreq_interactive_tunables *tunables, + char *buf) { - return sprintf(buf, "%lu\n", min_sample_time); + return sprintf(buf, "%lu\n", tunables->timer_rate); } -static ssize_t store_min_sample_time(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) +static ssize_t store_timer_rate(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) { int ret; unsigned long val; @@ -486,48 +902,262 @@ static ssize_t store_min_sample_time(struct kobject *kobj, ret = strict_strtoul(buf, 0, &val); if (ret < 0) return ret; - min_sample_time = val; + tunables->timer_rate = val; return count; } -static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644, - show_min_sample_time, store_min_sample_time); +static ssize_t show_timer_slack(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%d\n", tunables->timer_slack_val); +} + +static ssize_t store_timer_slack(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtol(buf, 10, &val); + if (ret < 0) + return ret; + + tunables->timer_slack_val = val; + return count; +} -static ssize_t show_timer_rate(struct kobject *kobj, - struct attribute *attr, char *buf) +static ssize_t show_boost(struct cpufreq_interactive_tunables *tunables, + char *buf) { - return sprintf(buf, "%lu\n", timer_rate); + return sprintf(buf, "%d\n", tunables->boost_val); } -static ssize_t store_timer_rate(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) +static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) { int ret; unsigned long val; - ret = strict_strtoul(buf, 0, &val); + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boost_val = val; + + if (tunables->boost_val) { + trace_cpufreq_interactive_boost("on"); + cpufreq_interactive_boost(); + } else { + tunables->boostpulse_endtime = ktime_to_us(ktime_get()); + trace_cpufreq_interactive_unboost("off"); + } + + return count; +} + +static ssize_t store_boostpulse(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); if (ret < 0) return ret; - timer_rate = val; + + tunables->boostpulse_endtime = ktime_to_us(ktime_get()) + + tunables->boostpulse_duration_val; + trace_cpufreq_interactive_boost("pulse"); + cpufreq_interactive_boost(); return count; } -static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644, - show_timer_rate, store_timer_rate); +static ssize_t show_boostpulse_duration(struct cpufreq_interactive_tunables + *tunables, char *buf) +{ + return sprintf(buf, "%d\n", tunables->boostpulse_duration_val); +} -static struct attribute *interactive_attributes[] = { - &hispeed_freq_attr.attr, - &go_hispeed_load_attr.attr, - &min_sample_time_attr.attr, - &timer_rate_attr.attr, +static ssize_t store_boostpulse_duration(struct cpufreq_interactive_tunables + *tunables, const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boostpulse_duration_val = val; + return count; +} + +static ssize_t show_io_is_busy(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%u\n", tunables->io_is_busy); +} + +static ssize_t store_io_is_busy(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->io_is_busy = val; + return count; +} + +/* + * Create show/store routines + * - sys: One governor instance for complete SYSTEM + * - pol: One governor instance per struct cpufreq_policy + */ +#define show_gov_pol_sys(file_name) \ +static ssize_t show_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return show_##file_name(common_tunables, buf); \ +} \ + \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + return show_##file_name(policy->governor_data, buf); \ +} + +#define store_gov_pol_sys(file_name) \ +static ssize_t store_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, const char *buf, \ + size_t count) \ +{ \ + return store_##file_name(common_tunables, buf, count); \ +} \ + \ +static ssize_t store_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ +{ \ + return store_##file_name(policy->governor_data, buf, count); \ +} + +#define show_store_gov_pol_sys(file_name) \ +show_gov_pol_sys(file_name); \ +store_gov_pol_sys(file_name) + +show_store_gov_pol_sys(target_loads); +show_store_gov_pol_sys(above_hispeed_delay); +show_store_gov_pol_sys(hispeed_freq); +show_store_gov_pol_sys(go_hispeed_load); +show_store_gov_pol_sys(min_sample_time); +show_store_gov_pol_sys(timer_rate); +show_store_gov_pol_sys(timer_slack); +show_store_gov_pol_sys(boost); +store_gov_pol_sys(boostpulse); +show_store_gov_pol_sys(boostpulse_duration); +show_store_gov_pol_sys(io_is_busy); + +#define gov_sys_attr_rw(_name) \ +static struct global_attr _name##_gov_sys = \ +__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) + +#define gov_pol_attr_rw(_name) \ +static struct freq_attr _name##_gov_pol = \ +__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) + +#define gov_sys_pol_attr_rw(_name) \ + gov_sys_attr_rw(_name); \ + gov_pol_attr_rw(_name) + +gov_sys_pol_attr_rw(target_loads); +gov_sys_pol_attr_rw(above_hispeed_delay); +gov_sys_pol_attr_rw(hispeed_freq); +gov_sys_pol_attr_rw(go_hispeed_load); +gov_sys_pol_attr_rw(min_sample_time); +gov_sys_pol_attr_rw(timer_rate); +gov_sys_pol_attr_rw(timer_slack); +gov_sys_pol_attr_rw(boost); +gov_sys_pol_attr_rw(boostpulse_duration); +gov_sys_pol_attr_rw(io_is_busy); + +static struct global_attr boostpulse_gov_sys = + __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys); + +static struct freq_attr boostpulse_gov_pol = + __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_pol); + +/* One Governor instance for entire system */ +static struct attribute *interactive_attributes_gov_sys[] = { + &target_loads_gov_sys.attr, + &above_hispeed_delay_gov_sys.attr, + &hispeed_freq_gov_sys.attr, + &go_hispeed_load_gov_sys.attr, + &min_sample_time_gov_sys.attr, + &timer_rate_gov_sys.attr, + &timer_slack_gov_sys.attr, + &boost_gov_sys.attr, + &boostpulse_gov_sys.attr, + &boostpulse_duration_gov_sys.attr, + &io_is_busy_gov_sys.attr, + NULL, +}; + +static struct attribute_group interactive_attr_group_gov_sys = { + .attrs = interactive_attributes_gov_sys, + .name = "interactive", +}; + +/* Per policy governor instance */ +static struct attribute *interactive_attributes_gov_pol[] = { + &target_loads_gov_pol.attr, + &above_hispeed_delay_gov_pol.attr, + &hispeed_freq_gov_pol.attr, + &go_hispeed_load_gov_pol.attr, + &min_sample_time_gov_pol.attr, + &timer_rate_gov_pol.attr, + &timer_slack_gov_pol.attr, + &boost_gov_pol.attr, + &boostpulse_gov_pol.attr, + &boostpulse_duration_gov_pol.attr, + &io_is_busy_gov_pol.attr, NULL, }; -static struct attribute_group interactive_attr_group = { - .attrs = interactive_attributes, +static struct attribute_group interactive_attr_group_gov_pol = { + .attrs = interactive_attributes_gov_pol, .name = "interactive", }; +static struct attribute_group *get_sysfs_attr(void) +{ + if (have_governor_per_policy()) + return &interactive_attr_group_gov_pol; + else + return &interactive_attr_group_gov_sys; +} + +static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, + unsigned long val, + void *data) +{ + switch (val) { + case IDLE_START: + cpufreq_interactive_idle_start(); + break; + case IDLE_END: + cpufreq_interactive_idle_end(); + break; + } + + return 0; +} + +static struct notifier_block cpufreq_interactive_idle_nb = { + .notifier_call = cpufreq_interactive_idle_notifier, +}; + static int cpufreq_governor_interactive(struct cpufreq_policy *policy, unsigned int event) { @@ -535,71 +1165,127 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, unsigned int j; struct cpufreq_interactive_cpuinfo *pcpu; struct cpufreq_frequency_table *freq_table; + struct cpufreq_interactive_tunables *tunables; + unsigned long flags; + + if (have_governor_per_policy()) + tunables = policy->governor_data; + else + tunables = common_tunables; + + WARN_ON(!tunables && (event != CPUFREQ_GOV_POLICY_INIT)); switch (event) { + case CPUFREQ_GOV_POLICY_INIT: + if (have_governor_per_policy()) { + WARN_ON(tunables); + } else if (tunables) { + tunables->usage_count++; + policy->governor_data = tunables; + return 0; + } + + tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); + if (!tunables) { + pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); + return -ENOMEM; + } + + tunables->usage_count = 1; + tunables->above_hispeed_delay = default_above_hispeed_delay; + tunables->nabove_hispeed_delay = + ARRAY_SIZE(default_above_hispeed_delay); + tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; + tunables->target_loads = default_target_loads; + tunables->ntarget_loads = ARRAY_SIZE(default_target_loads); + tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME; + tunables->timer_rate = DEFAULT_TIMER_RATE; + tunables->boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME; + tunables->timer_slack_val = DEFAULT_TIMER_SLACK; + + spin_lock_init(&tunables->target_loads_lock); + spin_lock_init(&tunables->above_hispeed_delay_lock); + + policy->governor_data = tunables; + if (!have_governor_per_policy()) + common_tunables = tunables; + + rc = sysfs_create_group(get_governor_parent_kobj(policy), + get_sysfs_attr()); + if (rc) { + kfree(tunables); + policy->governor_data = NULL; + if (!have_governor_per_policy()) + common_tunables = NULL; + return rc; + } + + if (!policy->governor->initialized) { + idle_notifier_register(&cpufreq_interactive_idle_nb); + cpufreq_register_notifier(&cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + break; + + case CPUFREQ_GOV_POLICY_EXIT: + if (!--tunables->usage_count) { + if (policy->governor->initialized == 1) { + cpufreq_unregister_notifier(&cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + idle_notifier_unregister(&cpufreq_interactive_idle_nb); + } + + sysfs_remove_group(get_governor_parent_kobj(policy), + get_sysfs_attr()); + kfree(tunables); + common_tunables = NULL; + } + + policy->governor_data = NULL; + break; + case CPUFREQ_GOV_START: - if (!cpu_online(policy->cpu)) - return -EINVAL; + mutex_lock(&gov_lock); - freq_table = - cpufreq_frequency_get_table(policy->cpu); + freq_table = cpufreq_frequency_get_table(policy->cpu); + if (!tunables->hispeed_freq) + tunables->hispeed_freq = policy->max; for_each_cpu(j, policy->cpus) { pcpu = &per_cpu(cpuinfo, j); pcpu->policy = policy; - if (pcpu->idling) - pcpu->target_freq = policy->min; - else - pcpu->target_freq = policy->cur; - + pcpu->target_freq = policy->cur; pcpu->freq_table = freq_table; - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(j, - &pcpu->freq_change_time); + pcpu->floor_freq = pcpu->target_freq; + pcpu->floor_validate_time = + ktime_to_us(ktime_get()); + pcpu->hispeed_validate_time = + pcpu->floor_validate_time; + pcpu->max_freq = policy->max; + down_write(&pcpu->enable_sem); + del_timer_sync(&pcpu->cpu_timer); + del_timer_sync(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer_start(tunables, j); pcpu->governor_enabled = 1; - smp_wmb(); + up_write(&pcpu->enable_sem); } - if (!hispeed_freq) - hispeed_freq = policy->max; - - /* - * Do not register the idle hook and create sysfs - * entries if we have already done so. - */ - if (atomic_inc_return(&active_count) > 1) - return 0; - - rc = sysfs_create_group(cpufreq_global_kobject, - &interactive_attr_group); - if (rc) - return rc; - + mutex_unlock(&gov_lock); break; case CPUFREQ_GOV_STOP: + mutex_lock(&gov_lock); for_each_cpu(j, policy->cpus) { pcpu = &per_cpu(cpuinfo, j); + down_write(&pcpu->enable_sem); pcpu->governor_enabled = 0; - smp_wmb(); del_timer_sync(&pcpu->cpu_timer); - - /* - * Reset idle exit time since we may cancel the timer - * before it can run after the last idle exit time, - * to avoid tripping the check in idle exit for a timer - * that is trying to run. - */ - pcpu->idle_exit_time = 0; + del_timer_sync(&pcpu->cpu_slack_timer); + up_write(&pcpu->enable_sem); } - flush_work(&freq_scale_down_work); - if (atomic_dec_return(&active_count) > 0) - return 0; - - sysfs_remove_group(cpufreq_global_kobject, - &interactive_attr_group); - + mutex_unlock(&gov_lock); break; case CPUFREQ_GOV_LIMITS: @@ -609,82 +1295,98 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, else if (policy->min > policy->cur) __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - break; - } - return 0; -} + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); -static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, - unsigned long val, - void *data) -{ - switch (val) { - case IDLE_START: - cpufreq_interactive_idle_start(); - break; - case IDLE_END: - cpufreq_interactive_idle_end(); + down_read(&pcpu->enable_sem); + if (pcpu->governor_enabled == 0) { + up_read(&pcpu->enable_sem); + continue; + } + + spin_lock_irqsave(&pcpu->target_freq_lock, flags); + if (policy->max < pcpu->target_freq) + pcpu->target_freq = policy->max; + else if (policy->min > pcpu->target_freq) + pcpu->target_freq = policy->min; + + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + up_read(&pcpu->enable_sem); + + /* Reschedule timer only if policy->max is raised. + * Delete the timers, else the timer callback may + * return without re-arm the timer when failed + * acquire the semaphore. This race may cause timer + * stopped unexpectedly. + */ + + if (policy->max > pcpu->max_freq) { + down_write(&pcpu->enable_sem); + del_timer_sync(&pcpu->cpu_timer); + del_timer_sync(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer_start(tunables, j); + up_write(&pcpu->enable_sem); + } + + pcpu->max_freq = policy->max; + } break; } - return 0; } -static struct notifier_block cpufreq_interactive_idle_nb = { - .notifier_call = cpufreq_interactive_idle_notifier, +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE +static +#endif +struct cpufreq_governor cpufreq_gov_interactive = { + .name = "interactive", + .governor = cpufreq_governor_interactive, + .max_transition_latency = 10000000, + .owner = THIS_MODULE, }; +static void cpufreq_interactive_nop_timer(unsigned long data) +{ +} + static int __init cpufreq_interactive_init(void) { unsigned int i; struct cpufreq_interactive_cpuinfo *pcpu; - struct sched_param param = { .sched_priority = 99 }; - - go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; - min_sample_time = DEFAULT_MIN_SAMPLE_TIME; - timer_rate = DEFAULT_TIMER_RATE; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; /* Initalize per-cpu timers */ for_each_possible_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - init_timer(&pcpu->cpu_timer); + init_timer_deferrable(&pcpu->cpu_timer); pcpu->cpu_timer.function = cpufreq_interactive_timer; pcpu->cpu_timer.data = i; + init_timer(&pcpu->cpu_slack_timer); + pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer; + spin_lock_init(&pcpu->load_lock); + spin_lock_init(&pcpu->target_freq_lock); + init_rwsem(&pcpu->enable_sem); } - up_task = kthread_create(cpufreq_interactive_up_task, NULL, - "kinteractiveup"); - if (IS_ERR(up_task)) - return PTR_ERR(up_task); - - sched_setscheduler_nocheck(up_task, SCHED_FIFO, ¶m); - get_task_struct(up_task); - - /* No rescuer thread, bind to CPU queuing the work for possibly - warm cache (probably doesn't matter much). */ - down_wq = alloc_workqueue("kinteractive_down", 0, 1); + spin_lock_init(&speedchange_cpumask_lock); + mutex_init(&gov_lock); + speedchange_task = + kthread_create(cpufreq_interactive_speedchange_task, NULL, + "cfinteractive"); + if (IS_ERR(speedchange_task)) + return PTR_ERR(speedchange_task); - if (!down_wq) - goto err_freeuptask; + sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); + get_task_struct(speedchange_task); - INIT_WORK(&freq_scale_down_work, - cpufreq_interactive_freq_down); - - spin_lock_init(&up_cpumask_lock); - spin_lock_init(&down_cpumask_lock); - mutex_init(&set_speed_lock); - - idle_notifier_register(&cpufreq_interactive_idle_nb); + /* NB: wake up so the thread does not look hung to the freezer */ + wake_up_process(speedchange_task); return cpufreq_register_governor(&cpufreq_gov_interactive); - -err_freeuptask: - put_task_struct(up_task); - return -ENOMEM; } #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE -late_initcall(cpufreq_interactive_init); +fs_initcall(cpufreq_interactive_init); #else module_init(cpufreq_interactive_init); #endif @@ -692,9 +1394,8 @@ module_init(cpufreq_interactive_init); static void __exit cpufreq_interactive_exit(void) { cpufreq_unregister_governor(&cpufreq_gov_interactive); - kthread_stop(up_task); - put_task_struct(up_task); - destroy_workqueue(down_wq); + kthread_stop(speedchange_task); + put_task_struct(speedchange_task); } module_exit(cpufreq_interactive_exit); |