-rw-r--r--  Documentation/cpu-freq/governors.txt  |  36
-rw-r--r--  drivers/cpufreq/Kconfig               |  16
-rw-r--r--  drivers/cpufreq/Makefile              |   1
-rw-r--r--  drivers/cpufreq/cpufreq_interactive.c | 681
-rw-r--r--  fs/ext4/super.c                       |  65
-rw-r--r--  fs/fuse/file.c                        |  10
-rw-r--r--  include/linux/cpufreq.h               |   3
7 files changed, 786 insertions(+), 26 deletions(-)
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index 737988fca64d..b37d805e19ea 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -28,6 +28,7 @@ Contents:
 2.3 Userspace
 2.4 Ondemand
 2.5 Conservative
+2.6 Interactive
 
 3. The Governor Interface in the CPUfreq Core
 
@@ -182,6 +183,41 @@ governor but for the opposite direction. For example when set to its
 default value of '20' it means that the CPU usage needs to be below
 20% between samples for the frequency to be decreased.
 
+
+2.6 Interactive
+---------------
+
+The CPUfreq governor "interactive" is designed for latency-sensitive,
+interactive workloads.  This governor sets the CPU speed depending on
+usage, similar to the "ondemand" and "conservative" governors.
+However, the governor is more aggressive about scaling the CPU speed
+up in response to CPU-intensive activity.
+
+Sampling the CPU load every X ms can lead to under-powering the CPU
+for X ms, leading to dropped frames, stuttering UI, etc.  Instead of
+sampling the CPU at a specified rate, the interactive governor checks
+whether to scale the CPU frequency up soon after coming out of idle.
+When the CPU comes out of idle, a timer is configured to fire within
+1-2 ticks.  If the CPU is very busy between exiting idle and when the
+timer fires, then we assume the CPU is underpowered and ramp to MAX
+speed.
+
+If the CPU was not sufficiently busy to immediately ramp to MAX speed,
+then the governor evaluates the CPU load since the last speed
+adjustment, choosing the highest value between that longer-term load
+and the short-term load since idle exit to determine the CPU speed to
+ramp to.
+
+The tuneable values for this governor are:
+
+min_sample_time: The minimum amount of time to spend at the current
+frequency before ramping down.  This is to ensure that the governor
+has seen enough historic CPU load data to determine the appropriate
+workload.  Default is 80000 uS.
+
+go_maxspeed_load: The CPU load at which to ramp to max speed.  Default
+is 85.
+
+
 3. The Governor Interface in the CPUfreq Core
 =============================================
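For illustration, the two tunables documented above can be driven from
user space once the governor is running.  A minimal sketch, assuming the
attribute group lands under /sys/devices/system/cpu/cpufreq/interactive/
(per the sysfs_create_group() call on cpufreq_global_kobject later in
this patch); the path and the chosen values are assumptions to verify on
the target platform:

#include <stdio.h>

static int write_tunable(const char *name, const char *value)
{
	char path[128];
	FILE *f;

	/* Assumed location of the governor's global attribute group. */
	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpufreq/interactive/%s", name);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%s\n", value);
	return fclose(f);
}

int main(void)
{
	/* Hold each frequency for at least 50 ms before ramping down. */
	write_tunable("min_sample_time", "50000");
	/* Jump straight to max speed once load reaches 90%. */
	write_tunable("go_maxspeed_load", "90");
	return 0;
}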
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index a8c8d9c19d74..cd3464b77b60 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -110,6 +110,16 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
 	  Be aware that not all cpufreq drivers support the conservative
 	  governor. If unsure have a look at the help section of the
 	  driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+	bool "interactive"
+	select CPU_FREQ_GOV_INTERACTIVE
+	help
+	  Use the CPUFreq governor 'interactive' as default. This allows
+	  you to get a full dynamic cpu frequency capable system by simply
+	  loading your cpufreq low-level hardware driver, using the
+	  'interactive' governor for latency-sensitive workloads.
+
 endchoice
 
 config CPU_FREQ_GOV_PERFORMANCE
@@ -167,6 +177,12 @@ config CPU_FREQ_GOV_ONDEMAND
 
 	  If in doubt, say N.
 
+config CPU_FREQ_GOV_INTERACTIVE
+	tristate "'interactive' cpufreq policy governor"
+	help
+	  'interactive' - This driver adds a dynamic cpufreq policy governor
+	  designed for latency-sensitive workloads.
+
 config CPU_FREQ_GOV_CONSERVATIVE
 	tristate "'conservative' cpufreq governor"
 	depends on CPU_FREQ
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 71fc3b4173f1..30629f7dc747 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE)	+= cpufreq_powersave.o
 obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE)	+= cpufreq_userspace.o
 obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND)	+= cpufreq_ondemand.o
 obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
+obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE)	+= cpufreq_interactive.o
 
 # CPUfreq cross-arch helpers
 obj-$(CONFIG_CPU_FREQ_TABLE)		+= freq_table.o
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
new file mode 100644
index 000000000000..6069ca20a014
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -0,0 +1,681 @@
+/*
+ * drivers/cpufreq/cpufreq_interactive.c
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+#include <asm/cputime.h>
+
+static void (*pm_idle_old)(void);
+static atomic_t active_count = ATOMIC_INIT(0);
+
+struct cpufreq_interactive_cpuinfo {
+	struct timer_list cpu_timer;
+	int timer_idlecancel;
+	u64 time_in_idle;
+	u64 idle_exit_time;
+	u64 timer_run_time;
+	int idling;
+	u64 freq_change_time;
+	u64 freq_change_time_in_idle;
+	struct cpufreq_policy *policy;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int target_freq;
+	int governor_enabled;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
+
+/* Realtime kthread handles frequency scaling up; workqueue handles down. */
+static struct task_struct *up_task;
+static struct workqueue_struct *down_wq;
+static struct work_struct freq_scale_down_work;
+static cpumask_t up_cpumask;
+static spinlock_t up_cpumask_lock;
+static cpumask_t down_cpumask;
+static spinlock_t down_cpumask_lock;
+
+/* Go to max speed when CPU load at or above this value. */
+#define DEFAULT_GO_MAXSPEED_LOAD 85
+static unsigned long go_maxspeed_load;
+
+/*
+ * The minimum amount of time to spend at a frequency before we can ramp down.
+ */
+#define DEFAULT_MIN_SAMPLE_TIME 80000
+static unsigned long min_sample_time;
+
+#define DEBUG 0
+#define BUFSZ 128
+
+#if DEBUG
+#include <linux/proc_fs.h>
+
+struct dbgln {
+	int cpu;
+	unsigned long jiffy;
+	unsigned long run;
+	char buf[BUFSZ];
+};
+
+#define NDBGLNS 256
+
+static struct dbgln dbgbuf[NDBGLNS];
+static int dbgbufs;
+static int dbgbufe;
+static struct proc_dir_entry *dbg_proc;
+static spinlock_t dbgpr_lock;
+
+static u64 up_request_time;
+static unsigned int up_max_latency;
+
+static void dbgpr(char *fmt, ...)
+{
+	va_list args;
+	int n;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dbgpr_lock, flags);
+	n = dbgbufe;
+	va_start(args, fmt);
+	vsnprintf(dbgbuf[n].buf, BUFSZ, fmt, args);
+	va_end(args);
+	dbgbuf[n].cpu = smp_processor_id();
+	dbgbuf[n].run = nr_running();
+	dbgbuf[n].jiffy = jiffies;
+
+	if (++dbgbufe >= NDBGLNS)
+		dbgbufe = 0;
+
+	if (dbgbufe == dbgbufs)
+		if (++dbgbufs >= NDBGLNS)
+			dbgbufs = 0;
+
+	spin_unlock_irqrestore(&dbgpr_lock, flags);
+}
+
+static void dbgdump(void)
+{
+	int i, j;
+	unsigned long flags;
+	static struct dbgln prbuf[NDBGLNS];
+
+	spin_lock_irqsave(&dbgpr_lock, flags);
+	i = dbgbufs;
+	j = dbgbufe;
+	memcpy(prbuf, dbgbuf, sizeof(dbgbuf));
+	dbgbufs = 0;
+	dbgbufe = 0;
+	spin_unlock_irqrestore(&dbgpr_lock, flags);
+
+	while (i != j) {
+		printk("%lu %d %lu %s",
+		       prbuf[i].jiffy, prbuf[i].cpu, prbuf[i].run,
+		       prbuf[i].buf);
+		if (++i == NDBGLNS)
+			i = 0;
+	}
+}
+
+static int dbg_proc_read(char *buffer, char **start, off_t offset,
+			 int count, int *peof, void *dat)
+{
+	printk("max up_task latency=%uus\n", up_max_latency);
+	dbgdump();
+	*peof = 1;
+	return 0;
+}
+
+#else
+#define dbgpr(...) do {} while (0)
+#endif
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+					unsigned int event);
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_interactive = {
+	.name = "interactive",
+	.governor = cpufreq_governor_interactive,
+	.max_transition_latency = 10000000,
+	.owner = THIS_MODULE,
+};
+
+static void cpufreq_interactive_timer(unsigned long data)
+{
+	unsigned int delta_idle;
+	unsigned int delta_time;
+	int cpu_load;
+	int load_since_change;
+	u64 time_in_idle;
+	u64 idle_exit_time;
+	struct cpufreq_interactive_cpuinfo *pcpu =
+		&per_cpu(cpuinfo, data);
+	u64 now_idle;
+	unsigned int new_freq;
+	unsigned int index;
+
+	/*
+	 * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
+	 * this lets idle exit know the current idle time sample has
+	 * been processed, and idle exit can generate a new sample and
+	 * re-arm the timer.  This prevents a concurrent idle
+	 * exit on that CPU from writing a new set of info at the same time
+	 * the timer function runs (the timer function can't use that info
+	 * until more time passes).
+	 */
+	time_in_idle = pcpu->time_in_idle;
+	idle_exit_time = pcpu->idle_exit_time;
+	now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
+	smp_wmb();
+
+	/* If we raced with cancelling a timer, skip. */
+	if (!idle_exit_time) {
+		dbgpr("timer %d: no valid idle exit sample\n", (int) data);
+		goto exit;
+	}
+
+#if DEBUG
+	if ((int) jiffies - (int) pcpu->cpu_timer.expires >= 10)
+		dbgpr("timer %d: late by %d ticks\n",
+		      (int) data, jiffies - pcpu->cpu_timer.expires);
+#endif
+
+	delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle);
+	delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
+						  idle_exit_time);
+
+	/*
+	 * If timer ran less than 1ms after short-term sample started, retry.
+	 */
+	if (delta_time < 1000) {
+		dbgpr("timer %d: time delta %u too short exit=%llu now=%llu\n", (int) data,
+		      delta_time, idle_exit_time, pcpu->timer_run_time);
+		goto rearm;
+	}
+
+	if (delta_idle > delta_time)
+		cpu_load = 0;
+	else
+		cpu_load = 100 * (delta_time - delta_idle) / delta_time;
+
+	delta_idle = (unsigned int) cputime64_sub(now_idle,
+						  pcpu->freq_change_time_in_idle);
+	delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
+						  pcpu->freq_change_time);
+
+	if (delta_idle > delta_time)
+		load_since_change = 0;
+	else
+		load_since_change =
+			100 * (delta_time - delta_idle) / delta_time;
+
+	/*
+	 * Choose greater of short-term load (since last idle timer
+	 * started or timer function re-armed itself) or long-term load
+	 * (since last frequency change).
+	 */
+	if (load_since_change > cpu_load)
+		cpu_load = load_since_change;
+
+	if (cpu_load >= go_maxspeed_load)
+		new_freq = pcpu->policy->max;
+	else
+		new_freq = pcpu->policy->max * cpu_load / 100;
+
+	if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
+					   new_freq, CPUFREQ_RELATION_H,
+					   &index)) {
+		dbgpr("timer %d: cpufreq_frequency_table_target error\n", (int) data);
+		goto rearm;
+	}
+
+	new_freq = pcpu->freq_table[index].frequency;
+
+	if (pcpu->target_freq == new_freq) {
+		dbgpr("timer %d: load=%d, already at %d\n", (int) data, cpu_load, new_freq);
+		goto rearm_if_notmax;
+	}
+
+	/*
+	 * Do not scale down unless we have been at this frequency for the
+	 * minimum sample time.
+	 */
+	if (new_freq < pcpu->target_freq) {
+		if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time) <
+		    min_sample_time) {
+			dbgpr("timer %d: load=%d cur=%d tgt=%d not yet\n", (int) data, cpu_load, pcpu->target_freq, new_freq);
+			goto rearm;
+		}
+	}
+
+	dbgpr("timer %d: load=%d cur=%d tgt=%d queue\n", (int) data, cpu_load, pcpu->target_freq, new_freq);
+
+	if (new_freq < pcpu->target_freq) {
+		pcpu->target_freq = new_freq;
+		spin_lock(&down_cpumask_lock);
+		cpumask_set_cpu(data, &down_cpumask);
+		spin_unlock(&down_cpumask_lock);
+		queue_work(down_wq, &freq_scale_down_work);
+	} else {
+		pcpu->target_freq = new_freq;
+#if DEBUG
+		up_request_time = ktime_to_us(ktime_get());
+#endif
+		spin_lock(&up_cpumask_lock);
+		cpumask_set_cpu(data, &up_cpumask);
+		spin_unlock(&up_cpumask_lock);
+		wake_up_process(up_task);
+	}
+
+rearm_if_notmax:
+	/*
+	 * Already set max speed and don't see a need to change that,
+	 * wait until next idle to re-evaluate, don't need timer.
+	 */
+	if (pcpu->target_freq == pcpu->policy->max)
+		goto exit;
+
+rearm:
+	if (!timer_pending(&pcpu->cpu_timer)) {
+		/*
+		 * If already at min: if that CPU is idle, don't set timer.
+		 * Else cancel the timer if that CPU goes idle.  We don't
+		 * need to re-evaluate speed until the next idle exit.
+		 */
+		if (pcpu->target_freq == pcpu->policy->min) {
+			smp_rmb();
+
+			if (pcpu->idling) {
+				dbgpr("timer %d: cpu idle, don't re-arm\n", (int) data);
+				goto exit;
+			}
+
+			pcpu->timer_idlecancel = 1;
+		}
+
+		pcpu->time_in_idle = get_cpu_idle_time_us(
+			data, &pcpu->idle_exit_time);
+		mod_timer(&pcpu->cpu_timer, jiffies + 2);
+		dbgpr("timer %d: set timer for %lu exit=%llu\n", (int) data, pcpu->cpu_timer.expires, pcpu->idle_exit_time);
+	}
+
+exit:
+	return;
+}
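The ramp decision above reduces to simple integer arithmetic.  A minimal
standalone sketch of that calculation with made-up numbers follows (not
kernel code; the governor itself additionally takes the greater of the
short-term and long-term loads and rounds the result to a real table
frequency via cpufreq_frequency_table_target()):

#include <stdio.h>

#define GO_MAXSPEED_LOAD 85	/* default threshold from the patch */

static unsigned int choose_freq(unsigned int max_freq,
				unsigned int delta_time,
				unsigned int delta_idle)
{
	unsigned int load;

	/* Busy fraction of the sample window, as in the timer function. */
	if (delta_idle > delta_time)
		load = 0;
	else
		load = 100 * (delta_time - delta_idle) / delta_time;

	/* At or above the threshold, ramp straight to max speed. */
	if (load >= GO_MAXSPEED_LOAD)
		return max_freq;

	/* Otherwise scale the max frequency by the measured load. */
	return max_freq * load / 100;
}

int main(void)
{
	/* 10 ms window, 2 ms idle -> 80% load -> 80% of 1 GHz. */
	printf("%u kHz\n", choose_freq(1000000, 10000, 2000));
	/* 10 ms window, 1 ms idle -> 90% load -> max speed. */
	printf("%u kHz\n", choose_freq(1000000, 10000, 1000));
	return 0;
}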
+
+static void cpufreq_interactive_idle(void)
+{
+	struct cpufreq_interactive_cpuinfo *pcpu =
+		&per_cpu(cpuinfo, smp_processor_id());
+	int pending;
+
+	if (!pcpu->governor_enabled) {
+		pm_idle_old();
+		return;
+	}
+
+	pcpu->idling = 1;
+	smp_wmb();
+	pending = timer_pending(&pcpu->cpu_timer);
+
+	if (pcpu->target_freq != pcpu->policy->min) {
+#ifdef CONFIG_SMP
+		/*
+		 * Entering idle while not at lowest speed.  On some
+		 * platforms this can hold the other CPU(s) at that speed
+		 * even though the CPU is idle.  Set a timer to re-evaluate
+		 * speed so this idle CPU doesn't hold the other CPUs above
+		 * min indefinitely.  This should probably be a quirk of
+		 * the CPUFreq driver.
+		 */
+		if (!pending) {
+			pcpu->time_in_idle = get_cpu_idle_time_us(
+				smp_processor_id(), &pcpu->idle_exit_time);
+			pcpu->timer_idlecancel = 0;
+			mod_timer(&pcpu->cpu_timer, jiffies + 2);
+			dbgpr("idle: enter at %d, set timer for %lu exit=%llu\n",
+			      pcpu->target_freq, pcpu->cpu_timer.expires,
+			      pcpu->idle_exit_time);
+		}
+#endif
+	} else {
+		/*
+		 * If at min speed and entering idle after load has
+		 * already been evaluated, and a timer has been set just in
+		 * case the CPU suddenly goes busy, cancel that timer.  The
+		 * CPU didn't go busy; we'll recheck things upon idle exit.
+		 */
+		if (pending && pcpu->timer_idlecancel) {
+			dbgpr("idle: cancel timer for %lu\n", pcpu->cpu_timer.expires);
+			del_timer(&pcpu->cpu_timer);
+			/*
+			 * Ensure last timer run time is after current idle
+			 * sample start time, so next idle exit will always
+			 * start a new idle sampling period.
+			 */
+			pcpu->idle_exit_time = 0;
+			pcpu->timer_idlecancel = 0;
+		}
+	}
+
+	pm_idle_old();
+	pcpu->idling = 0;
+	smp_wmb();
+
+	/*
+	 * Arm the timer for 1-2 ticks later if not already, and if the timer
+	 * function has already processed the previous load sampling
+	 * interval.  (If the timer is not pending but has not processed
+	 * the previous interval, it is probably racing with us on another
+	 * CPU.  Let it compute load based on the previous sample and then
+	 * re-arm the timer for another interval when it's done, rather
+	 * than updating the interval start time to be "now", which doesn't
+	 * give the timer function enough time to make a decision on this
+	 * run.)
+	 */
+	if (timer_pending(&pcpu->cpu_timer) == 0 &&
+	    pcpu->timer_run_time >= pcpu->idle_exit_time) {
+		pcpu->time_in_idle =
+			get_cpu_idle_time_us(smp_processor_id(),
+					     &pcpu->idle_exit_time);
+		pcpu->timer_idlecancel = 0;
+		mod_timer(&pcpu->cpu_timer, jiffies + 2);
+		dbgpr("idle: exit, set timer for %lu exit=%llu\n", pcpu->cpu_timer.expires, pcpu->idle_exit_time);
+#if DEBUG
+	} else if (timer_pending(&pcpu->cpu_timer) == 0 &&
+		   pcpu->timer_run_time < pcpu->idle_exit_time) {
+		dbgpr("idle: timer not run yet: exit=%llu tmrrun=%llu\n",
+		      pcpu->idle_exit_time, pcpu->timer_run_time);
+#endif
+	}
+
+}
+
+static int cpufreq_interactive_up_task(void *data)
+{
+	unsigned int cpu;
+	cpumask_t tmp_mask;
+	struct cpufreq_interactive_cpuinfo *pcpu;
+
+#if DEBUG
+	u64 now;
+	u64 then;
+	unsigned int lat;
+#endif
+
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		spin_lock(&up_cpumask_lock);
+
+		if (cpumask_empty(&up_cpumask)) {
+			spin_unlock(&up_cpumask_lock);
+			schedule();
+
+			if (kthread_should_stop())
+				break;
+
+			spin_lock(&up_cpumask_lock);
+		}
+
+		set_current_state(TASK_RUNNING);
+
+#if DEBUG
+		then = up_request_time;
+		now = ktime_to_us(ktime_get());
+
+		if (now > then) {
+			lat = now - then;
+
+			if (lat > up_max_latency)
+				up_max_latency = lat;
+		}
+#endif
+
+		tmp_mask = up_cpumask;
+		cpumask_clear(&up_cpumask);
+		spin_unlock(&up_cpumask_lock);
+
+		for_each_cpu(cpu, &tmp_mask) {
+			pcpu = &per_cpu(cpuinfo, cpu);
+
+			if (nr_running() == 1) {
+				dbgpr("up %d: tgt=%d nothing else running\n", cpu,
+				      pcpu->target_freq);
+			}
+
+			__cpufreq_driver_target(pcpu->policy,
+						pcpu->target_freq,
+						CPUFREQ_RELATION_H);
+			pcpu->freq_change_time_in_idle =
+				get_cpu_idle_time_us(cpu,
+						     &pcpu->freq_change_time);
+			dbgpr("up %d: set tgt=%d (actual=%d)\n", cpu, pcpu->target_freq, pcpu->policy->cur);
+		}
+	}
+
+	return 0;
+}
+
+static void cpufreq_interactive_freq_down(struct work_struct *work)
+{
+	unsigned int cpu;
+	cpumask_t tmp_mask;
+	struct cpufreq_interactive_cpuinfo *pcpu;
+
+	spin_lock(&down_cpumask_lock);
+	tmp_mask = down_cpumask;
+	cpumask_clear(&down_cpumask);
+	spin_unlock(&down_cpumask_lock);
+
+	for_each_cpu(cpu, &tmp_mask) {
+		pcpu = &per_cpu(cpuinfo, cpu);
+		__cpufreq_driver_target(pcpu->policy,
+					pcpu->target_freq,
+					CPUFREQ_RELATION_H);
+		pcpu->freq_change_time_in_idle =
+			get_cpu_idle_time_us(cpu,
+					     &pcpu->freq_change_time);
+		dbgpr("down %d: set tgt=%d (actual=%d)\n", cpu, pcpu->target_freq, pcpu->policy->cur);
+	}
+}
+
+static ssize_t show_go_maxspeed_load(struct kobject *kobj,
+				     struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", go_maxspeed_load);
+}
+
+static ssize_t store_go_maxspeed_load(struct kobject *kobj,
+			struct attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+
+	/* Return count on success; returning 0 would make writers loop. */
+	ret = strict_strtoul(buf, 0, &go_maxspeed_load);
+	if (ret < 0)
+		return ret;
+	return count;
+}
+
+static struct global_attr go_maxspeed_load_attr = __ATTR(go_maxspeed_load, 0644,
+		show_go_maxspeed_load, store_go_maxspeed_load);
+
+static ssize_t show_min_sample_time(struct kobject *kobj,
+				    struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", min_sample_time);
+}
+
+static ssize_t store_min_sample_time(struct kobject *kobj,
+			struct attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+
+	ret = strict_strtoul(buf, 0, &min_sample_time);
+	if (ret < 0)
+		return ret;
+	return count;
+}
+
+static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
+		show_min_sample_time, store_min_sample_time);
+
+static struct attribute *interactive_attributes[] = {
+	&go_maxspeed_load_attr.attr,
+	&min_sample_time_attr.attr,
+	NULL,
+};
+
+static struct attribute_group interactive_attr_group = {
+	.attrs = interactive_attributes,
+	.name = "interactive",
+};
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *new_policy,
+					unsigned int event)
+{
+	int rc;
+	struct cpufreq_interactive_cpuinfo *pcpu =
+		&per_cpu(cpuinfo, new_policy->cpu);
+
+	switch (event) {
+	case CPUFREQ_GOV_START:
+		if (!cpu_online(new_policy->cpu))
+			return -EINVAL;
+
+		pcpu->policy = new_policy;
+		pcpu->freq_table = cpufreq_frequency_get_table(new_policy->cpu);
+		pcpu->target_freq = new_policy->cur;
+		pcpu->freq_change_time_in_idle =
+			get_cpu_idle_time_us(new_policy->cpu,
+					     &pcpu->freq_change_time);
+		pcpu->governor_enabled = 1;
+		/*
+		 * Do not register the idle hook and create sysfs
+		 * entries if we have already done so.
+		 */
+		if (atomic_inc_return(&active_count) > 1)
+			return 0;
+
+		rc = sysfs_create_group(cpufreq_global_kobject,
+					&interactive_attr_group);
+		if (rc)
+			return rc;
+
+		pm_idle_old = pm_idle;
+		pm_idle = cpufreq_interactive_idle;
+		break;
+
+	case CPUFREQ_GOV_STOP:
+		pcpu->governor_enabled = 0;
+
+		if (atomic_dec_return(&active_count) > 0)
+			return 0;
+
+		sysfs_remove_group(cpufreq_global_kobject,
+				   &interactive_attr_group);
+
+		pm_idle = pm_idle_old;
+		del_timer(&pcpu->cpu_timer);
+		break;
+
+	case CPUFREQ_GOV_LIMITS:
+		if (new_policy->max < new_policy->cur)
+			__cpufreq_driver_target(new_policy,
+					new_policy->max, CPUFREQ_RELATION_H);
+		else if (new_policy->min > new_policy->cur)
+			__cpufreq_driver_target(new_policy,
+					new_policy->min, CPUFREQ_RELATION_L);
+		break;
+	}
+	return 0;
+}
+
+static int __init cpufreq_interactive_init(void)
+{
+	unsigned int i;
+	struct cpufreq_interactive_cpuinfo *pcpu;
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+	go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD;
+	min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
+
+	/* Initialize per-cpu timers */
+	for_each_possible_cpu(i) {
+		pcpu = &per_cpu(cpuinfo, i);
+		init_timer(&pcpu->cpu_timer);
+		pcpu->cpu_timer.function = cpufreq_interactive_timer;
+		pcpu->cpu_timer.data = i;
+	}
+
+	up_task = kthread_create(cpufreq_interactive_up_task, NULL,
+				 "kinteractiveup");
+	if (IS_ERR(up_task))
+		return PTR_ERR(up_task);
+
+	sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
+	get_task_struct(up_task);
+
+	/* No rescuer thread, bind to CPU queuing the work for possibly
+	   warm cache (probably doesn't matter much). */
+	down_wq = alloc_workqueue("kinteractive_down", 0, 1);
+
+	if (!down_wq)
+		goto err_freeuptask;
+
+	INIT_WORK(&freq_scale_down_work,
+		  cpufreq_interactive_freq_down);
+
+	spin_lock_init(&up_cpumask_lock);
+	spin_lock_init(&down_cpumask_lock);
+
+#if DEBUG
+	spin_lock_init(&dbgpr_lock);
+	dbg_proc = create_proc_entry("igov", S_IWUSR | S_IRUGO, NULL);
+	dbg_proc->read_proc = dbg_proc_read;
+#endif
+
+	return cpufreq_register_governor(&cpufreq_gov_interactive);
+
+err_freeuptask:
+	put_task_struct(up_task);
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+fs_initcall(cpufreq_interactive_init);
+#else
+module_init(cpufreq_interactive_init);
+#endif
+
+static void __exit cpufreq_interactive_exit(void)
+{
+	cpufreq_unregister_governor(&cpufreq_gov_interactive);
+	kthread_stop(up_task);
+	put_task_struct(up_task);
+	destroy_workqueue(down_wq);
+}
+
+module_exit(cpufreq_interactive_exit);
+
+MODULE_AUTHOR("Mike Chan <mike@android.com>");
+MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
+	"latency-sensitive workloads");
+MODULE_LICENSE("GPL");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 84b0ef45e24e..e13b302ae02b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2924,6 +2924,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
 
+	err = percpu_counter_init(&sbi->s_freeblocks_counter,
+				  ext4_count_free_blocks(sb));
+	if (!err) {
+		err = percpu_counter_init(&sbi->s_freeinodes_counter,
+					  ext4_count_free_inodes(sb));
+	}
+	if (!err) {
+		err = percpu_counter_init(&sbi->s_dirs_counter,
+					  ext4_count_dirs(sb));
+	}
+	if (!err) {
+		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+	}
+	if (err) {
+		ext4_msg(sb, KERN_ERR, "insufficient memory");
+		goto failed_mount3;
+	}
+
 	sbi->s_stripe = ext4_get_stripe_size(sbi);
 	sbi->s_max_writeback_mb_bump = 128;
 
@@ -3022,22 +3040,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
-no_journal:
-	err = percpu_counter_init(&sbi->s_freeblocks_counter,
-				  ext4_count_free_blocks(sb));
-	if (!err)
-		err = percpu_counter_init(&sbi->s_freeinodes_counter,
-					  ext4_count_free_inodes(sb));
-	if (!err)
-		err = percpu_counter_init(&sbi->s_dirs_counter,
-					  ext4_count_dirs(sb));
-	if (!err)
-		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
-	if (err) {
-		ext4_msg(sb, KERN_ERR, "insufficient memory");
-		goto failed_mount_wq;
-	}
+	/*
+	 * The journal may have updated the bg summary counts, so we
+	 * need to update the global counters.
+	 */
+	percpu_counter_set(&sbi->s_freeblocks_counter,
+			   ext4_count_free_blocks(sb));
+	percpu_counter_set(&sbi->s_freeinodes_counter,
+			   ext4_count_free_inodes(sb));
+	percpu_counter_set(&sbi->s_dirs_counter,
+			   ext4_count_dirs(sb));
+	percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
 
+no_journal:
 	EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
 	if (!EXT4_SB(sb)->dio_unwritten_wq) {
 		printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3184,10 +3199,6 @@ failed_mount_wq:
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
 	}
-	percpu_counter_destroy(&sbi->s_freeblocks_counter);
-	percpu_counter_destroy(&sbi->s_freeinodes_counter);
-	percpu_counter_destroy(&sbi->s_dirs_counter);
-	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount3:
 	if (sbi->s_flex_groups) {
 		if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3195,6 +3206,10 @@ failed_mount3:
 		else
 			kfree(sbi->s_flex_groups);
 	}
+	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeinodes_counter);
+	percpu_counter_destroy(&sbi->s_dirs_counter);
+	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
@@ -3523,13 +3538,11 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	else
 		es->s_kbytes_written =
 			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
-	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
-		ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
-			&EXT4_SB(sb)->s_freeblocks_counter));
-	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
-		es->s_free_inodes_count =
-			cpu_to_le32(percpu_counter_sum_positive(
-				&EXT4_SB(sb)->s_freeinodes_counter));
+	ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+		&EXT4_SB(sb)->s_freeblocks_counter));
+	es->s_free_inodes_count =
+		cpu_to_le32(percpu_counter_sum_positive(
+			&EXT4_SB(sb)->s_freeinodes_counter));
 	sb->s_dirt = 0;
 	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
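The ext4 hunks above move the percpu counter setup ahead of journal
loading and then re-sync the counters once recovery has corrected the
block-group summaries, which is also why ext4_commit_super() can drop
its percpu_counter_initialized() guards.  A minimal user-space sketch of
that init-early/re-set-later ordering (illustrative names only, not
kernel code):

#include <stdio.h>

static long free_blocks_estimate(void)     { return 1000; } /* pre-recovery */
static long free_blocks_after_replay(void) { return 950;  } /* post-recovery */

int main(void)
{
	long free_blocks;

	/* Early init (mirrors percpu_counter_init() before journal load):
	 * the counter always exists, so error paths and the analogue of
	 * ext4_commit_super() never see an uninitialized counter. */
	free_blocks = free_blocks_estimate();

	/* ... journal recovery would run here, fixing the summaries ... */

	/* Re-sync (mirrors percpu_counter_set() after the journal load). */
	free_blocks = free_blocks_after_replay();

	printf("free blocks: %ld\n", free_blocks);
	return 0;
}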
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c8224587123f..9242d294fe90 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -134,6 +134,7 @@ EXPORT_SYMBOL_GPL(fuse_do_open);
 void fuse_finish_open(struct inode *inode, struct file *file)
 {
 	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 
 	if (ff->open_flags & FOPEN_DIRECT_IO)
 		file->f_op = &fuse_direct_io_file_operations;
@@ -141,6 +142,15 @@ void fuse_finish_open(struct inode *inode, struct file *file)
 		invalidate_inode_pages2(inode->i_mapping);
 	if (ff->open_flags & FOPEN_NONSEEKABLE)
 		nonseekable_open(inode, file);
+	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
+		struct fuse_inode *fi = get_fuse_inode(inode);
+
+		spin_lock(&fc->lock);
+		fi->attr_version = ++fc->attr_version;
+		i_size_write(inode, 0);
+		spin_unlock(&fc->lock);
+		fuse_invalidate_attr(inode);
+	}
 }
 
 int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index c3e9de8321c6..e71e0f6ecd58 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -364,6 +364,9 @@ extern struct cpufreq_governor cpufreq_gov_ondemand;
 #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE)
 extern struct cpufreq_governor cpufreq_gov_conservative;
 #define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_conservative)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE)
+extern struct cpufreq_governor cpufreq_gov_interactive;
+#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_interactive)
 #endif
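Beyond the compile-time default selected above, a built-in governor can
be chosen at runtime through the standard cpufreq sysfs interface (this
is ordinary cpufreq behavior, not something added by this patch).  A
minimal user-space sketch, assuming the usual per-CPU policy layout:

#include <stdio.h>

int main(void)
{
	/* Standard per-policy sysfs path for CPU0; adjust per CPU. */
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor", "w");

	if (!f) {
		perror("scaling_governor");
		return 1;
	}
	fprintf(f, "interactive\n");
	return fclose(f);
}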