diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-17 09:51:50 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-17 09:51:50 -0700 |
commit | c30938d59e7468259855da91a885b19e8044b5f4 (patch) | |
tree | 15fa3b7c4696947d43702273291398a91232f644 | |
parent | aa2638a210ab0d7c6702cd54315365785fce326c (diff) | |
parent | 8e7c25971b1590776a90b249de3d859dd45e7414 (diff) |
Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq
* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq:
[CPUFREQ] cpumask: new cpumask operators for arch/x86/kernel/cpu/cpufreq/powernow-k8.c
[CPUFREQ] cpumask: avoid playing with cpus_allowed in powernow-k8.c
[CPUFREQ] cpumask: avoid cpumask games in arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
[CPUFREQ] cpumask: avoid playing with cpus_allowed in speedstep-ich.c
[CPUFREQ] powernow-k8: get drv data for correct CPU
[CPUFREQ] powernow-k8: read P-state from HW
[CPUFREQ] reduce scope of ACPI_PSS_BIOS_BUG_MSG[]
[CPUFREQ] Clean up convoluted code in arch/x86/kernel/tsc.c:time_cpufreq_notifier()
[CPUFREQ] minor correction to cpu-freq documentation
[CPUFREQ] powernow-k8.c: mess cleanup
[CPUFREQ] Only set sampling_rate_max deprecated, sampling_rate_min is useful
[CPUFREQ] powernow-k8: Set transition latency to 1 if ACPI tables export 0
[CPUFREQ] ondemand: Uncouple minimal sampling rate from HZ in NO_HZ case
-rw-r--r-- | Documentation/cpu-freq/cpu-drivers.txt | 2 | ||||
-rw-r--r-- | Documentation/cpu-freq/governors.txt | 26 | ||||
-rw-r--r-- | Documentation/cpu-freq/user-guide.txt | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 191 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 11 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 60 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 93 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/tsc.c | 8 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 61 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 68 |
11 files changed, 235 insertions, 287 deletions
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index 43c743903dd7..75a58d14d3cf 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -155,7 +155,7 @@ actual frequency must be determined using the following rules: - if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal target_freq. ("H for highest, but no higher than") -Here again the frequency table helper might assist you - see section 3 +Here again the frequency table helper might assist you - see section 2 for details. diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index ce73f3eb5ddb..aed082f49d09 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -119,10 +119,6 @@ want the kernel to look at the CPU usage and to make decisions on what to do about the frequency. Typically this is set to values of around '10000' or more. It's default value is (cmp. with users-guide.txt): transition_latency * 1000 -The lowest value you can set is: -transition_latency * 100 or it may get restricted to a value where it -makes not sense for the kernel anymore to poll that often which depends -on your HZ config variable (HZ=1000: max=20000us, HZ=250: max=5000). Be aware that transition latency is in ns and sampling_rate is in us, so you get the same sysfs value by default. Sampling rate should always get adjusted considering the transition latency @@ -131,14 +127,20 @@ in the bash (as said, 1000 is default), do: echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \ >ondemand/sampling_rate -show_sampling_rate_(min|max): THIS INTERFACE IS DEPRECATED, DON'T USE IT. -You can use wider ranges now and the general -cpuinfo_transition_latency variable (cmp. with user-guide.txt) can be -used to obtain exactly the same info: -show_sampling_rate_min = transtition_latency * 500 / 1000 -show_sampling_rate_max = transtition_latency * 500000 / 1000 -(divided by 1000 is to illustrate that sampling rate is in us and -transition latency is exported ns). +show_sampling_rate_min: +The sampling rate is limited by the HW transition latency: +transition_latency * 100 +Or by kernel restrictions: +If CONFIG_NO_HZ is set, the limit is 10ms fixed. +If CONFIG_NO_HZ is not set or no_hz=off boot parameter is used, the +limits depend on the CONFIG_HZ option: +HZ=1000: min=20000us (20ms) +HZ=250: min=80000us (80ms) +HZ=100: min=200000us (200ms) +The highest value of kernel and HW latency restrictions is shown and +used as the minimum sampling rate. + +show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT. up_threshold: defines what the average CPU usage between the samplings of 'sampling_rate' needs to be for the kernel to make a decision on diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 75f41193f3e1..5d5f5fadd1c2 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -31,7 +31,6 @@ Contents: 3. How to change the CPU cpufreq policy and/or speed 3.1 Preferred interface: sysfs -3.2 Deprecated interfaces diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index cf52215d9eb1..81cbe64ed6b4 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1,3 +1,4 @@ + /* * (c) 2003-2006 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the @@ -117,20 +118,17 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; if (cpu_family == CPU_HW_PSTATE) { - if (data->currpstate == HW_PSTATE_INVALID) { - /* read (initial) hw pstate if not yet set */ - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - - /* - * a workaround for family 11h erratum 311 might cause - * an "out-of-range Pstate if the core is in Pstate-0 - */ - if (i >= data->numps) - data->currpstate = HW_PSTATE_0; - else - data->currpstate = i; - } + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + data->currpstate = i; + + /* + * a workaround for family 11h erratum 311 might cause + * an "out-of-range Pstate if the core is in Pstate-0 + */ + if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps)) + data->currpstate = HW_PSTATE_0; + return 0; } do { @@ -510,41 +508,34 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, return 0; } -static int check_supported_cpu(unsigned int cpu) +static void check_supported_cpu(void *_rc) { - cpumask_t oldmask; u32 eax, ebx, ecx, edx; - unsigned int rc = 0; - - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + int *rc = _rc; - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); - goto out; - } + *rc = -ENODEV; if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) - goto out; + return; eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) - goto out; + return; if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); - goto out; + return; } eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { printk(KERN_INFO PFX "No frequency change capabilities detected\n"); - goto out; + return; } cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); @@ -552,21 +543,17 @@ static int check_supported_cpu(unsigned int cpu) != P_STATE_TRANSITION_CAPABLE) { printk(KERN_INFO PFX "Power state transitions not supported\n"); - goto out; + return; } } else { /* must be a HW Pstate capable processor */ cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) cpu_family = CPU_HW_PSTATE; else - goto out; + return; } - rc = 1; - -out: - set_cpus_allowed_ptr(current, &oldmask); - return rc; + *rc = 0; } static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, @@ -823,13 +810,14 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) return; - control = data->acpi_data.states[index].control; data->irt = (control - >> IRT_SHIFT) & IRT_MASK; data->rvo = (control >> - RVO_SHIFT) & RVO_MASK; data->exttype = (control - >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1 - << ((control >> MVS_SHIFT) & MVS_MASK); data->vstable = - (control >> VST_SHIFT) & VST_MASK; } + control = data->acpi_data.states[index].control; + data->irt = (control >> IRT_SHIFT) & IRT_MASK; + data->rvo = (control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; + data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK); + data->vstable = (control >> VST_SHIFT) & VST_MASK; +} static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { @@ -1046,6 +1034,19 @@ static int get_transition_latency(struct powernow_k8_data *data) if (cur_latency > max_latency) max_latency = cur_latency; } + if (max_latency == 0) { + /* + * Fam 11h always returns 0 as transition latency. + * This is intended and means "very fast". While cpufreq core + * and governors currently can handle that gracefully, better + * set it to 1 to avoid problems in the future. + * For all others it's a BIOS bug. + */ + if (!boot_cpu_data.x86 == 0x11) + printk(KERN_ERR FW_WARN PFX "Invalid zero transition " + "latency\n"); + max_latency = 1; + } /* value in usecs, needs to be in nanoseconds */ return 1000 * max_latency; } @@ -1093,7 +1094,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - for_each_cpu_mask_nr(i, *(data->available_cores)) { + for_each_cpu(i, data->available_cores) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -1101,7 +1102,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, res = transition_fid_vid(data, fid, vid); freqs.new = find_khz_freq_from_fid(data->currfid); - for_each_cpu_mask_nr(i, *(data->available_cores)) { + for_each_cpu(i, data->available_cores) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -1126,7 +1127,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, data->currpstate); freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - for_each_cpu_mask_nr(i, *(data->available_cores)) { + for_each_cpu(i, data->available_cores) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -1134,7 +1135,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, res = transition_pstate(data, pstate); freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - for_each_cpu_mask_nr(i, *(data->available_cores)) { + for_each_cpu(i, data->available_cores) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -1235,21 +1236,47 @@ static int powernowk8_verify(struct cpufreq_policy *pol) return cpufreq_frequency_table_verify(pol, data->powernow_table); } -static const char ACPI_PSS_BIOS_BUG_MSG[] = - KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" - KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; +struct init_on_cpu { + struct powernow_k8_data *data; + int rc; +}; + +static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu) +{ + struct init_on_cpu *init_on_cpu = _init_on_cpu; + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing init, change pending bit set\n"); + init_on_cpu->rc = -ENODEV; + return; + } + + if (query_current_values_with_pending_wait(init_on_cpu->data)) { + init_on_cpu->rc = -ENODEV; + return; + } + + if (cpu_family == CPU_OPTERON) + fidvid_msr_init(); + + init_on_cpu->rc = 0; +} /* per CPU init entry point to the driver */ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { + static const char ACPI_PSS_BIOS_BUG_MSG[] = + KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" + KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; struct powernow_k8_data *data; - cpumask_t oldmask; + struct init_on_cpu init_on_cpu; int rc; if (!cpu_online(pol->cpu)) return -ENODEV; - if (!check_supported_cpu(pol->cpu)) + smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1); + if (rc) return -ENODEV; data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); @@ -1289,27 +1316,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) pol->cpuinfo.transition_latency = get_transition_latency(data); /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out_unmask; - } - - if (pending_bit_stuck()) { - printk(KERN_ERR PFX "failing init, change pending bit set\n"); - goto err_out_unmask; - } - - if (query_current_values_with_pending_wait(data)) - goto err_out_unmask; - - if (cpu_family == CPU_OPTERON) - fidvid_msr_init(); - - /* run on any CPU again */ - set_cpus_allowed_ptr(current, &oldmask); + init_on_cpu.data = data; + smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu, + &init_on_cpu, 1); + rc = init_on_cpu.rc; + if (rc != 0) + goto err_out_exit_acpi; if (cpu_family == CPU_HW_PSTATE) cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); @@ -1346,8 +1358,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) return 0; -err_out_unmask: - set_cpus_allowed_ptr(current, &oldmask); +err_out_exit_acpi: powernow_k8_cpu_exit_acpi(data); err_out: @@ -1372,28 +1383,25 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) return 0; } +static void query_values_on_cpu(void *_err) +{ + int *err = _err; + struct powernow_k8_data *data = __get_cpu_var(powernow_data); + + *err = query_current_values_with_pending_wait(data); +} + static unsigned int powernowk8_get(unsigned int cpu) { - struct powernow_k8_data *data; - cpumask_t oldmask = current->cpus_allowed; + struct powernow_k8_data *data = per_cpu(powernow_data, cpu); unsigned int khz = 0; - unsigned int first; - - first = cpumask_first(cpu_core_mask(cpu)); - data = per_cpu(powernow_data, first); + int err; if (!data) return -EINVAL; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX - "limiting to CPU %d failed in powernowk8_get\n", cpu); - set_cpus_allowed_ptr(current, &oldmask); - return 0; - } - - if (query_current_values_with_pending_wait(data)) + smp_call_function_single(cpu, query_values_on_cpu, &err, true); + if (err) goto out; if (cpu_family == CPU_HW_PSTATE) @@ -1404,7 +1412,6 @@ static unsigned int powernowk8_get(unsigned int cpu) out: - set_cpus_allowed_ptr(current, &oldmask); return khz; } @@ -1430,7 +1437,9 @@ static int __cpuinit powernowk8_init(void) unsigned int i, supported_cpus = 0; for_each_online_cpu(i) { - if (check_supported_cpu(i)) + int rc; + smp_call_function_single(i, check_supported_cpu, &rc, 1); + if (rc == 0) supported_cpus++; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 6c6698feade1..c9c1190b5e1f 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -223,14 +223,3 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); - -#ifdef CONFIG_SMP -static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -{ -} -#else -static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -{ - cpu_set(0, cpu_sharedcore_mask[0]); -} -#endif diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 55c831ed71ce..8d672ef162ce 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -323,14 +323,8 @@ static unsigned int get_cur_freq(unsigned int cpu) { unsigned l, h; unsigned clock_freq; - cpumask_t saved_mask; - saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) - return 0; - - rdmsr(MSR_IA32_PERF_STATUS, l, h); + rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h); clock_freq = extract_clock(l, cpu, 0); if (unlikely(clock_freq == 0)) { @@ -340,11 +334,9 @@ static unsigned int get_cur_freq(unsigned int cpu) * P-state transition (like TM2). Get the last freq set * in PERF_CTL. */ - rdmsr(MSR_IA32_PERF_CTL, l, h); + rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h); clock_freq = extract_clock(l, cpu, 1); } - - set_cpus_allowed_ptr(current, &saved_mask); return clock_freq; } @@ -467,15 +459,10 @@ static int centrino_target (struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int retval = 0; unsigned int j, k, first_cpu, tmp; - cpumask_var_t saved_mask, covered_cpus; + cpumask_var_t covered_cpus; - if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) - return -ENOMEM; - if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { - free_cpumask_var(saved_mask); + if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) return -ENOMEM; - } - cpumask_copy(saved_mask, ¤t->cpus_allowed); if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { retval = -ENODEV; @@ -493,7 +480,7 @@ static int centrino_target (struct cpufreq_policy *policy, first_cpu = 1; for_each_cpu(j, policy->cpus) { - const struct cpumask *mask; + int good_cpu; /* cpufreq holds the hotplug lock, so we are safe here */ if (!cpu_online(j)) @@ -504,32 +491,30 @@ static int centrino_target (struct cpufreq_policy *policy, * Make sure we are running on CPU that wants to change freq */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - mask = policy->cpus; + good_cpu = cpumask_any_and(policy->cpus, + cpu_online_mask); else - mask = cpumask_of(j); + good_cpu = j; - set_cpus_allowed_ptr(current, mask); - preempt_disable(); - if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { + if (good_cpu >= nr_cpu_ids) { dprintk("couldn't limit to CPUs in this domain\n"); retval = -EAGAIN; if (first_cpu) { /* We haven't started the transition yet. */ - goto migrate_end; + goto out; } - preempt_enable(); break; } msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; if (first_cpu) { - rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); + rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); if (msr == (oldmsr & 0xffff)) { dprintk("no change needed - msr was and needs " "to be %x\n", oldmsr); retval = 0; - goto migrate_end; + goto out; } freqs.old = extract_clock(oldmsr, cpu, 0); @@ -553,14 +538,11 @@ static int centrino_target (struct cpufreq_policy *policy, oldmsr |= msr; } - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - preempt_enable(); + wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) break; - } - cpu_set(j, *covered_cpus); - preempt_enable(); + cpumask_set_cpu(j, covered_cpus); } for_each_cpu(k, policy->cpus) { @@ -578,10 +560,8 @@ static int centrino_target (struct cpufreq_policy *policy, * Best effort undo.. */ - for_each_cpu_mask_nr(j, *covered_cpus) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - } + for_each_cpu(j, covered_cpus) + wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h); tmp = freqs.new; freqs.new = freqs.old; @@ -593,15 +573,9 @@ static int centrino_target (struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } } - set_cpus_allowed_ptr(current, saved_mask); retval = 0; - goto out; -migrate_end: - preempt_enable(); - set_cpus_allowed_ptr(current, saved_mask); out: - free_cpumask_var(saved_mask); free_cpumask_var(covered_cpus); return retval; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 016c1a4fa3fc..6911e91fb4f6 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -89,7 +89,8 @@ static int speedstep_find_register(void) * speedstep_set_state - set the SpeedStep state * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) * - * Tries to change the SpeedStep state. + * Tries to change the SpeedStep state. Can be called from + * smp_call_function_single. */ static void speedstep_set_state(unsigned int state) { @@ -143,6 +144,11 @@ static void speedstep_set_state(unsigned int state) return; } +/* Wrapper for smp_call_function_single. */ +static void _speedstep_set_state(void *_state) +{ + speedstep_set_state(*(unsigned int *)_state); +} /** * speedstep_activate - activate SpeedStep control in the chipset @@ -226,22 +232,28 @@ static unsigned int speedstep_detect_chipset(void) return 0; } -static unsigned int _speedstep_get(const struct cpumask *cpus) -{ +struct get_freq_data { unsigned int speed; - cpumask_t cpus_allowed; - - cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpus); - speed = speedstep_get_frequency(speedstep_processor); - set_cpus_allowed_ptr(current, &cpus_allowed); - dprintk("detected %u kHz as current frequency\n", speed); - return speed; + unsigned int processor; +}; + +static void get_freq_data(void *_data) +{ + struct get_freq_data *data = _data; + + data->speed = speedstep_get_frequency(data->processor); } static unsigned int speedstep_get(unsigned int cpu) { - return _speedstep_get(cpumask_of(cpu)); + struct get_freq_data data = { .processor = cpu }; + + /* You're supposed to ensure CPU is online. */ + if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) + BUG(); + + dprintk("detected %u kHz as current frequency\n", data.speed); + return data.speed; } /** @@ -257,16 +269,16 @@ static int speedstep_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - unsigned int newstate = 0; + unsigned int newstate = 0, policy_cpu; struct cpufreq_freqs freqs; - cpumask_t cpus_allowed; int i; if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) return -EINVAL; - freqs.old = _speedstep_get(policy->cpus); + policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); + freqs.old = speedstep_get(policy_cpu); freqs.new = speedstep_freqs[newstate].frequency; freqs.cpu = policy->cpu; @@ -276,20 +288,13 @@ static int speedstep_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; - cpus_allowed = current->cpus_allowed; - for_each_cpu(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } - /* switch to physical CPU where state is to be changed */ - set_cpus_allowed_ptr(current, policy->cpus); - - speedstep_set_state(newstate); - - /* allow to be run on all CPUs */ - set_cpus_allowed_ptr(current, &cpus_allowed); + smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate, + true); for_each_cpu(i, policy->cpus) { freqs.cpu = i; @@ -312,33 +317,43 @@ static int speedstep_verify(struct cpufreq_policy *policy) return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); } +struct get_freqs { + struct cpufreq_policy *policy; + int ret; +}; + +static void get_freqs_on_cpu(void *_get_freqs) +{ + struct get_freqs *get_freqs = _get_freqs; + + get_freqs->ret = + speedstep_get_freqs(speedstep_processor, + &speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + &get_freqs->policy->cpuinfo.transition_latency, + &speedstep_set_state); +} static int speedstep_cpu_init(struct cpufreq_policy *policy) { - int result = 0; - unsigned int speed; - cpumask_t cpus_allowed; + int result; + unsigned int policy_cpu, speed; + struct get_freqs gf; /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); #endif - - cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, policy->cpus); + policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); /* detect low and high frequency and transition latency */ - result = speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, - &policy->cpuinfo.transition_latency, - &speedstep_set_state); - set_cpus_allowed_ptr(current, &cpus_allowed); - if (result) - return result; + gf.policy = policy; + smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1); + if (gf.ret) + return gf.ret; /* get current speed setting */ - speed = _speedstep_get(policy->cpus); + speed = speedstep_get(policy_cpu); if (!speed) return -EIO; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 2e3c6862657b..f4c290b8482f 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -226,6 +226,7 @@ static unsigned int pentium4_get_frequency(void) } +/* Warning: may get called from smp_call_function_single. */ unsigned int speedstep_get_frequency(unsigned int processor) { switch (processor) { diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ae3180c506a6..b0597ad02c93 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -632,17 +632,15 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - unsigned long *lpj, dummy; + unsigned long *lpj; if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) return 0; - lpj = &dummy; - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + lpj = &boot_cpu_data.loops_per_jiffy; #ifdef CONFIG_SMP + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#else - lpj = &boot_cpu_data.loops_per_jiffy; #endif if (!ref_freq) { diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 7a74d175287b..7fc58af748b4 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -42,27 +42,12 @@ * this governor will not work. * All times here are in uS. */ -static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE_RATIO (2) -/* for correct statistics, we need at least 10 ticks between each measure */ -#define MIN_STAT_SAMPLING_RATE \ - (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) -#define MIN_SAMPLING_RATE \ - (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) -/* Above MIN_SAMPLING_RATE will vanish with its sysfs file soon - * Define the minimal settable sampling rate to the greater of: - * - "HW transition latency" * 100 (same as default sampling / 10) - * - MIN_STAT_SAMPLING_RATE - * To avoid that userspace shoots itself. -*/ -static unsigned int minimum_sampling_rate(void) -{ - return max(def_sampling_rate / 10, MIN_STAT_SAMPLING_RATE); -} -/* This will also vanish soon with removing sampling_rate_max */ -#define MAX_SAMPLING_RATE (500 * def_sampling_rate) +static unsigned int min_sampling_rate; + #define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (100) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) @@ -182,27 +167,14 @@ static struct notifier_block dbs_cpufreq_notifier_block = { /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { - static int print_once; - - if (!print_once) { - printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max " - "sysfs file is deprecated - used by: %s\n", - current->comm); - print_once = 1; - } - return sprintf(buf, "%u\n", MAX_SAMPLING_RATE); + printk_once(KERN_INFO "CPUFREQ: conservative sampling_rate_max " + "sysfs file is deprecated - used by: %s\n", current->comm); + return sprintf(buf, "%u\n", -1U); } static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) { - static int print_once; - - if (!print_once) { - printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max " - "sysfs file is deprecated - used by: %s\n", current->comm); - print_once = 1; - } - return sprintf(buf, "%u\n", MIN_SAMPLING_RATE); + return sprintf(buf, "%u\n", min_sampling_rate); } #define define_one_ro(_name) \ @@ -254,7 +226,7 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, return -EINVAL; mutex_lock(&dbs_mutex); - dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate()); + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); mutex_unlock(&dbs_mutex); return count; @@ -601,11 +573,18 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (latency == 0) latency = 1; - def_sampling_rate = - max(latency * LATENCY_MULTIPLIER, - MIN_STAT_SAMPLING_RATE); - - dbs_tuners_ins.sampling_rate = def_sampling_rate; + /* + * conservative does not implement micro like ondemand + * governor, thus we are bound to jiffes/HZ + */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + /* Bring kernel and HW constraints together */ + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); cpufreq_register_notifier( &dbs_cpufreq_notifier_block, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index e741c339df76..1911d1729353 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -32,6 +32,7 @@ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) #define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -45,27 +46,12 @@ * this governor will not work. * All times here are in uS. */ -static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE_RATIO (2) -/* for correct statistics, we need at least 10 ticks between each measure */ -#define MIN_STAT_SAMPLING_RATE \ - (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) -#define MIN_SAMPLING_RATE \ - (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) -/* Above MIN_SAMPLING_RATE will vanish with its sysfs file soon - * Define the minimal settable sampling rate to the greater of: - * - "HW transition latency" * 100 (same as default sampling / 10) - * - MIN_STAT_SAMPLING_RATE - * To avoid that userspace shoots itself. -*/ -static unsigned int minimum_sampling_rate(void) -{ - return max(def_sampling_rate / 10, MIN_STAT_SAMPLING_RATE); -} -/* This will also vanish soon with removing sampling_rate_max */ -#define MAX_SAMPLING_RATE (500 * def_sampling_rate) +static unsigned int min_sampling_rate; + #define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (100) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) static void do_dbs_timer(struct work_struct *work); @@ -219,28 +205,14 @@ static void ondemand_powersave_bias_init(void) /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { - static int print_once; - - if (!print_once) { - printk(KERN_INFO "CPUFREQ: ondemand sampling_rate_max " - "sysfs file is deprecated - used by: %s\n", - current->comm); - print_once = 1; - } - return sprintf(buf, "%u\n", MAX_SAMPLING_RATE); + printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max " + "sysfs file is deprecated - used by: %s\n", current->comm); + return sprintf(buf, "%u\n", -1U); } static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) { - static int print_once; - - if (!print_once) { - printk(KERN_INFO "CPUFREQ: ondemand sampling_rate_min " - "sysfs file is deprecated - used by: %s\n", - current->comm); - print_once = 1; - } - return sprintf(buf, "%u\n", MIN_SAMPLING_RATE); + return sprintf(buf, "%u\n", min_sampling_rate); } #define define_one_ro(_name) \ @@ -274,7 +246,7 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, mutex_unlock(&dbs_mutex); return -EINVAL; } - dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate()); + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); mutex_unlock(&dbs_mutex); return count; @@ -619,12 +591,12 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, latency = policy->cpuinfo.transition_latency / 1000; if (latency == 0) latency = 1; - - def_sampling_rate = - max(latency * LATENCY_MULTIPLIER, - MIN_STAT_SAMPLING_RATE); - - dbs_tuners_ins.sampling_rate = def_sampling_rate; + /* Bring kernel and HW constraints together */ + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); } dbs_timer_init(this_dbs_info); @@ -678,6 +650,16 @@ static int __init cpufreq_gov_dbs_init(void) dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; dbs_tuners_ins.down_differential = MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + /* + * In no_hz/micro accounting case we set the minimum frequency + * not depending on HZ, but fixed (very low). The deferred + * timer might skip some samples if idle/sleeping as needed. + */ + min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + /* For correct statistics, we need 10 ticks for each measure */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); } kondemand_wq = create_workqueue("kondemand"); |