From 8aae8284fece2b8fc404ccd40b7a30aa96f317b5 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Mon, 21 Nov 2005 07:23:08 -0800 Subject: [CPUFREQ] Improve Error reporting in powernow-k8 This patch cleans up some error messages in the powernow-k8 driver and makes them more understandable. Signed-off-by: Jacob Shin Signed-off-by: Mark Langsdorf Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 31 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 68a1fc87f4ca..349782736808 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -45,7 +45,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.50.4" +#define VERSION "version 1.50.5" #include "powernow-k8.h" /* serialize freq changes */ @@ -464,7 +464,7 @@ static int check_supported_cpu(unsigned int cpu) set_cpus_allowed(current, cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { - printk(KERN_ERR "limiting to cpu %u failed\n", cpu); + printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); goto out; } @@ -517,22 +517,28 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); return -ENODEV; } - if ((pst[j].fid > MAX_FID) - || (pst[j].fid & 1) - || (j && (pst[j].fid < HI_FID_TABLE_BOTTOM))) { + if (pst[j].fid > MAX_FID) { + printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (pst[j].fid & 1) { + printk(KERN_ERR BFX "fid invalid - %d : 0x%x\n", j, pst[j].fid); + return -EINVAL; + } + if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { /* Only first fid is allowed to be in "low" range */ - printk(KERN_ERR PFX "two low fids - %d : 0x%x\n", j, pst[j].fid); + printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); return -EINVAL; } if (pst[j].fid < lastfid) lastfid = pst[j].fid; } if (lastfid & 1) { - printk(KERN_ERR PFX "lastfid invalid\n"); + printk(KERN_ERR BFX "lastfid invalid\n"); return -EINVAL; } if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO PFX "first fid not from lo freq table\n"); + printk(KERN_INFO BFX "first fid not from lo freq table\n"); return 0; } @@ -912,7 +918,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); goto err_out; } @@ -982,6 +988,9 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) cpumask_t oldmask = CPU_MASK_ALL; int rc, i; + if (!cpu_online(pol->cpu)) + return -ENODEV; + if (!check_supported_cpu(pol->cpu)) return -ENODEV; @@ -1021,7 +1030,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); goto err_out; } @@ -1162,7 +1171,7 @@ static void __exit powernowk8_exit(void) cpufreq_unregister_driver(&cpufreq_amd64_driver); } -MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf and Mark Langsdorf "); MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 019a61b99338d0ac05de25317b85da88e7ec4b35 Mon Sep 17 00:00:00 2001 From: "Langsdorf, Mark" Date: Tue, 29 Nov 2005 14:18:03 -0600 Subject: [PATCH] Support 100 MHz frequency transitions Future versions of the Opteron processor may support frequency transitions of 100 MHz, instead of the=20 current 200 MHz. This patch enables the powernow-k8 driver to transition to an odd FID code, indicating a multiple of 100 MHz frequency. Signed-off-by: Jacob Shin Signed-off-by: Mark Langsdorf Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 17 +++++++---------- arch/i386/kernel/cpu/cpufreq/powernow-k8.h | 9 +++++---- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 349782736808..a342b32c0843 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -45,7 +45,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.50.5" +#define VERSION "version 1.60.0" #include "powernow-k8.h" /* serialize freq changes */ @@ -336,7 +336,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid /* Phase 2 - core frequency transition */ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) { - u32 vcoreqfid, vcocurrfid, vcofiddiff, savevid = data->currvid; + u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", @@ -359,9 +359,11 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) : vcoreqfid - vcocurrfid; while (vcofiddiff > 2) { + (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); + if (reqfid > data->currfid) { if (data->currfid > LO_FID_TABLE_TOP) { - if (write_new_fid(data, data->currfid + 2)) { + if (write_new_fid(data, data->currfid + fid_interval)) { return 1; } } else { @@ -371,7 +373,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) } } } else { - if (write_new_fid(data, data->currfid - 2)) + if (write_new_fid(data, data->currfid - fid_interval)) return 1; } @@ -474,7 +476,7 @@ static int check_supported_cpu(unsigned int cpu) eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || ((eax & CPUID_XFAM) != CPUID_XFAM_K8) || - ((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) { + ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) { printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); goto out; } @@ -521,10 +523,6 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); return -ENODEV; } - if (pst[j].fid & 1) { - printk(KERN_ERR BFX "fid invalid - %d : 0x%x\n", j, pst[j].fid); - return -EINVAL; - } if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { /* Only first fid is allowed to be in "low" range */ printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); @@ -1177,4 +1175,3 @@ MODULE_LICENSE("GPL"); late_initcall(powernowk8_init); module_exit(powernowk8_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index b1e85bb36396..d0de37d58e9a 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -42,7 +42,7 @@ struct powernow_k8_data { #define CPUID_XFAM 0x0ff00000 /* extended family */ #define CPUID_XFAM_K8 0 #define CPUID_XMOD 0x000f0000 /* extended model */ -#define CPUID_XMOD_REV_F 0x00040000 +#define CPUID_XMOD_REV_G 0x00060000 #define CPUID_USE_XFAM_XMOD 0x00000f00 #define CPUID_GET_MAX_CAPABILITIES 0x80000000 #define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 @@ -86,13 +86,14 @@ struct powernow_k8_data { * low fid table * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry * in the low fid table - * - the parts can only step at 200 MHz intervals, so 1.9 GHz is never valid + * - the parts can only step at <= 200 MHz intervals, odd fid values are + * supported in revision G and later revisions. * - lowest frequency must be >= interprocessor hypertransport link speed * (only applies to MP systems obviously) */ /* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ -#define LO_FID_TABLE_TOP 6 /* fid values marking the boundary */ +#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ #define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ #define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ @@ -106,7 +107,7 @@ struct powernow_k8_data { #define MIN_FREQ 800 /* Min and max freqs, per spec */ #define MAX_FREQ 5000 -#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */ +#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ #define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ #define VID_OFF 0x3f -- cgit v1.2.3 From 6df8900676c3f5c133328332fb8ad889fd0cc9e3 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 30 Nov 2005 13:33:30 -0800 Subject: [CPUFREQ] Fix indentation in powernow-k8 Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index a342b32c0843..f44c1b1b04e9 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -216,10 +216,10 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) do { wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); - if (i++ > 100) { - printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); - return 1; - } + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } } while (query_current_values_with_pending_wait(data)); if (savefid != data->currfid) { -- cgit v1.2.3 From 001893cda2f280ab882164737a0b608208524809 Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Thu, 1 Dec 2005 01:09:25 -0800 Subject: [PATCH] cpufreq_conservative/ondemand: invert meaning of 'ignore nice' The use of the 'ignore_nice' sysfs file is confusing to anyone using it. This removes the sysfs file 'ignore_nice' and in its place creates a 'ignore_nice_load' entry that defaults to '0'; meaning nice'd processes _are_ counted towards the 'business' calculation. WARNING: this obvious breaks any userland tools that expected ignore_nice' to exist, to draw attention to this fact it was concluded on the mailing list that the entry should be removed altogether so the userland app breaks and so the author can build simple to detect workaround. Having said that it seems currently very few tools even make use of this functionality; all I could find was a Gentoo Wiki entry. Signed-off-by: Alexander Clouter Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 10 +++++----- drivers/cpufreq/cpufreq_ondemand.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 2ed5c4363b53..39543a2bed0f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -93,7 +93,7 @@ static inline unsigned int get_cpu_idle_time(unsigned int cpu) { return kstat_cpu(cpu).cpustat.idle + kstat_cpu(cpu).cpustat.iowait + - ( !dbs_tuners_ins.ignore_nice ? + ( dbs_tuners_ins.ignore_nice ? kstat_cpu(cpu).cpustat.nice : 0); } @@ -127,7 +127,7 @@ show_one(sampling_rate, sampling_rate); show_one(sampling_down_factor, sampling_down_factor); show_one(up_threshold, up_threshold); show_one(down_threshold, down_threshold); -show_one(ignore_nice, ignore_nice); +show_one(ignore_nice_load, ignore_nice); show_one(freq_step, freq_step); static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, @@ -207,7 +207,7 @@ static ssize_t store_down_threshold(struct cpufreq_policy *unused, return count; } -static ssize_t store_ignore_nice(struct cpufreq_policy *policy, +static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, const char *buf, size_t count) { unsigned int input; @@ -272,7 +272,7 @@ define_one_rw(sampling_rate); define_one_rw(sampling_down_factor); define_one_rw(up_threshold); define_one_rw(down_threshold); -define_one_rw(ignore_nice); +define_one_rw(ignore_nice_load); define_one_rw(freq_step); static struct attribute * dbs_attributes[] = { @@ -282,7 +282,7 @@ static struct attribute * dbs_attributes[] = { &sampling_down_factor.attr, &up_threshold.attr, &down_threshold.attr, - &ignore_nice.attr, + &ignore_nice_load.attr, &freq_step.attr, NULL }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 17741111246b..e69fd8dd1f1c 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -89,7 +89,7 @@ static inline unsigned int get_cpu_idle_time(unsigned int cpu) { return kstat_cpu(cpu).cpustat.idle + kstat_cpu(cpu).cpustat.iowait + - ( !dbs_tuners_ins.ignore_nice ? + ( dbs_tuners_ins.ignore_nice ? kstat_cpu(cpu).cpustat.nice : 0); } @@ -122,7 +122,7 @@ static ssize_t show_##file_name \ show_one(sampling_rate, sampling_rate); show_one(sampling_down_factor, sampling_down_factor); show_one(up_threshold, up_threshold); -show_one(ignore_nice, ignore_nice); +show_one(ignore_nice_load, ignore_nice); static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, const char *buf, size_t count) @@ -182,7 +182,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, return count; } -static ssize_t store_ignore_nice(struct cpufreq_policy *policy, +static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, const char *buf, size_t count) { unsigned int input; @@ -223,7 +223,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_rw(sampling_rate); define_one_rw(sampling_down_factor); define_one_rw(up_threshold); -define_one_rw(ignore_nice); +define_one_rw(ignore_nice_load); static struct attribute * dbs_attributes[] = { &sampling_rate_max.attr, @@ -231,7 +231,7 @@ static struct attribute * dbs_attributes[] = { &sampling_rate.attr, &sampling_down_factor.attr, &up_threshold.attr, - &ignore_nice.attr, + &ignore_nice_load.attr, NULL }; -- cgit v1.2.3 From 537208c8072280ab87916710d5a3f7ef11ab94ff Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Thu, 1 Dec 2005 01:09:23 -0800 Subject: [PATCH] cpufreq: documentation for 'ondemand' and 'conservative' Added a more verbose entry for the 'ondemend' governor and an entry for the 'conservative' governor to the documentation. Signed-off-by: Alexander Clouter Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- Documentation/cpu-freq/governors.txt | 62 ++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index 933fae74c337..f4b8dc4237e6 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -27,6 +27,7 @@ Contents: 2.2 Powersave 2.3 Userspace 2.4 Ondemand +2.5 Conservative 3. The Governor Interface in the CPUfreq Core @@ -110,9 +111,64 @@ directory. The CPUfreq govenor "ondemand" sets the CPU depending on the current usage. To do this the CPU must have the capability to -switch the frequency very fast. - - +switch the frequency very quickly. There are a number of sysfs file +accessible parameters: + +sampling_rate: measured in uS (10^-6 seconds), this is how often you +want the kernel to look at the CPU usage and to make decisions on +what to do about the frequency. Typically this is set to values of +around '10000' or more. + +show_sampling_rate_(min|max): the minimum and maximum sampling rates +available that you may set 'sampling_rate' to. + +up_threshold: defines what the average CPU usaged between the samplings +of 'sampling_rate' needs to be for the kernel to make a decision on +whether it should increase the frequency. For example when it is set +to its default value of '80' it means that between the checking +intervals the CPU needs to be on average more than 80% in use to then +decide that the CPU frequency needs to be increased. + +sampling_down_factor: this parameter controls the rate that the CPU +makes a decision on when to decrease the frequency. When set to its +default value of '5' it means that at 1/5 the sampling_rate the kernel +makes a decision to lower the frequency. Five "lower rate" decisions +have to be made in a row before the CPU frequency is actually lower. +If set to '1' then the frequency decreases as quickly as it increases, +if set to '2' it decreases at half the rate of the increase. + +ignore_nice_load: this parameter takes a value of '0' or '1', when set +to '0' (its default) then all processes are counted towards towards the +'cpu utilisation' value. When set to '1' then processes that are +run with a 'nice' value will not count (and thus be ignored) in the +overal usage calculation. This is useful if you are running a CPU +intensive calculation on your laptop that you do not care how long it +takes to complete as you can 'nice' it and prevent it from taking part +in the deciding process of whether to increase your CPU frequency. + + +2.5 Conservative +---------------- + +The CPUfreq governor "conservative", much like the "ondemand" +governor, sets the CPU depending on the current usage. It differs in +behaviour in that it gracefully increases and decreases the CPU speed +rather than jumping to max speed the moment there is any load on the +CPU. This behaviour more suitable in a battery powered environment. +The governor is tweaked in the same manner as the "ondemand" governor +through sysfs with the addition of: + +freq_step: this describes what percentage steps the cpu freq should be +increased and decreased smoothly by. By default the cpu frequency will +increase in 5% chunks of your maximum cpu frequency. You can change this +value to anywhere between 0 and 100 where '0' will effectively lock your +CPU at a speed regardless of its load whilst '100' will, in theory, make +it behave identically to the "ondemand" governor. + +down_threshold: same as the 'up_threshold' found for the "ondemand" +governor but for the opposite direction. For example when set to its +default value of '20' it means that if the CPU usage needs to be below +20% between samples to have the frequency decreased. 3. The Governor Interface in the CPUfreq Core ============================================= -- cgit v1.2.3 From d4921914de19339d7cd987d2ed6d48754821f41a Mon Sep 17 00:00:00 2001 From: "Gabriel A. Devenyi" Date: Thu, 1 Dec 2005 01:09:22 -0800 Subject: [PATCH] cpufreq-nforce2.c fix u32<0 test Thanks to LinuxICC (http://linuxicc.sf.net), a comparison of a u32 less than 0 was found, this patch changes the variable to a signed int so that comparison is meaningful. Signed-off-by: Gabriel A. Devenyi Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c index 04a405345203..2b62dee35c6c 100644 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -177,9 +177,10 @@ static unsigned int nforce2_fsb_read(int bootfsb) */ static int nforce2_set_fsb(unsigned int fsb) { - u32 pll, temp = 0; + u32 temp = 0; unsigned int tfsb; int diff; + int pll = 0; if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); -- cgit v1.2.3 From cc6e8de8f0fab61760bb7091fb19eef1406e17be Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 6 Dec 2005 15:03:55 -0800 Subject: [CPUFREQ] Change loglevels on powernow-k8 bios error printk's. If a user has booted with 'quiet', some important messages don't get displayed which really should. We've seen at least one case where powernow-k8 stopped working, and the user needed a BIOS update that they didn't know about. Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index f44c1b1b04e9..0fbbd4c1072e 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -635,7 +635,7 @@ static int find_psb_table(struct powernow_k8_data *data) dprintk("table vers: 0x%x\n", psb->tableversion); if (psb->tableversion != PSB_VERSION_1_4) { - printk(KERN_INFO BFX "PSB table is not v1.4\n"); + printk(KERN_ERR BFX "PSB table is not v1.4\n"); return -ENODEV; } @@ -693,7 +693,7 @@ static int find_psb_table(struct powernow_k8_data *data) * BIOS and Kernel Developer's Guide, which is available on * www.amd.com */ - printk(KERN_INFO PFX "BIOS error - no PSB or ACPI _PSS objects\n"); + printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); return -ENODEV; } -- cgit v1.2.3 From 1a10760c91c394dfe4adfefeeaf85cd8098c4894 Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Fri, 2 Dec 2005 21:59:41 +0100 Subject: [CPUFREQ] Measure transition latency at driver initialization The attached patch introduces runtime latency measurement for ICH[234] based chipsets instead of using CPUFREQ_ETERNAL. It includes some sanity checks in case the measured value is out of range and assigns a safe value of 500uSec that should still be enough on problematics chipsets (current testing report values ~200uSec). The measurement is currently done in speedstep_get_freqs in order to avoid further unnecessary transitions and in the hope it'll come handy for SMI also. Signed-off-by: Mattia Dongili Acked-by: Dominik Brodowski Signed-off-by: Dave Jones speedstep-ich.c | 4 ++-- speedstep-lib.c | 32 +++++++++++++++++++++++++++++++- speedstep-lib.h | 1 + speedstep-smi.c | 1 + 4 files changed, 35 insertions(+), 3 deletions(-) --- arch/i386/kernel/cpu/cpufreq/speedstep-ich.c | 4 ++-- arch/i386/kernel/cpu/cpufreq/speedstep-lib.c | 32 +++++++++++++++++++++++++++- arch/i386/kernel/cpu/cpufreq/speedstep-lib.h | 1 + arch/i386/kernel/cpu/cpufreq/speedstep-smi.c | 1 + 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c index 5b7d18a06afa..862e0b5656b9 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c @@ -315,10 +315,11 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) cpus_allowed = current->cpus_allowed; set_cpus_allowed(current, policy->cpus); - /* detect low and high frequency */ + /* detect low and high frequency and transition latency */ result = speedstep_get_freqs(speedstep_processor, &speedstep_freqs[SPEEDSTEP_LOW].frequency, &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + &policy->cpuinfo.transition_latency, &speedstep_set_state); set_cpus_allowed(current, cpus_allowed); if (result) @@ -335,7 +336,6 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; policy->cur = speed; result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index d368b3f5fce8..7c47005a1805 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -320,11 +320,13 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); unsigned int speedstep_get_freqs(unsigned int processor, unsigned int *low_speed, unsigned int *high_speed, + unsigned int *transition_latency, void (*set_state) (unsigned int state)) { unsigned int prev_speed; unsigned int ret = 0; unsigned long flags; + struct timeval tv1, tv2; if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) return -EINVAL; @@ -337,7 +339,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, return -EIO; dprintk("previous speed is %u\n", prev_speed); - + local_irq_save(flags); /* switch to low state */ @@ -350,8 +352,17 @@ unsigned int speedstep_get_freqs(unsigned int processor, dprintk("low speed is %u\n", *low_speed); + /* start latency measurement */ + if (transition_latency) + do_gettimeofday(&tv1); + /* switch to high state */ set_state(SPEEDSTEP_HIGH); + + /* end latency measurement */ + if (transition_latency) + do_gettimeofday(&tv2); + *high_speed = speedstep_get_processor_frequency(processor); if (!*high_speed) { ret = -EIO; @@ -369,6 +380,25 @@ unsigned int speedstep_get_freqs(unsigned int processor, if (*high_speed != prev_speed) set_state(SPEEDSTEP_LOW); + if (transition_latency) { + *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + + tv2.tv_usec - tv1.tv_usec; + dprintk("transition latency is %u uSec\n", *transition_latency); + + /* convert uSec to nSec and add 20% for safety reasons */ + *transition_latency *= 1200; + + /* check if the latency measurement is too high or too low + * and set it to a safe value (500uSec) in that case + */ + if (*transition_latency > 10000000 || *transition_latency < 50000) { + printk (KERN_WARNING "speedstep: frequency transition measured seems out of " + "range (%u nSec), falling back to a safe one of %u nSec.\n", + *transition_latency, 500000); + *transition_latency = 500000; + } + } + out: local_irq_restore(flags); return (ret); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h index 261a2c9b7f6b..6a727fd3a77e 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h @@ -44,4 +44,5 @@ extern unsigned int speedstep_get_processor_frequency(unsigned int processor); extern unsigned int speedstep_get_freqs(unsigned int processor, unsigned int *low_speed, unsigned int *high_speed, + unsigned int *transition_latency, void (*set_state) (unsigned int state)); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 2718fb6f6aba..28cc5d524afc 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -269,6 +269,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) result = speedstep_get_freqs(speedstep_processor, &speedstep_freqs[SPEEDSTEP_LOW].frequency, &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + NULL, &speedstep_set_state); if (result) { -- cgit v1.2.3 From 9a7d82a89a8bf55b112f2a5c3b3f405eb95a4303 Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Wed, 30 Nov 2005 22:00:59 +0100 Subject: [CPUFREQ] Move PMBASE reading away and do it only once at initialization time This patch moves away PMBASE reading and only performs it at cpufreq_register_driver time by exiting with -ENODEV if unable to read the value. Signed-off-by: Mattia Dongili Acked-by: Dominik Brodowski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/speedstep-ich.c | 43 +++++++++++++++++++--------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c index 862e0b5656b9..b425cd3d1838 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c @@ -40,6 +40,7 @@ static struct pci_dev *speedstep_chipset_dev; */ static unsigned int speedstep_processor = 0; +static u32 pmbase; /* * There are only two frequency states for each processor. Values @@ -56,34 +57,47 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { /** - * speedstep_set_state - set the SpeedStep state - * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * speedstep_find_register - read the PMBASE address * - * Tries to change the SpeedStep state. + * Returns: -ENODEV if no register could be found */ -static void speedstep_set_state (unsigned int state) +static int speedstep_find_register (void) { - u32 pmbase; - u8 pm2_blk; - u8 value; - unsigned long flags; - - if (!speedstep_chipset_dev || (state > 0x1)) - return; + if (!speedstep_chipset_dev) + return -ENODEV; /* get PMBASE */ pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); if (!(pmbase & 0x01)) { printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return; + return -ENODEV; } pmbase &= 0xFFFFFFFE; if (!pmbase) { printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return; + return -ENODEV; } + dprintk("pmbase is 0x%x\n", pmbase); + return 0; +} + +/** + * speedstep_set_state - set the SpeedStep state + * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + * Tries to change the SpeedStep state. + */ +static void speedstep_set_state (unsigned int state) +{ + u8 pm2_blk; + u8 value; + unsigned long flags; + + if (state > 0x1) + return; + /* Disable IRQs */ local_irq_save(flags); @@ -400,6 +414,9 @@ static int __init speedstep_init(void) return -EINVAL; } + if (speedstep_find_register()) + return -ENODEV; + return cpufreq_register_driver(&speedstep_driver); } -- cgit v1.2.3 From 95235ca2c20ac0b31a8eb39e2d599bcc3e9c9a10 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 2 Dec 2005 10:43:20 -0800 Subject: [CPUFREQ] CPU frequency display in /proc/cpuinfo What is the value shown in "cpu MHz" of /proc/cpuinfo when CPUs are capable of changing frequency? Today the answer is: It depends. On i386: SMP kernel - It is always the boot frequency UP kernel - Scales with the frequency change and shows that was last set. On x86_64: There is one single variable cpu_khz that gets written by all the CPUs. So, the frequency set by last CPU will be seen on /proc/cpuinfo of all the CPUs in the system. What you see also depends on whether you have constant_tsc capable CPU or not. On ia64: It is always boot time frequency of a particular CPU that gets displayed. The patch below changes this to: Show the last known frequency of the particular CPU, when cpufreq is present. If cpu doesnot support changing of frequency through cpufreq, then boot frequency will be shown. The patch affects i386, x86_64 and ia64 architectures. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/proc.c | 6 +++++- arch/ia64/kernel/setup.c | 8 +++++++- arch/x86_64/kernel/setup.c | 6 +++++- drivers/cpufreq/cpufreq.c | 24 ++++++++++++++++++++++++ include/linux/cpufreq.h | 10 ++++++++++ 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index e7921315ae9d..6d91b274589c 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -3,6 +3,7 @@ #include #include #include +#include /* * Get CPU information for use by the procfs. @@ -86,8 +87,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if ( cpu_has(c, X86_FEATURE_TSC) ) { + unsigned int freq = cpufreq_quick_get(n); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 1000)); } /* Cache size */ diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 5add0bcf87a7..088e5dded8dc 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -517,6 +518,7 @@ show_cpuinfo (struct seq_file *m, void *v) char family[32], features[128], *cp, sep; struct cpuinfo_ia64 *c = v; unsigned long mask; + unsigned int proc_freq; int i; mask = c->features; @@ -549,6 +551,10 @@ show_cpuinfo (struct seq_file *m, void *v) sprintf(cp, " 0x%lx", mask); } + proc_freq = cpufreq_quick_get(cpunum); + if (!proc_freq) + proc_freq = c->proc_freq / 1000; + seq_printf(m, "processor : %d\n" "vendor : %s\n" @@ -565,7 +571,7 @@ show_cpuinfo (struct seq_file *m, void *v) "BogoMIPS : %lu.%02lu\n", cpunum, c->vendor, family, c->model, c->revision, c->archrev, features, c->ppn, c->number, - c->proc_freq / 1000000, c->proc_freq % 1000000, + proc_freq / 1000, proc_freq % 1000, c->itc_freq / 1000000, c->itc_freq % 1000000, lpj*HZ/500000, (lpj*HZ/5000) % 100); #ifdef CONFIG_SMP diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 750e01dcbdf4..64c4534b930c 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -1256,8 +1257,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { + unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 1000)); } /* Cache size */ diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 815902c2c856..a9163d02983a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -822,6 +822,30 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigne } +/** + * cpufreq_quick_get - get the CPU frequency (in kHz) frpm policy->cur + * @cpu: CPU number + * + * This is the last known freq, without actually getting it from the driver. + * Return value will be same as what is shown in scaling_cur_freq in sysfs. + */ +unsigned int cpufreq_quick_get(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + unsigned int ret = 0; + + if (policy) { + down(&policy->lock); + ret = policy->cur; + up(&policy->lock); + cpufreq_cpu_put(policy); + } + + return (ret); +} +EXPORT_SYMBOL(cpufreq_quick_get); + + /** * cpufreq_get - get the current CPU frequency (in kHz) * @cpu: CPU number diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d068176b7ad7..c31650df9241 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -256,6 +256,16 @@ int cpufreq_update_policy(unsigned int cpu); /* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */ unsigned int cpufreq_get(unsigned int cpu); +/* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */ +#ifdef CONFIG_CPU_FREQ +unsigned int cpufreq_quick_get(unsigned int cpu); +#else +static inline unsigned int cpufreq_quick_get(unsigned int cpu) +{ + return 0; +} +#endif + /********************************************************************* * CPUFREQ DEFAULT GOVERNOR * -- cgit v1.2.3