From af5ca3f4ec5cc4432a42a73b050dd8898ce8fd00 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 20 Dec 2007 02:09:39 +0100 Subject: Driver core: change sysdev classes to use dynamic kobject names All kobjects require a dynamically allocated name now. We no longer need to keep track if the name is statically assigned, we can just unconditionally free() all kobject names on cleanup. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/time/clocksource.c | 2 +- kernel/time/timekeeping.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c8a9d13874df..8d6125ad2cf0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -441,7 +441,7 @@ static SYSDEV_ATTR(available_clocksource, 0600, sysfs_show_available_clocksources, NULL); static struct sysdev_class clocksource_sysclass = { - set_kset_name("clocksource"), + .name = "clocksource", }; static struct sys_device device_clocksource = { diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e5e466b27598..ab46ae8c062b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -335,9 +335,9 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) /* sysfs resume/suspend bits for timekeeping */ static struct sysdev_class timekeeping_sysclass = { + .name = "timekeeping", .resume = timekeeping_resume, .suspend = timekeeping_suspend, - set_kset_name("timekeeping"), }; static struct sys_device device_timer = { -- cgit v1.2.3 From 48d5e258216f1c7713633439beb98a38c7290649 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Jan 2008 21:08:31 +0100 Subject: sched: rt throttling vs no_hz We need to teach no_hz about the rt throttling because its tick driven. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index cb89fa8db110..5f9fb645b725 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -153,6 +153,7 @@ void tick_nohz_update_jiffies(void) void tick_nohz_stop_sched_tick(void) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; + unsigned long rt_jiffies; struct tick_sched *ts; ktime_t last_update, expires, now, delta; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; @@ -216,6 +217,10 @@ void tick_nohz_stop_sched_tick(void) next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; + rt_jiffies = rt_needs_cpu(cpu); + if (rt_jiffies && rt_jiffies < delta_jiffies) + delta_jiffies = rt_jiffies; + if (rcu_needs_cpu(cpu)) delta_jiffies = 1; /* -- cgit v1.2.3 From 2d44ae4d7135b9aee26439b3523b43473381bc5f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Jan 2008 21:08:31 +0100 Subject: hrtimer: clean up cpu->base locking tricks In order to more easily allow for the scheduler to use timers, clean up the locking a bit. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 5f9fb645b725..1a21b6fdb674 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -514,7 +514,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) { struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer); - struct hrtimer_cpu_base *base = timer->base->cpu_base; struct pt_regs *regs = get_irq_regs(); ktime_t now = ktime_get(); int cpu = smp_processor_id(); @@ -552,15 +551,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) touch_softlockup_watchdog(); ts->idle_jiffies++; } - /* - * update_process_times() might take tasklist_lock, hence - * drop the base lock. sched-tick hrtimers are per-CPU and - * never accessible by userspace APIs, so this is safe to do. - */ - spin_unlock(&base->lock); update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); - spin_lock(&base->lock); } /* Do not restart, when we are in the idle loop */ -- cgit v1.2.3 From 4c9dc6412247abf4972080c51cd16a58c4009c19 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 30 Jan 2008 13:30:00 +0100 Subject: time: timer cleanups Small cleanups to tick-related code. Wrong preempt count is followed by BUG(), so it is hardly KERN_WARNING. Signed-off-by: Pavel Machek Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1a21b6fdb674..d36ee2fd1a3b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -296,7 +296,7 @@ void tick_nohz_stop_sched_tick(void) /* Check, if the timer was already in the past */ if (hrtimer_active(&ts->sched_timer)) goto out; - } else if(!tick_program_event(expires, 0)) + } else if (!tick_program_event(expires, 0)) goto out; /* * We are past the event already. So we crossed a @@ -507,7 +507,7 @@ static inline void tick_nohz_switch_to_nohz(void) { } */ #ifdef CONFIG_HIGH_RES_TIMERS /* - * We rearm the timer until we get disabled by the idle code + * We rearm the timer until we get disabled by the idle code. * Called with interrupts disabled and timer->base->cpu_base->lock held. */ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) -- cgit v1.2.3 From b10db7f0d2b589a7f88dc3026e150756cb437a28 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 30 Jan 2008 13:30:00 +0100 Subject: time: more timer related cleanups I was confused by FSEC = 10^15 NSEC statement, plus small whitespace fixes. When there's copyright, there should be GPL. Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 2 +- kernel/time/timer_stats.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d36ee2fd1a3b..49e12f6a4bab 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -9,7 +9,7 @@ * * Started by: Thomas Gleixner and Ingo Molnar * - * For licencing details see kernel-base/COPYING + * Distribute under GPLv2. */ #include #include diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index c36bb7ed0301..417da8c5bc72 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -26,7 +26,7 @@ * the pid and cmdline from the owner process if applicable. * * Start/stop data collection: - * # echo 1[0] >/proc/timer_stats + * # echo [1|0] >/proc/timer_stats * * Display the information collected so far: * # cat /proc/timer_stats -- cgit v1.2.3 From 186e3cb8a465bac010ee3b020768d2fa2b505aef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:01 +0100 Subject: timer: clean up tick-broadcast.c clean up tick-broadcast.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast.c | 7 ++----- kernel/time/tick-internal.h | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 5b86698faa0b..e1bd50cbbf5d 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -126,9 +126,9 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) /* * Broadcast the event to the cpus, which are set in the mask */ -int tick_do_broadcast(cpumask_t mask) +static void tick_do_broadcast(cpumask_t mask) { - int ret = 0, cpu = smp_processor_id(); + int cpu = smp_processor_id(); struct tick_device *td; /* @@ -138,7 +138,6 @@ int tick_do_broadcast(cpumask_t mask) cpu_clear(cpu, mask); td = &per_cpu(tick_cpu_device, cpu); td->evtdev->event_handler(td->evtdev); - ret = 1; } if (!cpus_empty(mask)) { @@ -151,9 +150,7 @@ int tick_do_broadcast(cpumask_t mask) cpu = first_cpu(mask); td = &per_cpu(tick_cpu_device, cpu); td->evtdev->broadcast(mask); - ret = 1; } - return ret; } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index bb13f2724905..f13f2b7f4fd4 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -70,8 +70,6 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) * Broadcasting support */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -extern int tick_do_broadcast(cpumask_t mask); - extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern int tick_check_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); -- cgit v1.2.3 From efd9ac8630e89b9ee7ce64008bd7783952374f37 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Jan 2008 13:30:01 +0100 Subject: time: fold __get_realtime_clock_ts() into getnstimeofday() - getnstimeofday() was just a wrapper around __get_realtime_clock_ts() - Replace calls to __get_realtime_clock_ts() by calls to getnstimeofday() - Fix bogus reference to get_realtime_clock_ts(), which never existed Signed-off-by: Geert Uytterhoeven Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ab46ae8c062b..77680195cf84 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -82,13 +82,12 @@ static inline s64 __get_nsec_offset(void) } /** - * __get_realtime_clock_ts - Returns the time of day in a timespec + * getnstimeofday - Returns the time of day in a timespec * @ts: pointer to the timespec to be set * - * Returns the time of day in a timespec. Used by - * do_gettimeofday() and get_realtime_clock_ts(). + * Returns the time of day in a timespec. */ -static inline void __get_realtime_clock_ts(struct timespec *ts) +void getnstimeofday(struct timespec *ts) { unsigned long seq; s64 nsecs; @@ -104,30 +103,19 @@ static inline void __get_realtime_clock_ts(struct timespec *ts) timespec_add_ns(ts, nsecs); } -/** - * getnstimeofday - Returns the time of day in a timespec - * @ts: pointer to the timespec to be set - * - * Returns the time of day in a timespec. - */ -void getnstimeofday(struct timespec *ts) -{ - __get_realtime_clock_ts(ts); -} - EXPORT_SYMBOL(getnstimeofday); /** * do_gettimeofday - Returns the time of day in a timeval * @tv: pointer to the timeval to be set * - * NOTE: Users should be converted to using get_realtime_clock_ts() + * NOTE: Users should be converted to using getnstimeofday() */ void do_gettimeofday(struct timeval *tv) { struct timespec now; - __get_realtime_clock_ts(&now); + getnstimeofday(&now); tv->tv_sec = now.tv_sec; tv->tv_usec = now.tv_nsec/1000; } -- cgit v1.2.3 From 1077f5a917b7c630231037826b344b2f7f5b903f Mon Sep 17 00:00:00 2001 From: Parag Warudkar Date: Wed, 30 Jan 2008 13:30:01 +0100 Subject: clocksource.c: use init_timer_deferrable for clocksource_watchdog clocksource_watchdog can use a deferrable timer - reduces wakeups from idle per second. Signed-off-by: Parag Warudkar Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 8d6125ad2cf0..cabfa193efb3 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -175,7 +175,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) if (watchdog) del_timer(&watchdog_timer); watchdog = cs; - init_timer(&watchdog_timer); + init_timer_deferrable(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; /* Reset watchdog cycles */ -- cgit v1.2.3 From 1ada5cba6a0318f90e45b38557e7b5206a9cba38 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 30 Jan 2008 13:30:02 +0100 Subject: clocksource: make clocksource watchdog cycle through online CPUs This way it checks if the clocks are synchronized between CPUs too. This might be able to detect slowly drifting TSCs which only go wrong over longer time. Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index cabfa193efb3..edd5ef8e1765 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -142,8 +142,13 @@ static void clocksource_watchdog(unsigned long data) } if (!list_empty(&watchdog_list)) { - __mod_timer(&watchdog_timer, - watchdog_timer.expires + WATCHDOG_INTERVAL); + /* Cycle through CPUs to check if the CPUs stay synchronized to + * each other. */ + int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map); + if (next_cpu >= NR_CPUS) + next_cpu = first_cpu(cpu_online_map); + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); } spin_unlock(&watchdog_lock); } @@ -165,7 +170,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) if (!started && watchdog) { watchdog_last = watchdog->read(); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; - add_timer(&watchdog_timer); + add_timer_on(&watchdog_timer, first_cpu(cpu_online_map)); } } else { if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) @@ -186,7 +191,8 @@ static void clocksource_check_watchdog(struct clocksource *cs) watchdog_last = watchdog->read(); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; - add_timer(&watchdog_timer); + add_timer_on(&watchdog_timer, + first_cpu(cpu_online_map)); } } } -- cgit v1.2.3 From 4713e22ce81eb8b3353e16435362eb3d0ec95640 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:02 +0100 Subject: clocksource: add unregister function to disable unusable clocksources On x86 the PIT might become an unusable clocksource. Add an unregister function to provide a possibilty to remove the PIT from the list of available clock sources. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index edd5ef8e1765..6e9259a5d501 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -337,6 +337,21 @@ void clocksource_change_rating(struct clocksource *cs, int rating) spin_unlock_irqrestore(&clocksource_lock, flags); } +/** + * clocksource_unregister - remove a registered clocksource + */ +void clocksource_unregister(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&clocksource_lock, flags); + list_del(&cs->list); + if (clocksource_override == cs) + clocksource_override = NULL; + next_clocksource = select_clocksource(); + spin_unlock_irqrestore(&clocksource_lock, flags); +} + #ifdef CONFIG_SYSFS /** * sysfs_show_current_clocksources - sysfs interface for current clocksource -- cgit v1.2.3 From 45fe4fe19120a22f7339f5bb110447170c25fca9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:30:03 +0100 Subject: x86: make clockevents more robust detect zero event-device multiplicators - they then cause division-by-zero crashes if a clockevent has been initialized incorrectly. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/time/clockevents.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 5fb139fef9fa..3e59fce6dd43 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -41,6 +41,11 @@ unsigned long clockevent_delta2ns(unsigned long latch, { u64 clc = ((u64) latch << evt->shift); + if (unlikely(!evt->mult)) { + evt->mult = 1; + WARN_ON(1); + } + do_div(clc, evt->mult); if (clc < 1000) clc = 1000; @@ -151,6 +156,14 @@ static void clockevents_notify_released(void) void clockevents_register_device(struct clock_event_device *dev) { BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + /* + * A nsec2cyc multiplicator of 0 is invalid and we'd crash + * on it, so fix it up and emit a warning: + */ + if (unlikely(!dev->mult)) { + dev->mult = 1; + WARN_ON(1); + } spin_lock(&clockevents_lock); -- cgit v1.2.3 From bbe4d18ac2e058c56adb0cd71f49d9ed3216a405 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 30 Jan 2008 13:30:03 +0100 Subject: NTP: correct inconsistent ntp interval/tick_length usage I recently noticed on one of my boxes that when synched with an NTP server, the drift value reported for the system was ~283ppm. While in some cases, clock hardware can be that bad, it struck me as unusual as the system was using the acpi_pm clocksource, which is one of the more trustworthy and accurate clocksources on x86 hardware. I brought up another system and let it sync to the same NTP server, and I noticed a similar 280some ppm drift. In looking at the code, I found that the acpi_pm's constant frequency was being computed correctly at boot-up, however once the system was up, even without the ntp daemon running, the clocksource's frequency was being modified by the clocksource_adjust() function. Digging deeper, I realized that in the code that keeps track of how much the clocksource is skewing from the ntp desired time, we were using different lengths to establish how long an time interval was. The clocksource was being setup with the following interval: NTP_INTERVAL_LENGTH = NSEC_PER_SEC/NTP_INTERVAL_FREQ While the ntp code was using the tick_length_base value: tick_length_base ~= (tick_usec * NSEC_PER_USEC * USER_HZ) /NTP_INTERVAL_FREQ The subtle difference is: (tick_usec * NSEC_PER_USEC * USER_HZ) != NSEC_PER_SEC This difference in calculation was causing the clocksource correction code to apply a correction factor to the clocksource so the two intervals were the same, however this results in the actual frequency of the clocksource to be made incorrect. I believe this difference would affect all clocksources, although to differing degrees depending on the clocksource resolution. The issue was introduced when my HZ free ntp patch landed in 2.6.21-rc1, so my apologies for the mistake, and for not noticing it until now. The following patch, corrects the clocksource's initialization code so it uses the same interval length as the code in ntp.c. After applying this patch, the drift value for the same system went from ~283ppm to only 2.635ppm. I believe this patch to be good, however it does affect all arches and I've only tested on x86, so some caution is advised. I do think it would be a likely candidate for a stable 2.6.24.x release. Any thoughts or feedback would be appreciated. Signed-off-by: John Stultz Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 77680195cf84..092a2366b5a9 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -186,7 +186,8 @@ static void change_clocksource(void) clock->error = 0; clock->xtime_nsec = 0; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); + clocksource_calculate_interval(clock, + (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT)); tick_clock_notify(); @@ -243,7 +244,8 @@ void __init timekeeping_init(void) ntp_clear(); clock = clocksource_get_next(); - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); + clocksource_calculate_interval(clock, + (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT)); clock->cycle_last = clocksource_read(clock); xtime.tv_sec = sec; -- cgit v1.2.3 From 6378ddb592158db4b42197f1bc8666228800e379 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Wed, 30 Jan 2008 13:30:04 +0100 Subject: time: track accurate idle time with tick_sched.idle_sleeptime Current idle time in kstat is based on jiffies and is coarse grained. tick_sched.idle_sleeptime is making some attempt to keep track of idle time in a fine grained manner. But, it is not handling the time spent in interrupts fully. Make tick_sched.idle_sleeptime accurate with respect to time spent on handling interrupts and also add tick_sched.idle_lastupdate, which keeps track of last time when idle_sleeptime was updated. This statistics will be crucial for cpufreq-ondemand governor, which can shed some conservative gaurd band that is uses today while setting the frequency. The ondemand changes that uses the exact idle time is coming soon. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 70 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 22 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 49e12f6a4bab..63f24b550695 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -143,6 +143,44 @@ void tick_nohz_update_jiffies(void) local_irq_restore(flags); } +void tick_nohz_stop_idle(int cpu) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + if (ts->idle_active) { + ktime_t now, delta; + now = ktime_get(); + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_lastupdate = now; + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + ts->idle_active = 0; + } +} + +static ktime_t tick_nohz_start_idle(int cpu) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, delta; + + now = ktime_get(); + if (ts->idle_active) { + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_lastupdate = now; + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + } + ts->idle_entrytime = now; + ts->idle_active = 1; + return now; +} + +u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + *last_update_time = ktime_to_us(ts->idle_lastupdate); + return ktime_to_us(ts->idle_sleeptime); +} + /** * tick_nohz_stop_sched_tick - stop the idle tick from the idle task * @@ -155,13 +193,14 @@ void tick_nohz_stop_sched_tick(void) unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; unsigned long rt_jiffies; struct tick_sched *ts; - ktime_t last_update, expires, now, delta; + ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; int cpu; local_irq_save(flags); cpu = smp_processor_id(); + now = tick_nohz_start_idle(cpu); ts = &per_cpu(tick_cpu_sched, cpu); /* @@ -193,19 +232,7 @@ void tick_nohz_stop_sched_tick(void) } } - now = ktime_get(); - /* - * When called from irq_exit we need to account the idle sleep time - * correctly. - */ - if (ts->tick_stopped) { - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - } - - ts->idle_entrytime = now; ts->idle_calls++; - /* Read jiffies and the time when jiffies were updated last */ do { seq = read_seqbegin(&xtime_lock); @@ -337,23 +364,22 @@ void tick_nohz_restart_sched_tick(void) int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long ticks; - ktime_t now, delta; + ktime_t now; - if (!ts->tick_stopped) + local_irq_disable(); + tick_nohz_stop_idle(cpu); + + if (!ts->tick_stopped) { + local_irq_enable(); return; + } /* Update jiffies first */ - now = ktime_get(); - - local_irq_disable(); select_nohz_load_balancer(0); + now = ktime_get(); tick_do_update_jiffies64(now); cpu_clear(cpu, nohz_cpu_mask); - /* Account the idle time */ - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick -- cgit v1.2.3