diff options
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/clocksource.c | 84 | ||||
-rw-r--r-- | kernel/time/ntp.c | 12 | ||||
-rw-r--r-- | kernel/time/tick-oneshot.c | 52 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 84 | ||||
-rw-r--r-- | kernel/time/timecompare.c | 1 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 41 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 4 |
7 files changed, 221 insertions, 57 deletions
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e85c23404d34..f08e99c1d561 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -343,7 +343,19 @@ static void clocksource_resume_watchdog(void) { unsigned long flags; - spin_lock_irqsave(&watchdog_lock, flags); + /* + * We use trylock here to avoid a potential dead lock when + * kgdb calls this code after the kernel has been stopped with + * watchdog_lock held. When watchdog_lock is held we just + * return and accept, that the watchdog might trigger and mark + * the monitored clock source (usually TSC) unstable. + * + * This does not affect the other caller clocksource_resume() + * because at this point the kernel is UP, interrupts are + * disabled and nothing can hold watchdog_lock. + */ + if (!spin_trylock_irqsave(&watchdog_lock, flags)) + return; clocksource_reset_watchdog(); spin_unlock_irqrestore(&watchdog_lock, flags); } @@ -441,6 +453,18 @@ static inline int clocksource_watchdog_kthread(void *data) { return 0; } #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ /** + * clocksource_suspend - suspend the clocksource(s) + */ +void clocksource_suspend(void) +{ + struct clocksource *cs; + + list_for_each_entry_reverse(cs, &clocksource_list, list) + if (cs->suspend) + cs->suspend(cs); +} + +/** * clocksource_resume - resume the clocksource(s) */ void clocksource_resume(void) @@ -449,7 +473,7 @@ void clocksource_resume(void) list_for_each_entry(cs, &clocksource_list, list) if (cs->resume) - cs->resume(); + cs->resume(cs); clocksource_resume_watchdog(); } @@ -458,8 +482,8 @@ void clocksource_resume(void) * clocksource_touch_watchdog - Update watchdog * * Update the watchdog after exception contexts such as kgdb so as not - * to incorrectly trip the watchdog. - * + * to incorrectly trip the watchdog. This might fail when the kernel + * was stopped in code which holds watchdog_lock. */ void clocksource_touch_watchdog(void) { @@ -568,6 +592,10 @@ static inline void clocksource_select(void) { } */ static int __init clocksource_done_booting(void) { + mutex_lock(&clocksource_mutex); + curr_clocksource = clocksource_default_clock(); + mutex_unlock(&clocksource_mutex); + finished_booting = 1; /* @@ -597,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs) list_add(&cs->list, entry); } + +/* + * Maximum time we expect to go between ticks. This includes idle + * tickless time. It provides the trade off between selecting a + * mult/shift pair that is very precise but can only handle a short + * period of time, vs. a mult/shift pair that can handle long periods + * of time but isn't as precise. + * + * This is a subsystem constant, and actual hardware limitations + * may override it (ie: clocksources that wrap every 3 seconds). + */ +#define MAX_UPDATE_LENGTH 5 /* Seconds */ + +/** + * __clocksource_register_scale - Used to install new clocksources + * @t: clocksource to be registered + * @scale: Scale factor multiplied against freq to get clocksource hz + * @freq: clocksource frequency (cycles per second) divided by scale + * + * Returns -EBUSY if registration fails, zero otherwise. + * + * This *SHOULD NOT* be called directly! Please use the + * clocksource_register_hz() or clocksource_register_khz helper functions. + */ +int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) +{ + + /* + * Ideally we want to use some of the limits used in + * clocksource_max_deferment, to provide a more informed + * MAX_UPDATE_LENGTH. But for now this just gets the + * register interface working properly. + */ + clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, + NSEC_PER_SEC/scale, + MAX_UPDATE_LENGTH*scale); + cs->max_idle_ns = clocksource_max_deferment(cs); + + mutex_lock(&clocksource_mutex); + clocksource_enqueue(cs); + clocksource_select(); + clocksource_enqueue_watchdog(cs); + mutex_unlock(&clocksource_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(__clocksource_register_scale); + + /** * clocksource_register - Used to install new clocksources * @t: clocksource to be registered diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4800f933910e..c63116863a80 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -58,10 +58,10 @@ static s64 time_offset; static long time_constant = 2; /* maximum error (usecs): */ -long time_maxerror = NTP_PHASE_LIMIT; +static long time_maxerror = NTP_PHASE_LIMIT; /* estimated error (usecs): */ -long time_esterror = NTP_PHASE_LIMIT; +static long time_esterror = NTP_PHASE_LIMIT; /* frequency offset (scaled nsecs/secs): */ static s64 time_freq; @@ -69,7 +69,7 @@ static s64 time_freq; /* time at last adjustment (secs): */ static long time_reftime; -long time_adjust; +static long time_adjust; /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ static s64 ntp_tick_adj; @@ -142,11 +142,11 @@ static void ntp_update_offset(long offset) * Select how the frequency is to be controlled * and in which mode (PLL or FLL). */ - secs = xtime.tv_sec - time_reftime; + secs = get_seconds() - time_reftime; if (unlikely(time_status & STA_FREQHOLD)) secs = 0; - time_reftime = xtime.tv_sec; + time_reftime = get_seconds(); offset64 = offset; freq_adj = (offset64 * secs) << @@ -368,7 +368,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) * reference time to current time. */ if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) - time_reftime = xtime.tv_sec; + time_reftime = get_seconds(); /* only set allowed bits */ time_status &= STA_RONLY; diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 0a8a213016f0..aada0e52680a 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -22,6 +22,29 @@ #include "tick-internal.h" +/* Limit min_delta to a jiffie */ +#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) + +static int tick_increase_min_delta(struct clock_event_device *dev) +{ + /* Nothing to do if we already reached the limit */ + if (dev->min_delta_ns >= MIN_DELTA_LIMIT) + return -ETIME; + + if (dev->min_delta_ns < 5000) + dev->min_delta_ns = 5000; + else + dev->min_delta_ns += dev->min_delta_ns >> 1; + + if (dev->min_delta_ns > MIN_DELTA_LIMIT) + dev->min_delta_ns = MIN_DELTA_LIMIT; + + printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", + dev->name ? dev->name : "?", + (unsigned long long) dev->min_delta_ns); + return 0; +} + /** * tick_program_event internal worker function */ @@ -37,23 +60,28 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, if (!ret || !force) return ret; + dev->retries++; /* - * We tried 2 times to program the device with the given - * min_delta_ns. If that's not working then we double it + * We tried 3 times to program the device with the given + * min_delta_ns. If that's not working then we increase it * and emit a warning. */ if (++i > 2) { /* Increase the min. delta and try again */ - if (!dev->min_delta_ns) - dev->min_delta_ns = 5000; - else - dev->min_delta_ns += dev->min_delta_ns >> 1; - - printk(KERN_WARNING - "CE: %s increasing min_delta_ns to %llu nsec\n", - dev->name ? dev->name : "?", - (unsigned long long) dev->min_delta_ns << 1); - + if (tick_increase_min_delta(dev)) { + /* + * Get out of the loop if min_delta_ns + * hit the limit already. That's + * better than staying here forever. + * + * We clear next_event so we have a + * chance that the box survives. + */ + printk(KERN_WARNING + "CE: Reprogramming failure. Giving up\n"); + dev->next_event.tv64 = KTIME_MAX; + return -ETIME; + } i = 0; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f992762d7f51..1d7b9bc1c034 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -150,14 +150,32 @@ static void tick_nohz_update_jiffies(ktime_t now) touch_softlockup_watchdog(); } +/* + * Updates the per cpu time idle statistics counters + */ +static void +update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time) +{ + ktime_t delta; + + if (ts->idle_active) { + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + if (nr_iowait_cpu() > 0) + ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); + ts->idle_entrytime = now; + } + + if (last_update_time) + *last_update_time = ktime_to_us(now); + +} + static void tick_nohz_stop_idle(int cpu, ktime_t now) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - ktime_t delta; - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_lastupdate = now; - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + update_ts_time_stats(ts, now, NULL); ts->idle_active = 0; sched_clock_idle_wakeup_event(0); @@ -165,20 +183,32 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now) static ktime_t tick_nohz_start_idle(struct tick_sched *ts) { - ktime_t now, delta; + ktime_t now; now = ktime_get(); - if (ts->idle_active) { - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_lastupdate = now; - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - } + + update_ts_time_stats(ts, now, NULL); + ts->idle_entrytime = now; ts->idle_active = 1; sched_clock_idle_sleep_event(); return now; } +/** + * get_cpu_idle_time_us - get the total idle time of a cpu + * @cpu: CPU number to query + * @last_update_time: variable to store update time in + * + * Return the cummulative idle time (since boot) for a given + * CPU, in microseconds. The idle time returned includes + * the iowait time (unlike what "top" and co report). + * + * This time is measured via accounting rather than sampling, + * and is as accurate as ktime_get() is. + * + * This function returns -1 if NOHZ is not enabled. + */ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); @@ -186,15 +216,38 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) if (!tick_nohz_enabled) return -1; - if (ts->idle_active) - *last_update_time = ktime_to_us(ts->idle_lastupdate); - else - *last_update_time = ktime_to_us(ktime_get()); + update_ts_time_stats(ts, ktime_get(), last_update_time); return ktime_to_us(ts->idle_sleeptime); } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); +/* + * get_cpu_iowait_time_us - get the total iowait time of a cpu + * @cpu: CPU number to query + * @last_update_time: variable to store update time in + * + * Return the cummulative iowait time (since boot) for a given + * CPU, in microseconds. + * + * This time is measured via accounting rather than sampling, + * and is as accurate as ktime_get() is. + * + * This function returns -1 if NOHZ is not enabled. + */ +u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + if (!tick_nohz_enabled) + return -1; + + update_ts_time_stats(ts, ktime_get(), last_update_time); + + return ktime_to_us(ts->iowait_sleeptime); +} +EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); + /** * tick_nohz_stop_sched_tick - stop the idle tick from the idle task * @@ -262,6 +315,9 @@ void tick_nohz_stop_sched_tick(int inidle) goto end; } + if (nohz_ratelimit(cpu)) + goto end; + ts->idle_calls++; /* Read jiffies and the time when jiffies were updated last */ do { diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c index 12f5c55090be..ac38fbb176cc 100644 --- a/kernel/time/timecompare.c +++ b/kernel/time/timecompare.c @@ -19,6 +19,7 @@ #include <linux/timecompare.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/math64.h> /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7faaa32fbf4f..caf8d4d4f5c8 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -165,13 +165,6 @@ struct timespec raw_time; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; -static struct timespec xtime_cache __attribute__ ((aligned (16))); -void update_xtime_cache(u64 nsec) -{ - xtime_cache = xtime; - timespec_add_ns(&xtime_cache, nsec); -} - /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { @@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv) xtime = *tv; - update_xtime_cache(0); - timekeeper.ntp_error = 0; ntp_clear(); @@ -559,7 +550,6 @@ void __init timekeeping_init(void) } set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); - update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev) wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); total_sleep_time = timespec_add_safe(total_sleep_time, ts); } - update_xtime_cache(0); /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; @@ -622,6 +611,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) write_sequnlock_irqrestore(&xtime_lock, flags); clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); + clocksource_suspend(); return 0; } @@ -787,7 +777,6 @@ void update_wall_time(void) { struct clocksource *clock; cycle_t offset; - u64 nsecs; int shift = 0, maxshift; /* Make sure we're fully resumed: */ @@ -817,7 +806,8 @@ void update_wall_time(void) shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { offset = logarithmic_accumulation(offset, shift); - shift--; + if(offset < timekeeper.cycle_interval<<shift) + shift--; } /* correct the clock when NTP error is too big */ @@ -845,7 +835,9 @@ void update_wall_time(void) timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; } - /* store full nanoseconds into xtime after rounding it up and + + /* + * Store full nanoseconds into xtime after rounding it up and * add the remainder to the error difference. */ xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; @@ -853,8 +845,15 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; - nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); - update_xtime_cache(nsecs); + /* + * Finally, make sure that after the rounding + * xtime.tv_nsec isn't larger then NSEC_PER_SEC + */ + if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) { + xtime.tv_nsec -= NSEC_PER_SEC; + xtime.tv_sec++; + second_overflow(); + } /* check to see if there is a new clocksource to use */ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); @@ -880,6 +879,7 @@ void getboottime(struct timespec *ts) set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); } +EXPORT_SYMBOL_GPL(getboottime); /** * monotonic_to_bootbased - Convert the monotonic time to boot based. @@ -889,16 +889,17 @@ void monotonic_to_bootbased(struct timespec *ts) { *ts = timespec_add_safe(*ts, total_sleep_time); } +EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { - return xtime_cache.tv_sec; + return xtime.tv_sec; } EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - return xtime_cache; + return xtime; } struct timespec current_kernel_time(void) @@ -909,7 +910,7 @@ struct timespec current_kernel_time(void) do { seq = read_seqbegin(&xtime_lock); - now = xtime_cache; + now = xtime; } while (read_seqretry(&xtime_lock, seq)); return now; @@ -924,7 +925,7 @@ struct timespec get_monotonic_coarse(void) do { seq = read_seqbegin(&xtime_lock); - now = xtime_cache; + now = xtime; mono = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index bdfb8dd1050c..ab8f5e33fa92 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -176,6 +176,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) P_ns(idle_waketime); P_ns(idle_exittime); P_ns(idle_sleeptime); + P_ns(iowait_sleeptime); P(last_jiffies); P(next_jiffies); P_ns(idle_expires); @@ -228,6 +229,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) SEQ_printf(m, " event_handler: "); print_name_offset(m, dev->event_handler); SEQ_printf(m, "\n"); + SEQ_printf(m, " retries: %lu\n", dev->retries); } static void timer_list_show_tickdevices(struct seq_file *m) @@ -257,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.5\n"); + SEQ_printf(m, "Timer List Version: v0.6\n"); SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); |