diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-05-05 17:31:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-05-05 17:31:14 -0700 |
commit | bb896afe2089575ca1bb1fbf3f07b934e1ba999b (patch) | |
tree | 7300f9b4e3e267fe97d898440c72ff2c4c327f23 /kernel/sched_clock.c | |
parent | 2e83fc4df5f27dfc1b53044c4f142b2f9d1db08c (diff) | |
parent | aac6abca858386438d9a7233c3471d2ecfa2f704 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-fixes
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-fixes:
sched: default to n for GROUP_SCHED and FAIR_GROUP_SCHED
sched: add optional support for CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
sched, x86: add HAVE_UNSTABLE_SCHED_CLOCK
sched: fix cpu clock
sched: fair-group: fix a Div0 error of the fair group scheduler
sched: fix missing locking in sched_domains code
sched: make clock sync tunable by architecture code
sched: fix debugging
sched: fix sched_info_switch not being called according to documentation
sched: fix hrtick_start_fair and CPU-Hotplug
sched: fix SCHED_FAIR wake-idle logic error
sched: fix RT task-wakeup logic
sched: add statics, don't return void expressions
sched: add debug checks to idle functions
sched: remove old sched doc
sched: make rt_sched_class, idle_sched_class static
sched: optimize calc_delta_mine()
sched: fix normalized sleeper
Diffstat (limited to 'kernel/sched_clock.c')
-rw-r--r-- | kernel/sched_clock.c | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c new file mode 100644 index 000000000000..9c597e37f7de --- /dev/null +++ b/kernel/sched_clock.c @@ -0,0 +1,236 @@ +/* + * sched_clock for unstable cpu clocks + * + * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + * + * Based on code by: + * Ingo Molnar <mingo@redhat.com> + * Guillaume Chazarain <guichaz@gmail.com> + * + * Create a semi stable clock from a mixture of other events, including: + * - gtod + * - jiffies + * - sched_clock() + * - explicit idle events + * + * We use gtod as base and the unstable clock deltas. The deltas are filtered, + * making it monotonic and keeping it within an expected window. This window + * is set up using jiffies. + * + * Furthermore, explicit sleep and wakeup hooks allow us to account for time + * that is otherwise invisible (TSC gets stopped). + * + * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat + * consistent between cpus (never more than 1 jiffies difference). + */ +#include <linux/sched.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> +#include <linux/ktime.h> +#include <linux/module.h> + + +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + +struct sched_clock_data { + /* + * Raw spinlock - this is a special case: this might be called + * from within instrumentation code so we dont want to do any + * instrumentation ourselves. + */ + raw_spinlock_t lock; + + unsigned long prev_jiffies; + u64 prev_raw; + u64 tick_raw; + u64 tick_gtod; + u64 clock; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); + +static inline struct sched_clock_data *this_scd(void) +{ + return &__get_cpu_var(sched_clock_data); +} + +static inline struct sched_clock_data *cpu_sdc(int cpu) +{ + return &per_cpu(sched_clock_data, cpu); +} + +void sched_clock_init(void) +{ + u64 ktime_now = ktime_to_ns(ktime_get()); + u64 now = 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct sched_clock_data *scd = cpu_sdc(cpu); + + scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + scd->prev_jiffies = jiffies; + scd->prev_raw = now; + scd->tick_raw = now; + scd->tick_gtod = ktime_now; + scd->clock = ktime_now; + } +} + +/* + * update the percpu scd from the raw @now value + * + * - filter out backward motion + * - use jiffies to generate a min,max window to clip the raw values + */ +static void __update_sched_clock(struct sched_clock_data *scd, u64 now) +{ + unsigned long now_jiffies = jiffies; + long delta_jiffies = now_jiffies - scd->prev_jiffies; + u64 clock = scd->clock; + u64 min_clock, max_clock; + s64 delta = now - scd->prev_raw; + + WARN_ON_ONCE(!irqs_disabled()); + min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC; + + if (unlikely(delta < 0)) { + clock++; + goto out; + } + + max_clock = min_clock + TICK_NSEC; + + if (unlikely(clock + delta > max_clock)) { + if (clock < max_clock) + clock = max_clock; + else + clock++; + } else { + clock += delta; + } + + out: + if (unlikely(clock < min_clock)) + clock = min_clock; + + scd->prev_raw = now; + scd->prev_jiffies = now_jiffies; + scd->clock = clock; +} + +static void lock_double_clock(struct sched_clock_data *data1, + struct sched_clock_data *data2) +{ + if (data1 < data2) { + __raw_spin_lock(&data1->lock); + __raw_spin_lock(&data2->lock); + } else { + __raw_spin_lock(&data2->lock); + __raw_spin_lock(&data1->lock); + } +} + +u64 sched_clock_cpu(int cpu) +{ + struct sched_clock_data *scd = cpu_sdc(cpu); + u64 now, clock; + + WARN_ON_ONCE(!irqs_disabled()); + now = sched_clock(); + + if (cpu != raw_smp_processor_id()) { + /* + * in order to update a remote cpu's clock based on our + * unstable raw time rebase it against: + * tick_raw (offset between raw counters) + * tick_gotd (tick offset between cpus) + */ + struct sched_clock_data *my_scd = this_scd(); + + lock_double_clock(scd, my_scd); + + now -= my_scd->tick_raw; + now += scd->tick_raw; + + now -= my_scd->tick_gtod; + now += scd->tick_gtod; + + __raw_spin_unlock(&my_scd->lock); + } else { + __raw_spin_lock(&scd->lock); + } + + __update_sched_clock(scd, now); + clock = scd->clock; + + __raw_spin_unlock(&scd->lock); + + return clock; +} + +void sched_clock_tick(void) +{ + struct sched_clock_data *scd = this_scd(); + u64 now, now_gtod; + + WARN_ON_ONCE(!irqs_disabled()); + + now = sched_clock(); + now_gtod = ktime_to_ns(ktime_get()); + + __raw_spin_lock(&scd->lock); + __update_sched_clock(scd, now); + /* + * update tick_gtod after __update_sched_clock() because that will + * already observe 1 new jiffy; adding a new tick_gtod to that would + * increase the clock 2 jiffies. + */ + scd->tick_raw = now; + scd->tick_gtod = now_gtod; + __raw_spin_unlock(&scd->lock); +} + +/* + * We are going deep-idle (irqs are disabled): + */ +void sched_clock_idle_sleep_event(void) +{ + sched_clock_cpu(smp_processor_id()); +} +EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); + +/* + * We just idled delta nanoseconds (called with irqs disabled): + */ +void sched_clock_idle_wakeup_event(u64 delta_ns) +{ + struct sched_clock_data *scd = this_scd(); + u64 now = sched_clock(); + + /* + * Override the previous timestamp and ignore all + * sched_clock() deltas that occured while we idled, + * and use the PM-provided delta_ns to advance the + * rq clock: + */ + __raw_spin_lock(&scd->lock); + scd->prev_raw = now; + scd->clock += delta_ns; + __raw_spin_unlock(&scd->lock); + + touch_softlockup_watchdog(); +} +EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); + +#endif + +/* + * Scheduler clock - returns current time in nanosec units. + * This is default implementation. + * Architectures and sub-architectures can override this. + */ +unsigned long long __attribute__((weak)) sched_clock(void) +{ + return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); +} |