diff options
Diffstat (limited to 'arch/powerpc/kernel/time.c')
-rw-r--r-- | arch/powerpc/kernel/time.c | 503 |
1 files changed, 227 insertions, 276 deletions
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index c627cf86d1e3..9368da371f36 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -65,24 +65,68 @@ #include <asm/div64.h> #include <asm/smp.h> #include <asm/vdso_datapage.h> -#ifdef CONFIG_PPC64 #include <asm/firmware.h> -#endif #ifdef CONFIG_PPC_ISERIES #include <asm/iseries/it_lp_queue.h> #include <asm/iseries/hv_call_xm.h> #endif -#include <asm/smp.h> -/* keep track of when we need to update the rtc */ -time_t last_rtc_update; +/* powerpc clocksource/clockevent code */ + +#include <linux/clockchips.h> +#include <linux/clocksource.h> + +static cycle_t rtc_read(void); +static struct clocksource clocksource_rtc = { + .name = "rtc", + .rating = 400, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .mask = CLOCKSOURCE_MASK(64), + .shift = 22, + .mult = 0, /* To be filled in */ + .read = rtc_read, +}; + +static cycle_t timebase_read(void); +static struct clocksource clocksource_timebase = { + .name = "timebase", + .rating = 400, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .mask = CLOCKSOURCE_MASK(64), + .shift = 22, + .mult = 0, /* To be filled in */ + .read = timebase_read, +}; + +#define DECREMENTER_MAX 0x7fffffff + +static int decrementer_set_next_event(unsigned long evt, + struct clock_event_device *dev); +static void decrementer_set_mode(enum clock_event_mode mode, + struct clock_event_device *dev); + +static struct clock_event_device decrementer_clockevent = { + .name = "decrementer", + .rating = 200, + .shift = 16, + .mult = 0, /* To be filled in */ + .irq = 0, + .set_next_event = decrementer_set_next_event, + .set_mode = decrementer_set_mode, + .features = CLOCK_EVT_FEAT_ONESHOT, +}; + +static DEFINE_PER_CPU(struct clock_event_device, decrementers); +void init_decrementer_clockevent(void); +static DEFINE_PER_CPU(u64, decrementer_next_tb); + #ifdef CONFIG_PPC_ISERIES static unsigned long __initdata iSeries_recal_titan; static signed long __initdata iSeries_recal_tb; -#endif -/* 
The decrementer counts down by 128 every 128ns on a 601. */ -#define DECREMENTER_COUNT_601 (1000000000 / HZ) +/* Forward declaration is only needed for iSeries compiles */ +void __init clocksource_init(void); +#endif #define XSEC_PER_SEC (1024*1024) @@ -349,98 +393,6 @@ void udelay(unsigned long usecs) } EXPORT_SYMBOL(udelay); -static __inline__ void timer_check_rtc(void) -{ - /* - * update the rtc when needed, this should be performed on the - * right fraction of a second. Half or full second ? - * Full second works on mk48t59 clocks, others need testing. - * Note that this update is basically only used through - * the adjtimex system calls. Setting the HW clock in - * any other way is a /dev/rtc and userland business. - * This is still wrong by -0.5/+1.5 jiffies because of the - * timer interrupt resolution and possible delay, but here we - * hit a quantization limit which can only be solved by higher - * resolution timers and decoupling time management from timer - * interrupts. This is also wrong on the clocks - * which require being written at the half second boundary. - * We should have an rtc call that only sets the minutes and - * seconds like on Intel to avoid problems with non UTC clocks. - */ - if (ppc_md.set_rtc_time && ntp_synced() && - xtime.tv_sec - last_rtc_update >= 659 && - abs((xtime.tv_nsec/1000) - (1000000-1000000/HZ)) < 500000/HZ) { - struct rtc_time tm; - to_tm(xtime.tv_sec + 1 + timezone_offset, &tm); - tm.tm_year -= 1900; - tm.tm_mon -= 1; - if (ppc_md.set_rtc_time(&tm) == 0) - last_rtc_update = xtime.tv_sec + 1; - else - /* Try again one minute later */ - last_rtc_update += 60; - } -} - -/* - * This version of gettimeofday has microsecond resolution. 
- */ -static inline void __do_gettimeofday(struct timeval *tv) -{ - unsigned long sec, usec; - u64 tb_ticks, xsec; - struct gettimeofday_vars *temp_varp; - u64 temp_tb_to_xs, temp_stamp_xsec; - - /* - * These calculations are faster (gets rid of divides) - * if done in units of 1/2^20 rather than microseconds. - * The conversion to microseconds at the end is done - * without a divide (and in fact, without a multiply) - */ - temp_varp = do_gtod.varp; - - /* Sampling the time base must be done after loading - * do_gtod.varp in order to avoid racing with update_gtod. - */ - data_barrier(temp_varp); - tb_ticks = get_tb() - temp_varp->tb_orig_stamp; - temp_tb_to_xs = temp_varp->tb_to_xs; - temp_stamp_xsec = temp_varp->stamp_xsec; - xsec = temp_stamp_xsec + mulhdu(tb_ticks, temp_tb_to_xs); - sec = xsec / XSEC_PER_SEC; - usec = (unsigned long)xsec & (XSEC_PER_SEC - 1); - usec = SCALE_XSEC(usec, 1000000); - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -void do_gettimeofday(struct timeval *tv) -{ - if (__USE_RTC()) { - /* do this the old way */ - unsigned long flags, seq; - unsigned int sec, nsec, usec; - - do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); - sec = xtime.tv_sec; - nsec = xtime.tv_nsec + tb_ticks_since(tb_last_jiffy); - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); - usec = nsec / 1000; - while (usec >= 1000000) { - usec -= 1000000; - ++sec; - } - tv->tv_sec = sec; - tv->tv_usec = usec; - return; - } - __do_gettimeofday(tv); -} - -EXPORT_SYMBOL(do_gettimeofday); /* * There are two copies of tb_to_xs and stamp_xsec so that no @@ -486,56 +438,6 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, ++(vdso_data->tb_update_count); } -/* - * When the timebase - tb_orig_stamp gets too big, we do a manipulation - * between tb_orig_stamp and stamp_xsec. The goal here is to keep the - * difference tb - tb_orig_stamp small enough to always fit inside a - * 32 bits number. 
This is a requirement of our fast 32 bits userland - * implementation in the vdso. If we "miss" a call to this function - * (interrupt latency, CPU locked in a spinlock, ...) and we end up - * with a too big difference, then the vdso will fallback to calling - * the syscall - */ -static __inline__ void timer_recalc_offset(u64 cur_tb) -{ - unsigned long offset; - u64 new_stamp_xsec; - u64 tlen, t2x; - u64 tb, xsec_old, xsec_new; - struct gettimeofday_vars *varp; - - if (__USE_RTC()) - return; - tlen = current_tick_length(); - offset = cur_tb - do_gtod.varp->tb_orig_stamp; - if (tlen == last_tick_len && offset < 0x80000000u) - return; - if (tlen != last_tick_len) { - t2x = mulhdu(tlen << TICKLEN_SHIFT, ticklen_to_xs); - last_tick_len = tlen; - } else - t2x = do_gtod.varp->tb_to_xs; - new_stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC; - do_div(new_stamp_xsec, 1000000000); - new_stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC; - - ++vdso_data->tb_update_count; - smp_mb(); - - /* - * Make sure time doesn't go backwards for userspace gettimeofday. 
- */ - tb = get_tb(); - varp = do_gtod.varp; - xsec_old = mulhdu(tb - varp->tb_orig_stamp, varp->tb_to_xs) - + varp->stamp_xsec; - xsec_new = mulhdu(tb - cur_tb, t2x) + new_stamp_xsec; - if (xsec_new < xsec_old) - new_stamp_xsec += xsec_old - xsec_new; - - update_gtod(cur_tb, new_stamp_xsec, t2x); -} - #ifdef CONFIG_SMP unsigned long profile_pc(struct pt_regs *regs) { @@ -607,6 +509,8 @@ static int __init iSeries_tb_recal(void) iSeries_recal_titan = titan; iSeries_recal_tb = tb; + /* Called here as now we know accurate values for the timebase */ + clocksource_init(); return 0; } late_initcall(iSeries_tb_recal); @@ -636,20 +540,30 @@ void __init iSeries_time_init_early(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - int next_dec; int cpu = smp_processor_id(); - unsigned long ticks; - u64 tb_next_jiffy; + struct clock_event_device *evt = &per_cpu(decrementers, cpu); + u64 now; + + /* Ensure a positive value is written to the decrementer, or else + * some CPUs will continue to take decrementer exceptions */ + set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif + now = get_tb_or_rtc(); + if (now < per_cpu(decrementer_next_tb, cpu)) { + /* not time for this event yet */ + now = per_cpu(decrementer_next_tb, cpu) - now; + if (now <= DECREMENTER_MAX) + set_dec((unsigned int)now - 1); + return; + } old_regs = set_irq_regs(regs); irq_enter(); - profile_tick(CPU_PROFILING); calculate_steal_time(); #ifdef CONFIG_PPC_ISERIES @@ -657,46 +571,20 @@ void timer_interrupt(struct pt_regs * regs) get_lppaca()->int_dword.fields.decr_int = 0; #endif - while ((ticks = tb_ticks_since(per_cpu(last_jiffy, cpu))) - >= tb_ticks_per_jiffy) { - /* Update last_jiffy */ - per_cpu(last_jiffy, cpu) += tb_ticks_per_jiffy; - /* Handle RTCL overflow on 601 */ - if (__USE_RTC() && per_cpu(last_jiffy, cpu) >= 1000000000) - per_cpu(last_jiffy, cpu) -= 1000000000; - - /* - * We cannot disable the decrementer, so 
in the period - * between this cpu's being marked offline in cpu_online_map - * and calling stop-self, it is taking timer interrupts. - * Avoid calling into the scheduler rebalancing code if this - * is the case. - */ - if (!cpu_is_offline(cpu)) - account_process_time(regs); - - /* - * No need to check whether cpu is offline here; boot_cpuid - * should have been fixed up by now. - */ - if (cpu != boot_cpuid) - continue; + /* + * We cannot disable the decrementer, so in the period + * between this cpu's being marked offline in cpu_online_map + * and calling stop-self, it is taking timer interrupts. + * Avoid calling into the scheduler rebalancing code if this + * is the case. + */ + if (!cpu_is_offline(cpu)) + account_process_time(regs); - write_seqlock(&xtime_lock); - tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy; - if (__USE_RTC() && tb_next_jiffy >= 1000000000) - tb_next_jiffy -= 1000000000; - if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) { - tb_last_jiffy = tb_next_jiffy; - do_timer(1); - timer_recalc_offset(tb_last_jiffy); - timer_check_rtc(); - } - write_sequnlock(&xtime_lock); - } - - next_dec = tb_ticks_per_jiffy - ticks; - set_dec(next_dec); + if (evt->event_handler) + evt->event_handler(evt); + else + evt->set_next_event(DECREMENTER_MAX, evt); #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) @@ -762,71 +650,6 @@ unsigned long long sched_clock(void) return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, new_sec = tv->tv_sec; - long wtm_nsec, new_nsec = tv->tv_nsec; - unsigned long flags; - u64 new_xsec; - unsigned long tb_delta; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irqsave(&xtime_lock, flags); - - /* - * Updating the RTC is not the job of this code. If the time is - * stepped under NTP, the RTC will be updated after STA_UNSYNC - * is cleared. 
Tools like clock/hwclock either copy the RTC - * to the system time, in which case there is no point in writing - * to the RTC again, or write to the RTC but then they don't call - * settimeofday to perform this operation. - */ - - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_mb(); - - /* - * Subtract off the number of nanoseconds since the - * beginning of the last tick. - */ - tb_delta = tb_ticks_since(tb_last_jiffy); - tb_delta = mulhdu(tb_delta, do_gtod.varp->tb_to_xs); /* in xsec */ - new_nsec -= SCALE_XSEC(tb_delta, 1000000000); - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - new_sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - new_nsec); - - set_normalized_timespec(&xtime, new_sec, new_nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - /* In case of a large backwards jump in time with NTP, we want the - * clock to be updated as soon as the PLL is again in lock. - */ - last_rtc_update = new_sec - 658; - - ntp_clear(); - - new_xsec = xtime.tv_nsec; - if (new_xsec != 0) { - new_xsec *= XSEC_PER_SEC; - do_div(new_xsec, NSEC_PER_SEC); - } - new_xsec += (u64)xtime.tv_sec * XSEC_PER_SEC; - update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs); - - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; - - write_sequnlock_irqrestore(&xtime_lock, flags); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - static int __init get_freq(char *name, int cells, unsigned long *val) { struct device_node *cpu; @@ -869,7 +692,7 @@ void __init generic_calibrate_decr(void) "(not found)\n"); } -#ifdef CONFIG_BOOKE +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); mtspr(SPRN_TBWU, 0); @@ -882,12 +705,35 @@ void __init generic_calibrate_decr(void) #endif } -unsigned long get_boot_time(void) +int update_persistent_clock(struct timespec now) { struct rtc_time tm; - if 
(ppc_md.get_boot_time) - return ppc_md.get_boot_time(); + if (!ppc_md.set_rtc_time) + return 0; + + to_tm(now.tv_sec + 1 + timezone_offset, &tm); + tm.tm_year -= 1900; + tm.tm_mon -= 1; + + return ppc_md.set_rtc_time(&tm); +} + +unsigned long read_persistent_clock(void) +{ + struct rtc_time tm; + static int first = 1; + + /* XXX this is a little fragile but will work okay in the short term */ + if (first) { + first = 0; + if (ppc_md.time_init) + timezone_offset = ppc_md.time_init(); + + /* get_boot_time() isn't guaranteed to be safe to call late */ + if (ppc_md.get_boot_time) + return ppc_md.get_boot_time() -timezone_offset; + } if (!ppc_md.get_rtc_time) return 0; ppc_md.get_rtc_time(&tm); @@ -895,18 +741,128 @@ unsigned long get_boot_time(void) tm.tm_hour, tm.tm_min, tm.tm_sec); } +/* clocksource code */ +static cycle_t rtc_read(void) +{ + return (cycle_t)get_rtc(); +} + +static cycle_t timebase_read(void) +{ + return (cycle_t)get_tb(); +} + +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +{ + u64 t2x, stamp_xsec; + + if (clock != &clocksource_timebase) + return; + + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_mb(); + + /* XXX this assumes clock->shift == 22 */ + /* 4611686018 ~= 2^(20+64-22) / 1e9 */ + t2x = (u64) clock->mult * 4611686018ULL; + stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC; + do_div(stamp_xsec, 1000000000); + stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC; + update_gtod(clock->cycle_last, stamp_xsec, t2x); +} + +void update_vsyscall_tz(void) +{ + /* Make userspace gettimeofday spin until we're done. 
*/ + ++vdso_data->tb_update_count; + smp_mb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_mb(); + ++vdso_data->tb_update_count; +} + +void __init clocksource_init(void) +{ + struct clocksource *clock; + + if (__USE_RTC()) + clock = &clocksource_rtc; + else + clock = &clocksource_timebase; + + clock->mult = clocksource_hz2mult(tb_ticks_per_sec, clock->shift); + + if (clocksource_register(clock)) { + printk(KERN_ERR "clocksource: %s is already registered\n", + clock->name); + return; + } + + printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n", + clock->name, clock->mult, clock->shift); +} + +static int decrementer_set_next_event(unsigned long evt, + struct clock_event_device *dev) +{ + __get_cpu_var(decrementer_next_tb) = get_tb_or_rtc() + evt; + /* The decrementer interrupts on the 0 -> -1 transition */ + if (evt) + --evt; + set_dec(evt); + return 0; +} + +static void decrementer_set_mode(enum clock_event_mode mode, + struct clock_event_device *dev) +{ + if (mode != CLOCK_EVT_MODE_ONESHOT) + decrementer_set_next_event(DECREMENTER_MAX, dev); +} + +static void register_decrementer_clockevent(int cpu) +{ + struct clock_event_device *dec = &per_cpu(decrementers, cpu); + + *dec = decrementer_clockevent; + dec->cpumask = cpumask_of_cpu(cpu); + + printk(KERN_ERR "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n", + dec->name, dec->mult, dec->shift, cpu); + + clockevents_register_device(dec); +} + +void init_decrementer_clockevent(void) +{ + int cpu = smp_processor_id(); + + decrementer_clockevent.mult = div_sc(ppc_tb_freq, NSEC_PER_SEC, + decrementer_clockevent.shift); + decrementer_clockevent.max_delta_ns = + clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); + decrementer_clockevent.min_delta_ns = 1000; + + register_decrementer_clockevent(cpu); +} + +void secondary_cpu_time_init(void) +{ + /* FIXME: Should make unrelated change to move snapshot_timebase + * call here ! 
*/ + register_decrementer_clockevent(smp_processor_id()); +} + /* This function is only called on the boot processor */ void __init time_init(void) { unsigned long flags; - unsigned long tm = 0; struct div_result res; u64 scale, x; unsigned shift; - if (ppc_md.time_init != NULL) - timezone_offset = ppc_md.time_init(); - if (__USE_RTC()) { /* 601 processor: dec counts down by 128 every 128ns */ ppc_tb_freq = 1000000000; @@ -981,19 +937,14 @@ void __init time_init(void) /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ boot_tb = get_tb_or_rtc(); - tm = get_boot_time(); - write_seqlock_irqsave(&xtime_lock, flags); /* If platform provided a timezone (pmac), we correct the time */ if (timezone_offset) { sys_tz.tz_minuteswest = -timezone_offset / 60; sys_tz.tz_dsttime = 0; - tm -= timezone_offset; } - xtime.tv_sec = tm; - xtime.tv_nsec = 0; do_gtod.varp = &do_gtod.vars[0]; do_gtod.var_idx = 0; do_gtod.varp->tb_orig_stamp = tb_last_jiffy; @@ -1011,13 +962,13 @@ void __init time_init(void) time_freq = 0; - last_rtc_update = xtime.tv_sec; - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); - /* Not exact, but the timer interrupt takes care of this */ - set_dec(tb_ticks_per_jiffy); + /* Register the clocksource, if we're not running on iSeries */ + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + clocksource_init(); + + init_decrementer_clockevent(); } |