From cd05a1f818073a623455a58e756c5b419fc98db9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 17 Mar 2007 00:25:52 +0100 Subject: [PATCH] clockevents: Fix suspend/resume to disk hangs I finally found a dual core box, which survives suspend/resume without crashing in the middle of nowhere. Sigh, I never figured out from the code and the bug reports what's going on. The observed hangs are caused by a stale state transition of the clock event devices, which keeps the RCU synchronization away from completion, when the non boot CPU is brought back up. The suspend/resume in oneshot mode needs similar care to the periodic mode during suspend to RAM. My assumption that the state transitions during the different shutdown/bringups of s2disk would go through the periodic boot phase and then switch over to highres resp. nohz mode was simply wrong. Add the appropriate suspend / resume handling for the non periodic modes. Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/time/tick-broadcast.c | 27 ++++++++++++++++++++++----- kernel/time/tick-common.c | 13 +++++++------ kernel/time/tick-internal.h | 11 ++++++++++- kernel/time/tick-oneshot.c | 12 ++++++++++++ 4 files changed, 51 insertions(+), 12 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 5567745470f7..eadfce2fff74 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -307,12 +307,19 @@ int tick_resume_broadcast(void) spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; - if (bc) { - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC && - !cpus_empty(tick_broadcast_mask)) - tick_broadcast_start_periodic(bc); - broadcast = cpu_isset(smp_processor_id(), tick_broadcast_mask); + if (bc) { + switch (tick_broadcast_device.mode) { + case TICKDEV_MODE_PERIODIC: + if(!cpus_empty(tick_broadcast_mask)) + tick_broadcast_start_periodic(bc); + broadcast = 
cpu_isset(smp_processor_id(), + tick_broadcast_mask); + break; + case TICKDEV_MODE_ONESHOT: + broadcast = tick_resume_broadcast_oneshot(bc); + break; + } } spin_unlock_irqrestore(&tick_broadcast_lock, flags); @@ -347,6 +354,16 @@ static int tick_broadcast_set_event(ktime_t expires, int force) } } +int tick_resume_broadcast_oneshot(struct clock_event_device *bc) +{ + clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + + if(!cpus_empty(tick_broadcast_oneshot_mask)) + tick_broadcast_set_event(ktime_get(), 1); + + return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask); +} + /* * Reprogram the broadcast device: * diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 43ba1bdec14c..bfda3f7f0716 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -298,18 +298,17 @@ static void tick_shutdown(unsigned int *cpup) spin_unlock_irqrestore(&tick_device_lock, flags); } -static void tick_suspend_periodic(void) +static void tick_suspend(void) { struct tick_device *td = &__get_cpu_var(tick_cpu_device); unsigned long flags; spin_lock_irqsave(&tick_device_lock, flags); - if (td->mode == TICKDEV_MODE_PERIODIC) - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN); spin_unlock_irqrestore(&tick_device_lock, flags); } -static void tick_resume_periodic(void) +static void tick_resume(void) { struct tick_device *td = &__get_cpu_var(tick_cpu_device); unsigned long flags; @@ -317,6 +316,8 @@ static void tick_resume_periodic(void) spin_lock_irqsave(&tick_device_lock, flags); if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(td->evtdev, 0); + else + tick_resume_oneshot(); spin_unlock_irqrestore(&tick_device_lock, flags); } @@ -348,13 +349,13 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, break; case CLOCK_EVT_NOTIFY_SUSPEND: - tick_suspend_periodic(); + tick_suspend(); tick_suspend_broadcast(); break; case CLOCK_EVT_NOTIFY_RESUME: if 
(!tick_resume_broadcast()) - tick_resume_periodic(); + tick_resume(); break; default: diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 75890efd24ff..c9d203bde518 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -19,12 +19,13 @@ extern void tick_setup_oneshot(struct clock_event_device *newdev, extern int tick_program_event(ktime_t expires, int force); extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); - +extern void tick_resume_oneshot(void); # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern void tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); +extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); # else /* BROADCAST */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { @@ -43,6 +44,10 @@ void tick_setup_oneshot(struct clock_event_device *newdev, { BUG(); } +static inline void tick_resume_oneshot(void) +{ + BUG(); +} static inline int tick_program_event(ktime_t expires, int force) { return 0; @@ -54,6 +59,10 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) } static inline void tick_broadcast_oneshot_control(unsigned long reason) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } +static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) +{ + return 0; +} #endif /* !TICK_ONESHOT */ /* diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 2e8b7ff863cc..f6997ab0c3c9 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -40,6 +40,18 @@ int tick_program_event(ktime_t expires, int force) } } +/** + * tick_resume_onshot - resume oneshot mode + */ +void tick_resume_oneshot(void) +{ + 
struct tick_device *td = &__get_cpu_var(tick_cpu_device); + struct clock_event_device *dev = td->evtdev; + + clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + tick_program_event(ktime_get(), 1); +} + /** * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) */ -- cgit v1.2.3 From 0444b3035e5f4981f4d1d96f9f0c3cbada1e6d69 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 23 Mar 2007 00:09:58 -0700 Subject: [PATCH] time: fix formatting in /proc/timer_list Fix the print formatting of three unsigned long fields in /proc/timer_list, which are currently being formatted as signed long. Signed-off-by: James Morris Acked-by: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timer_list.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index f82c635c3d5c..59df5e8555a8 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -194,9 +194,9 @@ print_tickdevice(struct seq_file *m, struct tick_device *td) return; } SEQ_printf(m, "%s\n", dev->name); - SEQ_printf(m, " max_delta_ns: %ld\n", dev->max_delta_ns); - SEQ_printf(m, " min_delta_ns: %ld\n", dev->min_delta_ns); - SEQ_printf(m, " mult: %ld\n", dev->mult); + SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns); + SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns); + SEQ_printf(m, " mult: %lu\n", dev->mult); SEQ_printf(m, " shift: %d\n", dev->shift); SEQ_printf(m, " mode: %d\n", dev->mode); SEQ_printf(m, " next_event: %Ld nsecs\n", -- cgit v1.2.3 From 948ac6d71cf868b431adb3139d8dfbd9c4e4a6ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Mar 2007 14:42:51 +0200 Subject: [PATCH] clocksource: Fix thinko in watchdog selection The watchdog implementation excludes low res / non continuous clocksources from being selected as a watchdog reference unintentionally. 
Allow using jiffies/PIT as a watchdog reference as long as no better clocksource is available. This is necessary to detect TSC breakage on systems which have no pmtimer/hpet. The main goal of the initial patch (preventing a switch to highres/nohz when no reliable fallback clocksource is available) is still guaranteed by the checks in clocksource_watchdog(). Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/time/clocksource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5b0e46b56fd0..fe5c7db24247 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -151,7 +151,8 @@ static void clocksource_check_watchdog(struct clocksource *cs) watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer(&watchdog_timer); } - } else if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) { + } else { + if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; if (!watchdog || cs->rating > watchdog->rating) { -- cgit v1.2.3 From 291bc047e125ff02c9affe06a7df28bed57b054d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 26 Mar 2007 11:21:08 +0200 Subject: [PATCH] clockevents: remove bad designed sysfs support for now The current sysfs support of clockevents does not obey the "only one value per file" rule. The real fix is not 2.6.21 material. Therefore remove the sysfs support for now. 
Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- kernel/time/clockevents.c | 69 ----------------------------------------------- 1 file changed, 69 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 67932ea78c17..76212b2a99de 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -274,72 +274,3 @@ void clockevents_notify(unsigned long reason, void *arg) } EXPORT_SYMBOL_GPL(clockevents_notify); -#ifdef CONFIG_SYSFS - -/** - * clockevents_show_registered - sysfs interface for listing clockevents - * @dev: unused - * @buf: char buffer to be filled with clock events list - * - * Provides sysfs interface for listing registered clock event devices - */ -static ssize_t clockevents_show_registered(struct sys_device *dev, char *buf) -{ - struct list_head *tmp; - char *p = buf; - int cpu; - - spin_lock(&clockevents_lock); - - list_for_each(tmp, &clockevent_devices) { - struct clock_event_device *ce; - - ce = list_entry(tmp, struct clock_event_device, list); - p += sprintf(p, "%-20s F:%04x M:%d", ce->name, - ce->features, ce->mode); - p += sprintf(p, " C:"); - if (!cpus_equal(ce->cpumask, cpu_possible_map)) { - for_each_cpu_mask(cpu, ce->cpumask) - p += sprintf(p, " %d", cpu); - } else { - /* - * FIXME: Add the cpu which is handling this sucker - */ - } - p += sprintf(p, "\n"); - } - - spin_unlock(&clockevents_lock); - - return p - buf; -} - -/* - * Sysfs setup bits: - */ -static SYSDEV_ATTR(registered, 0600, - clockevents_show_registered, NULL); - -static struct sysdev_class clockevents_sysclass = { - set_kset_name("clockevents"), -}; - -static struct sys_device clockevents_sys_device = { - .id = 0, - .cls = &clockevents_sysclass, -}; - -static int __init clockevents_sysfs_init(void) -{ - int error = sysdev_class_register(&clockevents_sysclass); - - if (!error) - error = sysdev_register(&clockevents_sys_device); - if (!error) - 
error = sysdev_create_file( - &clockevents_sys_device, - &attr_registered); - return error; -} -device_initcall(clockevents_sysfs_init); -#endif -- cgit v1.2.3 From d62ac21aa075c8ddf3d02a98d28afce635e77e8e Mon Sep 17 00:00:00 2001 From: john stultz Date: Mon, 26 Mar 2007 21:32:26 -0800 Subject: [PATCH] ntp: avoid time_offset overflows I've been seeing some odd NTP behavior recently on a few boxes and finally narrowed it down to time_offset overflowing when converted to SHIFT_UPDATE units (which was a side effect from my HZfreeNTP patch). This patch converts time_offset from a long to a s64 which resolves the issue. [tglx@linutronix.de: signedness fixes] Signed-off-by: John Stultz Cc: Roman Zippel Cc: john stultz Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/ntp.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index eb12509e00bd..cb25649c6f50 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -32,7 +32,7 @@ static u64 tick_length, tick_length_base; /* TIME_ERROR prevents overwriting the CMOS clock */ static int time_state = TIME_OK; /* clock synchronization status */ int time_status = STA_UNSYNC; /* clock status bits */ -static long time_offset; /* time adjustment (ns) */ +static s64 time_offset; /* time adjustment (ns) */ static long time_constant = 2; /* pll time constant */ long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ @@ -196,7 +196,7 @@ void __attribute__ ((weak)) notify_arch_cmos_timer(void) */ int do_adjtimex(struct timex *txc) { - long ltemp, mtemp, save_adjust; + long mtemp, save_adjust, rem; s64 freq_adj, temp64; int result; @@ -277,14 +277,14 @@ int do_adjtimex(struct timex *txc) time_adjust = txc->offset; } else if (time_status & STA_PLL) { - ltemp = txc->offset * NSEC_PER_USEC; + 
time_offset = txc->offset * NSEC_PER_USEC; /* * Scale the phase adjustment and * clamp to the operating range. */ - time_offset = min(ltemp, MAXPHASE * NSEC_PER_USEC); - time_offset = max(time_offset, -MAXPHASE * NSEC_PER_USEC); + time_offset = min(time_offset, (s64)MAXPHASE * NSEC_PER_USEC); + time_offset = max(time_offset, (s64)-MAXPHASE * NSEC_PER_USEC); /* * Select whether the frequency is to be controlled @@ -297,11 +297,11 @@ int do_adjtimex(struct timex *txc) mtemp = xtime.tv_sec - time_reftime; time_reftime = xtime.tv_sec; - freq_adj = (s64)time_offset * mtemp; + freq_adj = time_offset * mtemp; freq_adj = shift_right(freq_adj, time_constant * 2 + (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { - temp64 = (s64)time_offset << (SHIFT_NSEC - SHIFT_FLL); + temp64 = time_offset << (SHIFT_NSEC - SHIFT_FLL); if (time_offset < 0) { temp64 = -temp64; do_div(temp64, mtemp); @@ -314,8 +314,10 @@ int do_adjtimex(struct timex *txc) freq_adj += time_freq; freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); - time_offset = (time_offset / NTP_INTERVAL_FREQ) - << SHIFT_UPDATE; + time_offset = div_long_long_rem_signed(time_offset, + NTP_INTERVAL_FREQ, + &rem); + time_offset <<= SHIFT_UPDATE; } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ if (txc->modes & ADJ_TICK) @@ -328,12 +330,12 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) result = TIME_ERROR; if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) - txc->offset = save_adjust; + txc->offset = save_adjust; else - txc->offset = shift_right(time_offset, SHIFT_UPDATE) - * NTP_INTERVAL_FREQ / 1000; - txc->freq = (time_freq / NSEC_PER_USEC) - << (SHIFT_USEC - SHIFT_NSEC); + txc->offset = ((long)shift_right(time_offset, SHIFT_UPDATE)) * + NTP_INTERVAL_FREQ / 1000; + txc->freq = (time_freq / NSEC_PER_USEC) << + (SHIFT_USEC - SHIFT_NSEC); txc->maxerror = time_maxerror; txc->esterror = time_esterror; 
txc->status = time_status; -- cgit v1.2.3 From 98de9e3ba23422b5c45b91c93aec1cb1e17514dc Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 4 Apr 2007 19:08:24 -0700 Subject: [PATCH] fix jiffies clocksource inittime In debugging a problem w/ the -rt tree, I noticed that on systems that mark the tsc as unstable before it is registered, the TSC would still be selected and used for a short period of time. Digging in, it looks to be a result of the mix of the clocksource list changes and my clocksource initialization changes. With the -rt tree, using a bad TSC, even for a short period of time, can result in a hang at boot. I was not able to reproduce this hang w/ mainline, but I'm not completely certain that someone won't trip on it. This patch resolves the issue by initializing the jiffies clocksource earlier so a bad TSC won't get selected just because nothing else is yet registered. Signed-off-by: John Stultz Acked-by: Thomas Gleixner Cc: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/jiffies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 3be8da8fed7e..4c256fdb8875 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -69,4 +69,4 @@ static int __init init_jiffies_clocksource(void) return clocksource_register(&clocksource_jiffies); } -module_init(init_jiffies_clocksource); +core_initcall(init_jiffies_clocksource); -- cgit v1.2.3