From 298a5df45d497e66064fda22ef0abf13766d3333 Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Tue, 11 Sep 2007 15:24:03 -0700 Subject: Fix "no_sync_cmos_clock" logic inversion in kernel/time/ntp.c Seems to me that this timer will only get started on platforms that say they don't want it? Signed-off-by: Tony Breeds Cc: Paul Mackerras Cc: Gabriel Paubert Cc: Zachary Amsden Acked-by: Thomas Gleixner Cc: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/ntp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index cd91237dbfe3..de6a2d6b3ebb 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -226,7 +226,7 @@ static void sync_cmos_clock(unsigned long dummy) static void notify_cmos_timer(void) { - if (no_sync_cmos_clock) + if (!no_sync_cmos_clock) mod_timer(&sync_cmos_timer, jiffies + 1); } -- cgit v1.2.3 From 3be9095063885d482b87d3875ea7f28e635882d0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: timekeeping: access rtc outside of xtime lock Lockdep complains about the access of rtc in timekeeping_suspend inside the interrupt disabled region of the write locked xtime lock. Move the access outside. Signed-off-by: Thomas Gleixner Cc: John Stultz --- kernel/time/timekeeping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index acc417b5a9b7..f682091fa890 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -325,9 +325,10 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + timekeeping_suspend_time = read_persistent_clock(); + write_seqlock_irqsave(&xtime_lock, flags); timekeeping_suspended = 1; - timekeeping_suspend_time = read_persistent_clock(); write_sequnlock_irqrestore(&xtime_lock, flags); clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); -- cgit v1.2.3 From 6a669ee8a790487b7ec1edda762d39615a78264b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: timekeeping: Prevent time going backwards on resume Timekeeping resume adjusts xtime by adding the slept time in seconds and resets the reference value of the clock source (clock->cycle_last). clock->cycle last is used to calculate the delta between the last xtime update and the readout of the clock source in __get_nsec_offset(). xtime plus the offset is the current time. The resume code ignores the delta which had already elapsed between the last xtime update and the actual time of suspend. If the suspend time is short, then we can see time going backwards on resume. Suspend: offs_s = clock->read() - clock->cycle_last; now = xtime + offs_s; timekeeping_suspend_time = read_rtc(); Resume: sleep_time = read_rtc() - timekeeping_suspend_time; xtime.tv_sec += sleep_time; clock->cycle_last = clock->read(); offs_r = clock->read() - clock->cycle_last; now = xtime + offs_r; if sleep_time_seconds == 0 and offs_r < offs_s, then time goes backwards. Fix this by storing the offset from the last xtime update and add it to xtime during resume, when we reset clock->cycle_last: sleep_time = read_rtc() - timekeeping_suspend_time; xtime.tv_sec += sleep_time; xtime += offs_s; /* Fixup xtime offset at suspend time */ clock->cycle_last = clock->read(); offs_r = clock->read() - clock->cycle_last; now = xtime + offs_r; Thanks to Marcelo for tracking this down on the OLPC and providing the necessary details to analyze the root cause. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Tosatti --- kernel/time/timekeeping.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f682091fa890..4ad79f6bdec6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -217,6 +217,7 @@ static void change_clocksource(void) } #else static inline void change_clocksource(void) { } +static inline s64 __get_nsec_offset(void) { return 0; } #endif /** @@ -280,6 +281,8 @@ void __init timekeeping_init(void) static int timekeeping_suspended; /* time in seconds when suspend began */ static unsigned long timekeeping_suspend_time; +/* xtime offset when we went into suspend */ +static s64 timekeeping_suspend_nsecs; /** * timekeeping_resume - Resumes the generic timekeeping subsystem. @@ -305,6 +308,8 @@ static int timekeeping_resume(struct sys_device *dev) wall_to_monotonic.tv_sec -= sleep_length; total_sleep_time += sleep_length; } + /* Make sure that we have the correct xtime reference */ + timespec_add_ns(&xtime, timekeeping_suspend_nsecs); /* re-base the last cycle value */ clock->cycle_last = clocksource_read(clock); clock->error = 0; @@ -328,6 +333,8 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) timekeeping_suspend_time = read_persistent_clock(); write_seqlock_irqsave(&xtime_lock, flags); + /* Get the current xtime offset */ + timekeeping_suspend_nsecs = __get_nsec_offset(); timekeeping_suspended = 1; write_sequnlock_irqrestore(&xtime_lock, flags); -- cgit v1.2.3 From 07eec6af448d13a6a520d9c6f06f2e87f61b567a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: clockevents: Enforce oneshot broadcast when broadcast mask is set on resume The jinxed VAIO refuses to resume without hitting keys on the keyboard when this is not enforced. It is unclear why the cpu ends up in a lower C State without notifying the clock events layer, but enforcing the oneshot broadcast here is safe. Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index db8e0f3d409b..947959fb2bb5 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -382,12 +382,23 @@ static int tick_broadcast_set_event(ktime_t expires, int force) int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { + int cpu = smp_processor_id(); + + /* + * If the CPU is marked for broadcast, enforce oneshot + * broadcast mode. The jinxed VAIO does not resume otherwise. + * No idea why it ends up in a lower C State during resume + * without notifying the clock events layer. + */ + if (cpu_isset(cpu, tick_broadcast_mask)) + cpu_set(cpu, tick_broadcast_oneshot_mask); + clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); if(!cpus_empty(tick_broadcast_oneshot_mask)) tick_broadcast_set_event(ktime_get(), 1); - return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask); + return cpu_isset(cpu, tick_broadcast_oneshot_mask); } /* -- cgit v1.2.3 From 31d9b3938c0459e5e9755ce0a98ac1e24eeff972 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: clockevents: do not shutdown the oneshot broadcast device When a cpu goes offline it is removed from the broadcast masks. If the mask becomes empty the code shuts down the broadcast device. This is wrong, because the broadcast device needs to be ready for the online cpu going idle (into a c-state, which stops the local apic timer). Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 947959fb2bb5..aab881c86a1a 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -560,20 +560,17 @@ void tick_broadcast_switch_to_oneshot(void) */ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { - struct clock_event_device *bc; unsigned long flags; unsigned int cpu = *cpup; spin_lock_irqsave(&tick_broadcast_lock, flags); - bc = tick_broadcast_device.evtdev; + /* + * Clear the broadcast mask flag for the dead cpu, but do not + * stop the broadcast device! + */ cpu_clear(cpu, tick_broadcast_oneshot_mask); - if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) { - if (bc && cpus_empty(tick_broadcast_oneshot_mask)) - clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); - } - spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v1.2.3 From 5e41d0d60a534d2a5dc9772600a58f44c8d12506 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: clockevents: prevent stale tick update on offline cpu Taking a cpu offline removes the cpu from the online mask before the CPU_DEAD notification is done. The clock events layer does the cleanup of the dead CPU from the CPU_DEAD notifier chain. tick_do_timer_cpu is used to avoid xtime lock contention by assigning the task of jiffies xtime updates to one CPU. If a CPU is taken offline, then this assignment becomes stale. This went unnoticed because most of the time the offline CPU went dead before the online CPU reached __cpu_die(), where the CPU_DEAD state is checked. In the case that the offline CPU did not reach the DEAD state before we reach __cpu_die(), the code in there goes to sleep for 100ms. Due to the stale time update assignment, the system is stuck forever. Take the assignment away when a cpu is not longer in the cpu_online_mask. We do this in the last call to tick_nohz_stop_sched_tick() when the offline CPU is on the way to the final play_dead() idle entry. Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b416995b9757..8c3fef1db09c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -160,6 +160,18 @@ void tick_nohz_stop_sched_tick(void) cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); + /* + * If this cpu is offline and it is the one which updates + * jiffies, then give up the assignment and let it be taken by + * the cpu which runs the tick timer next. If we don't drop + * this here the jiffies might be stale and do_timer() never + * invoked. + */ + if (unlikely(!cpu_online(cpu))) { + if (cpu == tick_do_timer_cpu) + tick_do_timer_cpu = -1; + } + if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) goto end; -- cgit v1.2.3 From b7e113dc9d52c4a37d2da6fafe77959f3a28eccf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 22 Sep 2007 22:29:06 +0000 Subject: clockevents: remove the suspend/resume workaround^Wthinko In a desparate attempt to fix the suspend/resume problem on Andrews VAIO I added a workaround which enforced the broadcast of the oneshot timer on resume. This was actually resolving the problem on the VAIO but was just a stupid workaround, which was not tackling the root cause: the assignement of lower idle C-States in the ACPI processor_idle code. The cpuidle patches, which utilize the dynamic tick feature and go faster into deeper C-states exposed the problem again. The correct solution is the previous patch, which prevents lower C-states across the suspend/resume. Remove the enforcement code, including the conditional broadcast timer arming, which helped to pamper over the real problem for quite a time. The oneshot broadcast flag for the cpu, which runs the resume code can never be set at the time when this code is executed. It only gets set, when the CPU is entering a lower idle C-State. Signed-off-by: Thomas Gleixner Tested-by: Andrew Morton Cc: Len Brown Cc: Venkatesh Pallipadi Cc: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- kernel/time/tick-broadcast.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index aab881c86a1a..0962e0577660 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -382,23 +382,8 @@ static int tick_broadcast_set_event(ktime_t expires, int force) int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { - int cpu = smp_processor_id(); - - /* - * If the CPU is marked for broadcast, enforce oneshot - * broadcast mode. The jinxed VAIO does not resume otherwise. - * No idea why it ends up in a lower C State during resume - * without notifying the clock events layer. - */ - if (cpu_isset(cpu, tick_broadcast_mask)) - cpu_set(cpu, tick_broadcast_oneshot_mask); - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); - - if(!cpus_empty(tick_broadcast_oneshot_mask)) - tick_broadcast_set_event(ktime_get(), 1); - - return cpu_isset(cpu, tick_broadcast_oneshot_mask); + return 0; } /* -- cgit v1.2.3 From 74922be1485818ed368c4cf4f0b100f70bf01e08 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 7 Oct 2007 00:24:31 -0700 Subject: Fix timer_stats printout of events/sec When using /proc/timer_stats on ppc64 I noticed the events/sec field wasnt accurate. Sometimes the integer part was incorrect due to rounding (we werent taking the fractional seconds into consideration). The fraction part is also wrong, we need to pad the printf statement and take the bottom three digits of 1000 times the value. Signed-off-by: Anton Blanchard Acked-by: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timer_stats.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 3c38fb5eae1b..c36bb7ed0301 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -327,8 +327,9 @@ static int tstats_show(struct seq_file *m, void *v) ms = 1; if (events && period.tv_sec) - seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events, - events / period.tv_sec, events * 1000 / ms); + seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", + events, events * 1000 / ms, + (events * 1000000 / ms) % 1000); else seq_printf(m, "%ld total events\n", events); -- cgit v1.2.3