From 89804c022fe32541f5dd40a69e48ff4678d9ad24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 May 2008 10:36:22 +0200 Subject: x86: fix csum_partial() export Fix this symbol export problem: Building modules, stage 2. MODPOST 193 modules ERROR: "csum_partial" [fs/reiserfs/reiserfs.ko] undefined! make[1]: *** [__modpost] Error 1 make: *** [modules] Error 2 This is due to a known weakness of symbol exports: if a symbol's only in-core user is an EXPORT_SYMBOL from a lib-y section, the symbol is not linked in. The solution is to move the export to x8664_ksyms_64.c - but the real solution would be to fix kbuild. Signed-off-by: Ingo Molnar --- arch/x86/lib/csum-partial_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index bc503f506903..bf51144d97e1 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c @@ -136,8 +136,6 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) (__force u32)sum); } -EXPORT_SYMBOL(csum_partial); - /* * this routine is used for miscellaneous IP-like checksums, mainly * in icmp.c -- cgit v1.2.3 From 5c1ea08215f1f830dfaf4819a5f22efca41c3832 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sun, 25 May 2008 11:13:32 -0400 Subject: x86: enable preemption in delay The RT team has been searching for a nasty latency. This latency shows up out of the blue and has been seen to be as big as 5ms! Using ftrace I found the cause of the latency. pcscd-2995 3dNh1 52360300us : irq_exit (smp_apic_timer_interrupt) pcscd-2995 3dN.2 52360301us : idle_cpu (irq_exit) pcscd-2995 3dN.2 52360301us : rcu_irq_exit (irq_exit) pcscd-2995 3dN.1 52360771us : smp_apic_timer_interrupt (apic_timer_interrupt ) pcscd-2995 3dN.1 52360771us : exit_idle (smp_apic_timer_interrupt) Here's an example of a 400 us latency. pcscd took a timer interrupt and returned with "need resched" enabled, but did not reschedule until after the next interrupt came in at 52360771us 400us later! At first I thought we somehow missed a preemption check in entry.S. But I also noticed that this always seemed to happen during a __delay call. pcscd-2995 3dN.2 52360836us : rcu_irq_exit (irq_exit) pcscd-2995 3.N.. 52361265us : preempt_schedule (__delay) Looking at the x86 delay, I found my problem. In git commit 35d5d08a085c56f153458c3f5d8ce24123617faf, Andrew Morton placed preempt_disable around the entire delay due to TSC's not working nicely on SMP. Unfortunately for those that care about latencies this is devastating! Especially when we have callers to mdelay(8). Here I enable preemption during the loop and account for anytime the task migrates to a new CPU. The delay asked for may be extended a bit by the migration, but delay only guarantees that it will delay for that minimum time. Delaying longer should not be an issue. [ Thanks to Thomas Gleixner for spotting that cpu wasn't updated, and to place the rep_nop between preempt_enabled/disable. ] Signed-off-by: Steven Rostedt Cc: akpm@osdl.org Cc: Clark Williams Cc: Peter Zijlstra Cc: "Luis Claudio R. Goncalves" Cc: Gregory Haskins Cc: Linus Torvalds Cc: Andi Kleen Signed-off-by: Thomas Gleixner --- arch/x86/lib/delay_32.c | 31 +++++++++++++++++++++++++++---- arch/x86/lib/delay_64.c | 30 ++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 8 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index 4535e6d147ad..d710f2d167bb 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -44,13 +44,36 @@ static void delay_loop(unsigned long loops) static void delay_tsc(unsigned long loops) { unsigned long bclock, now; + int cpu; - preempt_disable(); /* TSC's are per-cpu */ + preempt_disable(); + cpu = smp_processor_id(); rdtscl(bclock); - do { - rep_nop(); + for (;;) { rdtscl(now); - } while ((now-bclock) < loops); + if ((now - bclock) >= loops) + break; + + /* Allow RT tasks to run */ + preempt_enable(); + rep_nop(); + preempt_disable(); + + /* + * It is possible that we moved to another CPU, and + * since TSC's are per-cpu we need to calculate + * that. The delay must guarantee that we wait "at + * least" the amount of time. Being moved to another + * CPU could make the wait longer but we just need to + * make sure we waited long enough. Rebalance the + * counter for this CPU. + */ + if (unlikely(cpu != smp_processor_id())) { + loops -= (now - bclock); + cpu = smp_processor_id(); + rdtscl(bclock); + } + } preempt_enable(); } diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index bbc610518516..4c441be92641 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -31,14 +31,36 @@ int __devinit read_current_timer(unsigned long *timer_value) void __delay(unsigned long loops) { unsigned bclock, now; + int cpu; - preempt_disable(); /* TSC's are pre-cpu */ + preempt_disable(); + cpu = smp_processor_id(); rdtscl(bclock); - do { - rep_nop(); + for (;;) { rdtscl(now); + if ((now - bclock) >= loops) + break; + + /* Allow RT tasks to run */ + preempt_enable(); + rep_nop(); + preempt_disable(); + + /* + * It is possible that we moved to another CPU, and + * since TSC's are per-cpu we need to calculate + * that. The delay must guarantee that we wait "at + * least" the amount of time. Being moved to another + * CPU could make the wait longer but we just need to + * make sure we waited long enough. Rebalance the + * counter for this CPU. + */ + if (unlikely(cpu != smp_processor_id())) { + loops -= (now - bclock); + cpu = smp_processor_id(); + rdtscl(bclock); + } } - while ((now-bclock) < loops); preempt_enable(); } EXPORT_SYMBOL(__delay); -- cgit v1.2.3