From b615ebdac97c648a2ae7d23c5a0bbb3972adf928 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:00 +0100 Subject: [PATCH] x86: shorten lines in unwinder to be <= 80 characters Andrew complained about > 80 character lines in the new unwinder. Fix that. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 0d65b22f229c..d3f43c958ca1 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -235,6 +235,8 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context) return n; } +#define MSG(txt) ops->warning(data, txt) + /* * x86-64 can have upto three kernel stacks: * process stack @@ -248,11 +250,12 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) return p > t && p < t + THREAD_SIZE - 3; } -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, + unsigned long *stack, struct stacktrace_ops *ops, void *data) { const unsigned cpu = smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -268,28 +271,30 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s if (unwind_init_frame_info(&info, tsk, regs) == 0) unw_ret = dump_trace_unwind(&info, &oad); } else if (tsk == current) - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); + unw_ret = unwind_init_running(&info, dump_trace_unwind, + &oad); else { if (unwind_init_blocked(&info, tsk) == 0) unw_ret = dump_trace_unwind(&info, &oad); } if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", + ops->warning_symbol(data, + "DWARF2 unwinder stuck at %s\n", UNW_PC(&info)); if ((long)UNW_SP(&info) < 0) { - ops->warning(data, "Leftover inexact backtrace:\n"); + MSG("Leftover inexact backtrace:"); stack = (unsigned long *)UNW_SP(&info); if (!stack) return; } else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:\n"); } else if (call_trace >= 1) return; else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:\n"); } else - ops->warning(data, "Inexact backtrace:\n"); + MSG("Inexact backtrace:\n"); } if (!stack) { unsigned long dummy; -- cgit v1.2.3 From dd315df1767cf56bd4fb8d730fdff4a3d7e15d84 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:00 +0100 Subject: [PATCH] x86: Compress stack unwinder output The unwinder has some extra newlines, which eat up loads of screen space when it spews. (See https://bugzilla.redhat.com/bugzilla/attachment.cgi?id=137900 for a nasty example). warning_symbol-> and warning-> already printk a newline, so don't add one in the strings passed to them. [AK: redone for new code] Signed-off-by: Dave Jones Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index d3f43c958ca1..eedd4e759c3a 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -280,7 +280,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { ops->warning_symbol(data, - "DWARF2 unwinder stuck at %s\n", + "DWARF2 unwinder stuck at %s", UNW_PC(&info)); if ((long)UNW_SP(&info) < 0) { MSG("Leftover inexact backtrace:"); @@ -288,13 +288,13 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, if (!stack) return; } else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else if (call_trace >= 1) return; else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else - MSG("Inexact backtrace:\n"); + MSG("Inexact backtrace:"); } if (!stack) { unsigned long dummy; -- cgit v1.2.3 From 36b2a8d5aff4cb3ee83d5e40447a8f073bcfe2fb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86-64: add X86_FEATURE_PEBS and detection Here is a patch (used by perfmon2) to detect the presence of the Precise Event Based Sampling (PEBS) feature for Intel 64-bit processors. The patch also adds the cpu_has_pebs macro. changelog: - adds X86_FEATURE_PEBS - adds cpu_has_pebs to test for X86_FEATURE_PEBS Signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index fc944b5e8f4a..619af2e2fa26 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -835,6 +835,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); } + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } + n = c->extended_cpuid_level; if (n >= 0x80000008) { unsigned eax = cpuid_eax(0x80000008); -- cgit v1.2.3 From e2764a1e306c986053a52b33748c33463cf888de Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86-64: use BUILD_BUG_ON in FPU code Signed-off-by: Alexey Dobriyan Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/x86_64/kernel/i387.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c index 3aa1e9bb781d..1d58c13bc6bc 100644 --- a/arch/x86_64/kernel/i387.c +++ b/arch/x86_64/kernel/i387.c @@ -82,11 +82,8 @@ int save_i387(struct _fpstate __user *buf) struct task_struct *tsk = current; int err = 0; - { - extern void bad_user_i387_struct(void); - if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave)) - bad_user_i387_struct(); - } + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.i387.fxsave)); if ((unsigned long)buf % 16) printk("save_i387: bad fpstate %p\n",buf); -- cgit v1.2.3 From bb81a09e55eaf7e5f798468ab971469b6f66a259 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86: all cpu backtrace When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu backtrace, so we get to see which CPU is holding the lock, and where. Cc: Andi Kleen Cc: Ingo Molnar Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/x86_64/kernel/nmi.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 7af9cb3e2d99..27e95e7922c1 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -12,14 +12,15 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ +#include #include #include #include #include #include -#include #include #include +#include #include #include @@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi; static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); +static cpumask_t backtrace_mask = CPU_MASK_NONE; + /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) */ @@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = smp_processor_id(); struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 dummy; int rc=0; @@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) touched = 1; } + if (cpu_isset(cpu, backtrace_mask)) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk("NMI backtrace for cpu %d\n", cpu); + dump_stack(); + spin_unlock(&lock); + cpu_clear(cpu, backtrace_mask); + } + #ifdef CONFIG_X86_MCE /* Could check oops_in_progress here too, but it's safer not too */ @@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, #endif +void __trigger_all_cpu_backtrace(void) +{ + int i; + + backtrace_mask = cpu_online_map; + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpus_empty(backtrace_mask)) + break; + mdelay(1); + } +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); -- cgit v1.2.3 From 399287229c775a8962a852a761d65dc9475dec7c Mon Sep 17 00:00:00 2001 From: Aaron Durbin Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86-64: Insert Local and IO APIC(s) into resource map Insert the Local APIC and IO APIC(s) into the resource tree. It allows the APIC resources to be visible within /proc/iomem. The patch also takes into account IO APIC(s) mapped in the PCI space by deferring the insertion until after PCI has allocated its necessary resources. Signed-off-by: Aaron Durbin Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton --- arch/x86_64/kernel/apic.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 4d9d5ed942b2..5c468971e645 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,12 @@ int apic_calibrate_pmtmr __initdata; int disable_apic_timer __initdata; +static struct resource *ioapic_resources; +static struct resource lapic_resource = { + .name = "Local APIC", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + /* * cpu_mask that denotes the CPUs that needs timer interrupt coming in as * IPIs in place of local APIC timers @@ -585,6 +592,64 @@ static int __init detect_init_APIC (void) return 0; } +#ifdef CONFIG_X86_IO_APIC +static struct resource * __init ioapic_setup_resources(void) +{ +#define IOAPIC_RESOURCE_NAME_SIZE 11 + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + memset(mem, 0, n); + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + } + } + + ioapic_resources = res; + + return res; +} + +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk("IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); +#endif + void __init init_apic_mappings(void) { unsigned long apic_phys; @@ -604,6 +669,11 @@ void __init init_apic_mappings(void) apic_mapped = 1; apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); + /* Put local APIC into the resource map. */ + lapic_resource.start = apic_phys; + lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + insert_resource(&iomem_resource, &lapic_resource); + /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). @@ -613,7 +683,9 @@ void __init init_apic_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; + struct resource *ioapic_res; + ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; @@ -625,6 +697,12 @@ void __init init_apic_mappings(void) apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", __fix_to_virt(idx), ioapic_phys); idx++; + + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } } } } -- cgit v1.2.3 From da68933e0a999fb13636653c710cca701b457ad2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] x86-64: dump_trace() atomicity fix Fix BUG: using smp_processor_id() in preemptible [00000001] code: in backtracer on preemptible debug kernels. Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index eedd4e759c3a..e37b4d77d5a8 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -254,7 +254,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, struct stacktrace_ops *ops, void *data) { - const unsigned cpu = smp_processor_id(); + const unsigned cpu = get_cpu(); unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -286,11 +286,11 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, MSG("Leftover inexact backtrace:"); stack = (unsigned long *)UNW_SP(&info); if (!stack) - return; + goto out; } else MSG("Full inexact backtrace again:"); } else if (call_trace >= 1) - return; + goto out; else MSG("Full inexact backtrace again:"); } else @@ -385,6 +385,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, tinfo = current_thread_info(); HANDLE_STACK (valid_stack_ptr(tinfo, stack)); #undef HANDLE_STACK +out: + put_cpu(); } EXPORT_SYMBOL(dump_trace); -- cgit v1.2.3 From bcddc0155f351ab3f06c6ede6d91fd399ef9e18f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] x86-64: miscellaneous entry.S adjustments This patch: - makes ret_from_sys_call no longer global (all external users were previously switched to use int_ret_from_sys_call) - adjusts placement of a CFI_{REMEMBER,RESTORE}_STATE pair to better fit logic flow - eliminates an unnecessary pair of CFI_{REMEMBER,RESTORE}_STATE - glues together function- and unwinder-wise the previously separate system_call and int_ret_from_sys_call function fragments Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/x86_64/kernel/entry.S | 36 ++++++------------------------------ 1 file changed, 6 insertions(+), 30 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 7d401b00d822..601d332c4b79 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -230,7 +230,6 @@ ENTRY(system_call) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) - CFI_REMEMBER_STATE jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -241,7 +240,6 @@ ENTRY(system_call) * Syscall return path ending with SYSRET (fast path) * Has incomplete stack frame and undefined top of stack. */ - .globl ret_from_sys_call ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ @@ -251,8 +249,8 @@ sysret_check: TRACE_IRQS_OFF movl threadinfo_flags(%rcx),%edx andl %edi,%edx - CFI_REMEMBER_STATE jnz sysret_careful + CFI_REMEMBER_STATE /* * sysretq will re-enable interrupts: */ @@ -265,10 +263,10 @@ sysret_check: swapgs sysretq + CFI_RESTORE_STATE /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: - CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc sysret_signal TRACE_IRQS_ON @@ -306,7 +304,6 @@ badsys: /* Do syscall tracing */ tracesys: - CFI_RESTORE_STATE SAVE_REST movq $-ENOSYS,RAX(%rsp) FIXUP_TOP_OF_STACK %rdi @@ -322,32 +319,13 @@ tracesys: call *sys_call_table(,%rax,8) 1: movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ - jmp int_ret_from_sys_call - CFI_ENDPROC -END(system_call) /* * Syscall return path ending with IRET. * Has correct top of stack, but partial stack frame. - */ -ENTRY(int_ret_from_sys_call) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8-ARGOFFSET - /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ - CFI_REL_OFFSET rsp,RSP-ARGOFFSET - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ - /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ - CFI_REL_OFFSET rip,RIP-ARGOFFSET - CFI_REL_OFFSET rdx,RDX-ARGOFFSET - CFI_REL_OFFSET rcx,RCX-ARGOFFSET - CFI_REL_OFFSET rax,RAX-ARGOFFSET - CFI_REL_OFFSET rdi,RDI-ARGOFFSET - CFI_REL_OFFSET rsi,RSI-ARGOFFSET - CFI_REL_OFFSET r8,R8-ARGOFFSET - CFI_REL_OFFSET r9,R9-ARGOFFSET - CFI_REL_OFFSET r10,R10-ARGOFFSET - CFI_REL_OFFSET r11,R11-ARGOFFSET + */ + .globl int_ret_from_sys_call +int_ret_from_sys_call: cli TRACE_IRQS_OFF testl $3,CS-ARGOFFSET(%rsp) @@ -394,8 +372,6 @@ int_very_careful: popq %rdi CFI_ADJUST_CFA_OFFSET -8 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi - cli - TRACE_IRQS_OFF jmp int_restore_rest int_signal: @@ -411,7 +387,7 @@ int_restore_rest: TRACE_IRQS_OFF jmp int_with_check CFI_ENDPROC -END(int_ret_from_sys_call) +END(system_call) /* * Certain special system calls that need to save a complete full stack frame. -- cgit v1.2.3 From 72690a21188586022a9e65cb6f1cc8845167555a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] x86: Don't use nested idle loops Currently the idle loop has two nested loops -- one high level in cpu_idle and in some low level idle functions another one. Looping in the low level idle functions breaks the idle notifiers because interrupts waking up sleep states need to execute exit_idle() which is only in cpu_idle(). So don't do that, only loop in cpu_idle(). This only removes code. In some cases e.g. poll_idle the idle loop is a little longer now because cpu_idle checks more things. I hope that isn't a problem ACPI idle doesn't change behaviour because it never looped anyways. Cc: len.brown@intel.com Cc: eranian@hpl.hp.com Signed-off-by: Andi Kleen --- arch/x86_64/kernel/process.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 7451a4c43c16..0b7b4caa4f74 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -108,17 +108,15 @@ void exit_idle(void) */ static void default_idle(void) { - local_irq_enable(); - current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); - } + local_irq_disable(); + if (!need_resched()) { + /* Enables interrupts one instruction before HLT. + x86 special cases this so there is no race. */ + safe_halt(); + } else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } @@ -129,16 +127,7 @@ static void default_idle(void) */ static void poll_idle (void) { - local_irq_enable(); - - asm volatile( - "2:" - "testl %0,%1;" - "rep; nop;" - "je 2b;" - : : - "i" (_TIF_NEED_RESCHED), - "m" (current_thread_info()->flags)); + cpu_relax(); } void cpu_idle_wait(void) @@ -257,8 +246,7 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) static void mwait_idle(void) { local_irq_enable(); - while (!need_resched()) - mwait_idle_with_hints(0,0); + mwait_idle_with_hints(0,0); } void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -- cgit v1.2.3 From 7cd8b6861eb586aabe4c725cc0c259ce2e653695 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] x86: remove last two pci_find offenders in the core code Resending as I believe the discussion about them established they were correct. Signed-off-by: Alan Cox Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/x86_64/kernel/pci-calgary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 37a770859e71..d2ea87a95268 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -921,7 +921,7 @@ static int __init calgary_init(void) error: do { - dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, + dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CALGARY, dev); if (!dev) -- cgit v1.2.3 From 9c5f8be4625e73f17e28fea89399ed871a30e064 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] x86: Mention PCI instead of RAM in NMI parity error message On modern systems RAM errors don't cause NMIs, but it's usually caused by PCI SERR. Mention PCI instead of RAM in the printk. Reported by r_hayashi@ctc-g.co.jp (Ryutaro Hayashi) Cc: r_hayashi@ctc-g.co.jp Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index e37b4d77d5a8..70bfaab9822c 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -793,8 +793,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); - printk(KERN_EMERG "You probably have a hardware problem with your " - "RAM chips\n"); + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); -- cgit v1.2.3 From 5df0287ecc4f53e68bbd188fa8258b555e6b734f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:05 +0100 Subject: [PATCH] x86-64: Extend clear_irq_vector Clear the irq releated entries in irq_vector, irq_domain and vector_irq instead of clearing irq_vector only. So when new irq is created, it could reuse that vector. (actually is the second loop scanning from FIRST_DEVICE_VECTOR+8). This could avoid the vectors are used up with enough module inserting and removing Cc: Eric W. Biedierman Cc: Muli Ben-Yehuda Signed-off-By: Yinghai Lu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/io_apic.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index c80081a6ba41..f71461b1f03d 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -750,6 +750,22 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) return vector; } +static void __clear_irq_vector(int irq) +{ + cpumask_t mask; + int cpu, vector; + + BUG_ON(!irq_vector[irq]); + + vector = irq_vector[irq]; + cpus_and(mask, irq_domain[irq], cpu_online_map); + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + + irq_vector[irq] = 0; + irq_domain[irq] = CPU_MASK_NONE; +} + void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ @@ -1837,7 +1853,7 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); spin_lock_irqsave(&vector_lock, flags); - irq_vector[irq] = 0; + __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); } -- cgit v1.2.3 From ab2bf0c1c689905b628dca94d0acd9c50e152468 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] x86-64: Use probe_kernel_address in arch/x86_64/* Instead of open coded __get_user Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 70bfaab9822c..264db33476ab 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -30,9 +30,9 @@ #include #include #include +#include #include -#include #include #include #include -- cgit v1.2.3 From b026872601976f666bae77b609dc490d1834bf77 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] x86-64: Try multiple timer variants in check_timer Instead of adding all kinds of more quirks try various timer routing variants in check_timer. In particular this tries to handle quirks from: - Nvidia NF2-4 reference BIOS: wrong timer override - Asus: Wrong timer override but no HPET table - ATI: require timer disabled in 8259 - Some boards: require timer enabled in 8259 We just try many of the the known variants in the hopefully right order in check_timer. Trying pin 0/2 on Nvidia suggested by Tim Hockin. TBD Experimental. Needs a lot of testing Signed-off-by: Andi Kleen --- arch/x86_64/kernel/early-quirks.c | 5 -- arch/x86_64/kernel/io_apic.c | 124 ++++++++++++++++---------------------- 2 files changed, 51 insertions(+), 78 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index 68273bff58cc..fb0c6da41b7e 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -69,11 +69,6 @@ static void nvidia_bugs(void) static void ati_bugs(void) { - if (timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO - "ATI board detected. Disabling timer routing over 8254.\n"); - } } struct chipset { diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index f71461b1f03d..88fcc4ebbf6e 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -55,10 +55,6 @@ int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; -static int disable_timer_pin_1 __initdata; - -int timer_over_8254 __initdata = 1; - /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -348,29 +344,6 @@ static int __init disable_ioapic_setup(char *str) } early_param("noapic", disable_ioapic_setup); -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 1; -} -__setup("disable_timer_pin_1", disable_timer_pin_setup); - -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - - /* * Find the IRQ entry number of a certain pin. */ @@ -1579,10 +1552,33 @@ static inline void unlock_ExtINT_logic(void) * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for modern platforms only. */ -static inline void check_timer(void) + +static int try_apic_pin(int apic, int pin, char *msg) +{ + apic_printk(APIC_VERBOSE, KERN_INFO + "..TIMER: trying IO-APIC=%d PIN=%d %s", + apic, pin, msg); + + /* + * Ok, does IRQ0 through the IOAPIC work? + */ + if (!no_timer_check && timer_irq_works()) { + nmi_watchdog_default(); + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + setup_nmi(); + enable_8259A_irq(0); + } + return 1; + } + clear_IO_APIC_pin(apic, pin); + apic_printk(APIC_QUIET, KERN_ERR " .. failed\n"); + return 0; +} + +/* The function from hell */ +static void check_timer(void) { int apic1, pin1, apic2, pin2; int vector; @@ -1603,61 +1599,43 @@ static inline void check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - if (timer_over_8254 > 0) - enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; - apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); + /* Do this first, otherwise we get double interrupts on ATI boards */ + if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled")) + return; - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - unmask_IO_APIC_irq(0); - if (!no_timer_check && timer_irq_works()) { - nmi_watchdog_default(); - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - return; - } - clear_IO_APIC_pin(apic1, pin1); - apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " - "connected to IO-APIC\n"); - } + /* Now try again with IRQ0 8259A enabled. + Assumes timer is on IO-APIC 0 ?!? */ + enable_8259A_irq(0); + unmask_IO_APIC_irq(0); + if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled")) + return; + disable_8259A_irq(0); - apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " - "through the 8259A ... "); + /* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides + on Nvidia boards */ + if (!(apic1 == 0 && pin1 == 0) && + try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled")) + return; + if (!(apic1 == 0 && pin1 == 2) && + try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled")) + return; + + /* Then try pure 8259A routing on the 8259 as reported by BIOS*/ + enable_8259A_irq(0); if (pin2 != -1) { - apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", - apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); - if (timer_irq_works()) { - apic_printk(APIC_VERBOSE," works.\n"); - nmi_watchdog_default(); - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - } + if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS")) return; - } - /* - * Cleanup, just in case ... - */ - clear_IO_APIC_pin(apic2, pin2); } - apic_printk(APIC_VERBOSE," failed.\n"); + + /* Tried all possibilities to go through the IO-APIC. Now come the + really cheesy fallbacks. */ if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); -- cgit v1.2.3 From 58db85482743f5e3495d168c641c60ce1d3dfb06 Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] calgary: phb_shift can be int Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-calgary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index d2ea87a95268..f53b581dfd0b 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -740,7 +740,7 @@ static void __init calgary_increase_split_completion_timeout(void __iomem *bbar, { u64 val64; void __iomem *target; - unsigned long phb_shift = -1; + unsigned int phb_shift = ~0; /* silence gcc */ u64 mask; switch (busno_to_phbid(busnum)) { -- cgit v1.2.3 From b34e90b8f0f30151349134f87b5dc6ef75a5218c Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] Calgary: use BIOS supplied BBARs and topology information Find the BBAR register address of each Calgary using the "Extended BIOS Data Area" rather than calculating it ourselves. Also get the bus topology (what PHB each bus is on) from Calgary rather than calculating it ourselves. This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=7407. Signed-off-by: Laurent Vivier Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-calgary.c | 161 ++++++++++++++++++++++++++++++--------- 1 file changed, 125 insertions(+), 36 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index f53b581dfd0b..afc0a53505f1 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -41,6 +41,7 @@ #include #include #include +#include #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 #define PCI_VENDOR_DEVICE_ID_CALGARY \ @@ -115,14 +116,35 @@ static const unsigned long phb_offsets[] = { 0xB000 /* PHB3 */ }; +/* PHB debug registers */ + +static const unsigned long phb_debug_offsets[] = { + 0x4000 /* PHB 0 DEBUG */, + 0x5000 /* PHB 1 DEBUG */, + 0x6000 /* PHB 2 DEBUG */, + 0x7000 /* PHB 3 DEBUG */ +}; + +/* + * STUFF register for each debug PHB, + * byte 1 = start bus number, byte 2 = end bus number + */ + +#define PHB_DEBUG_STUFF_OFFSET 0x0020 + unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; static int translate_empty_slots __read_mostly = 0; static int calgary_detected __read_mostly = 0; +static struct rio_table_hdr *rio_table_hdr __initdata; +static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; +static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; + struct calgary_bus_info { void *tce_space; unsigned char translation_disabled; signed char phbid; + void __iomem *bbar; }; static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; @@ -475,6 +497,11 @@ static struct dma_mapping_ops calgary_dma_ops = { .unmap_sg = calgary_unmap_sg, }; +static inline void __iomem * busno_to_bbar(unsigned char num) +{ + return bus_info[num].bbar; +} + static inline int busno_to_phbid(unsigned char num) { return bus_info[num].phbid; @@ -828,31 +855,9 @@ static void __init calgary_disable_translation(struct pci_dev *dev) del_timer_sync(&tbl->watchdog_timer); } -static inline unsigned int __init locate_register_space(struct pci_dev *dev) +static inline void __iomem * __init locate_register_space(struct pci_dev *dev) { - int rionodeid; - u32 address; - - /* - * Each Calgary has four busses. The first four busses (first Calgary) - * have RIO node ID 2, then the next four (second Calgary) have RIO - * node ID 3, the next four (third Calgary) have node ID 2 again, etc. - * We use a gross hack - relying on the dev->bus->number ordering, - * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1, - * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the - * second (id 3), and then it repeats modulo 14. - */ - rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2; - /* - * register space address calculation as follows: - * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase) - * ChassisBase is always zero for x366/x260/x460 - * RioNodeId is 2 for first Calgary, 3 for second Calgary - */ - address = START_ADDRESS - - (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) + - (0x100000) * (rionodeid - CHASSIS_BASE); - return address; + return busno_to_bbar(dev->bus->number); } static void __init calgary_init_one_nontraslated(struct pci_dev *dev) @@ -864,15 +869,12 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev) static int __init calgary_init_one(struct pci_dev *dev) { - u32 address; void __iomem *bbar; int ret; BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - address = locate_register_space(dev); - /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(address, 1024 * 1024); + bbar = locate_register_space(dev); if (!bbar) { ret = -ENODATA; goto done; @@ -898,6 +900,35 @@ static int __init calgary_init(void) { int ret = -ENODEV; struct pci_dev *dev = NULL; + int rio, phb, bus; + void __iomem *bbar; + void __iomem *target; + u8 start_bus, end_bus; + u32 val; + + for (rio = 0; rio < rio_table_hdr->num_rio_dev; rio++) { + + if ( (rio_devs[rio]->type != COMPAT_CALGARY) && + (rio_devs[rio]->type != ALT_CALGARY) ) + continue; + + /* map entire 1MB of Calgary config space */ + bbar = ioremap_nocache(rio_devs[rio]->BBAR, 1024 * 1024); + + for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { + + target = calgary_reg(bbar, phb_debug_offsets[phb] | + PHB_DEBUG_STUFF_OFFSET); + val = be32_to_cpu(readl(target)); + start_bus = (u8)((val & 0x00FF0000) >> 16); + end_bus = (u8)((val & 0x0000FF00) >> 8); + for (bus = start_bus; bus <= end_bus; bus++) { + bus_info[bus].bbar = bbar; + bus_info[bus].phbid = phb; + } + } + } + do { dev = pci_get_device(PCI_VENDOR_ID_IBM, @@ -962,13 +993,55 @@ static inline int __init determine_tce_table_size(u64 ram) return ret; } +static int __init build_detail_arrays(void) +{ + unsigned long ptr; + int i, scal_detail_size, rio_detail_size; + + if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + printk(KERN_WARNING + "Calgary: MAX_NUMNODES too low! Defined as %d, " + "but system has %d nodes.\n", + MAX_NUMNODES, rio_table_hdr->num_scal_dev); + return -ENODEV; + } + + switch (rio_table_hdr->version){ + default: + printk(KERN_WARNING + "Calgary: Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); + return -ENODEV; + case 2: + scal_detail_size = 11; + rio_detail_size = 13; + break; + case 3: + scal_detail_size = 12; + rio_detail_size = 15; + break; + } + + ptr = ((unsigned long)rio_table_hdr) + 3; + for (i = 0; i < rio_table_hdr->num_scal_dev; + i++, ptr += scal_detail_size) + scal_devs[i] = (struct scal_detail *)ptr; + + for (i = 0; i < rio_table_hdr->num_rio_dev; + i++, ptr += rio_detail_size) + rio_devs[i] = (struct rio_detail *)ptr; + + return 0; +} + void __init detect_calgary(void) { u32 val; int bus; void *tbl; int calgary_found = 0; - int phb = -1; + unsigned long ptr; + int offset; /* * if the user specified iommu=off or iommu=soft or we found @@ -980,6 +1053,29 @@ void __init detect_calgary(void) if (!early_pci_allowed()) return; + ptr = (unsigned long)phys_to_virt(get_bios_ebda()); + + rio_table_hdr = NULL; + offset = 0x180; + while (offset) { + /* The block id is stored in the 2nd word */ + if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ + /* set the pointer past the offset & block id */ + rio_table_hdr = (struct rio_table_hdr *)(ptr+offset+4); + break; + } + /* The next offset is stored in the 1st word. 0 means no more */ + offset = *((unsigned short *)(ptr + offset)); + } + if (!rio_table_hdr){ + printk(KERN_ERR "Calgary: Unable to locate " + "Rio Grande Table in EBDA - bailing!\n"); + return; + } + + if (build_detail_arrays()) + return; + specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { @@ -990,12 +1086,6 @@ void __init detect_calgary(void) if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) continue; - /* - * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3) - * it is connected to releative to the clagary chip. - */ - phb = (phb + 1) % PHBS_PER_CALGARY; - if (info->translation_disabled) continue; @@ -1010,7 +1100,6 @@ void __init detect_calgary(void) if (!tbl) goto cleanup; info->tce_space = tbl; - info->phbid = phb; calgary_found = 1; break; } -- cgit v1.2.3 From eae93755540bae18aff46b8a0e621b5d65bd5380 Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] Calgary: check BBAR ioremap success when ioremapping This patch cleans up the previous "Use BIOS supplied BBAR information" patch. Mostly stylistic clenaups, but also check for ioremap failure when we ioremap the BBAR rather than when trying to use it. Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen Acked-by: Laurent Vivier --- arch/x86_64/kernel/pci-calgary.c | 85 +++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 36 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index afc0a53505f1..8a1e4f35bc3c 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -138,7 +138,7 @@ static int calgary_detected __read_mostly = 0; static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; -static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; +static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata; struct calgary_bus_info { void *tce_space; @@ -855,11 +855,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev) del_timer_sync(&tbl->watchdog_timer); } -static inline void __iomem * __init locate_register_space(struct pci_dev *dev) -{ - return busno_to_bbar(dev->bus->number); -} - static void __init calgary_init_one_nontraslated(struct pci_dev *dev) { pci_dev_get(dev); @@ -874,15 +869,10 @@ static int __init calgary_init_one(struct pci_dev *dev) BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - bbar = locate_register_space(dev); - if (!bbar) { - ret = -ENODATA; - goto done; - } - + bbar = busno_to_bbar(dev->bus->number); ret = calgary_setup_tar(dev, bbar); if (ret) - goto iounmap; + goto done; pci_dev_get(dev); dev->bus->self = dev; @@ -890,38 +880,39 @@ static int __init calgary_init_one(struct pci_dev *dev) return 0; -iounmap: - iounmap(bbar); done: return ret; } -static int __init calgary_init(void) +static int __init calgary_locate_bbars(void) { - int ret = -ENODEV; - struct pci_dev *dev = NULL; - int rio, phb, bus; + int ret; + int rioidx, phb, bus; void __iomem *bbar; void __iomem *target; + unsigned long offset; u8 start_bus, end_bus; u32 val; - for (rio = 0; rio < rio_table_hdr->num_rio_dev; rio++) { + ret = -ENODATA; + for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) { + struct rio_detail *rio = rio_devs[rioidx]; - if ( (rio_devs[rio]->type != COMPAT_CALGARY) && - (rio_devs[rio]->type != ALT_CALGARY) ) + if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY)) continue; /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(rio_devs[rio]->BBAR, 1024 * 1024); + bbar = ioremap_nocache(rio->BBAR, 1024 * 1024); + if (!bbar) + goto error; for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { + offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET; + target = calgary_reg(bbar, offset); - target = calgary_reg(bbar, phb_debug_offsets[phb] | - PHB_DEBUG_STUFF_OFFSET); val = be32_to_cpu(readl(target)); start_bus = (u8)((val & 0x00FF0000) >> 16); - end_bus = (u8)((val & 0x0000FF00) >> 8); + end_bus = (u8)((val & 0x0000FF00) >> 8); for (bus = start_bus; bus <= end_bus; bus++) { bus_info[bus].bbar = bbar; bus_info[bus].phbid = phb; @@ -929,6 +920,25 @@ static int __init calgary_init(void) } } + return 0; + +error: + /* scan bus_info and iounmap any bbars we previously ioremap'd */ + for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++) + if (bus_info[bus].bbar) + iounmap(bus_info[bus].bbar); + + return ret; +} + +static int __init calgary_init(void) +{ + int ret; + struct pci_dev *dev = NULL; + + ret = calgary_locate_bbars(); + if (ret) + return ret; do { dev = pci_get_device(PCI_VENDOR_ID_IBM, @@ -1000,18 +1010,13 @@ static int __init build_detail_arrays(void) if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ printk(KERN_WARNING - "Calgary: MAX_NUMNODES too low! Defined as %d, " + "Calgary: MAX_NUMNODES too low! Defined as %d, " "but system has %d nodes.\n", MAX_NUMNODES, rio_table_hdr->num_scal_dev); return -ENODEV; } switch (rio_table_hdr->version){ - default: - printk(KERN_WARNING - "Calgary: Invalid Rio Grande Table Version: %d\n", - rio_table_hdr->version); - return -ENODEV; case 2: scal_detail_size = 11; rio_detail_size = 13; @@ -1020,6 +1025,11 @@ static int __init build_detail_arrays(void) scal_detail_size = 12; rio_detail_size = 15; break; + default: + printk(KERN_WARNING + "Calgary: Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); + return -EPROTO; } ptr = ((unsigned long)rio_table_hdr) + 3; @@ -1042,6 +1052,7 @@ void __init detect_calgary(void) int calgary_found = 0; unsigned long ptr; int offset; + int ret; /* * if the user specified iommu=off or iommu=soft or we found @@ -1061,27 +1072,29 @@ void __init detect_calgary(void) /* The block id is stored in the 2nd word */ if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ /* set the pointer past the offset & block id */ - rio_table_hdr = (struct rio_table_hdr *)(ptr+offset+4); + rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); break; } /* The next offset is stored in the 1st word. 0 means no more */ offset = *((unsigned short *)(ptr + offset)); } - if (!rio_table_hdr){ + if (!rio_table_hdr) { printk(KERN_ERR "Calgary: Unable to locate " "Rio Grande Table in EBDA - bailing!\n"); return; } - if (build_detail_arrays()) + ret = build_detail_arrays(); + if (ret) { + printk(KERN_ERR "Calgary: build_detail_arrays ret %d\n", ret); return; + } specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { int dev; struct calgary_bus_info *info = &bus_info[bus]; - info->phbid = -1; if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) continue; -- cgit v1.2.3 From bff6547bb6a4e82c399d74e7fba78b12d2f162ed Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] Calgary: allow compiling Calgary in but not using it by default This patch makes it possible to compile Calgary in but not use it by default. In this mode, use 'iommu=calgary' to activate it. Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-calgary.c | 9 +++++++++ arch/x86_64/kernel/pci-dma.c | 5 +++++ 2 files changed, 14 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 8a1e4f35bc3c..0ddf29dae7e0 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -43,6 +43,12 @@ #include #include +#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT +int use_calgary __read_mostly = 1; +#else +int use_calgary __read_mostly = 0; +#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */ + #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 #define PCI_VENDOR_DEVICE_ID_CALGARY \ (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16) @@ -1061,6 +1067,9 @@ void __init detect_calgary(void) if (swiotlb || no_iommu || iommu_detected) return; + if (!use_calgary) + return; + if (!early_pci_allowed()) return; diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index f8d857453f8a..683b7a5c1ab3 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -296,6 +296,11 @@ __init int iommu_setup(char *p) gart_parse_options(p); #endif +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + p += strcspn(p, ","); if (*p == ',') ++p; -- cgit v1.2.3 From 516d2836434d279fef164bb1b964e83425d312d2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] x86-64: Fix race in IO-APIC routing entry setup. Interrupt could happen between setting the IO-APIC entry and setting its interrupt data. Pointed out by Linus. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/io_apic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 88fcc4ebbf6e..eaf0b708e67d 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -174,14 +174,20 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) * the interrupt, and we need to make sure the entry is fully populated * before that happens. */ -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +static void +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { - unsigned long flags; union entry_union eu; eu.entry = e; - spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11 + 2*pin, eu.w2); io_apic_write(apic, 0x10 + 2*pin, eu.w1); +} + +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, e); spin_unlock_irqrestore(&ioapic_lock, flags); } -- cgit v1.2.3 From 103efcd9aac1de4da6a1477f2f3b9fcf35273a18 Mon Sep 17 00:00:00 2001 From: Ernie Petrides Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] x86-64: fix perms/range of vsyscall vma in /proc/*/maps The final line of /proc//maps on x86_64 for native 64-bit tasks shows an incorrect ending address and incorrect permissions. There is only a single page mapped in this vsyscall region, and it is accessible for both read and execute. The patch below fixes this. (Since 32-bit-compat tasks have a real vma with correct perms/range, no change is necessary for that scenario.) Before the patch, a "cat /proc/self/maps | tail -1" shows this: ffffffffff600000-ffffffffffe00000 ---p 00000000 [...] After the patch, this is the output: ffffffffff600000-ffffffffff601000 r-xp 00000000 [...] Signed-off-by: Ernie Petrides Signed-off-by: Andi Kleen --- arch/x86_64/kernel/vsyscall.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 92546c1526f1..c3de9a09cd9f 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -290,6 +290,7 @@ static void __init map_vsyscall(void) extern char __vsyscall_0; unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); } -- cgit v1.2.3 From 9a457324229db34d3bcb0b67360130c287289401 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] x86-64: Rate limit no irq handler messages Signed-off-by: Andi Kleen --- arch/x86_64/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index e46c55856d40..0c06af6c13bc 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) if (likely(irq < NR_IRQS)) generic_handle_irq(irq); - else + else if (printk_ratelimit()) printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", __func__, smp_processor_id(), vector); -- cgit v1.2.3 From 9899f826fc90beba4f78083f6230e06cbe1050c9 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] x86-64: add genapic_force Add genapic_force. Used by the next Intel quirks patch. Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/x86_64/kernel/genapic.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 8e78a75d1866..b007433f96bb 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -33,7 +33,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; struct genapic *genapic = &apic_flat; - +struct genapic *genapic_force; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. @@ -46,6 +46,13 @@ void __init clustered_apic_check(void) u8 cluster_cnt[NUM_APIC_CLUSTERS]; int max_apic = 0; + /* genapic selection can be forced because of certain quirks. + */ + if (genapic_force) { + genapic = genapic_force; + goto print; + } + #if defined(CONFIG_ACPI) /* * Some x86_64 machines use physical APIC mode regardless of how many -- cgit v1.2.3 From b0d0a4ba45760b10ecee9035ed45b442c1a6cc84 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] x86: fix the irqbalance quirk for E7320/E7520/E7525 Move the irqbalance quirks for E7320/E7520/E7525(Errata 23 in http://download.intel.com/design/chipsets/specupdt/30304203.pdf) to early quirks. And add a PCI quirk for these platforms to check(which happens very late during the boot) if the APIC routing is indeed set to default flat mode. This fixes the breakage(in x86_64) of this quirk due to cpu hotplug which selects physical mode instead of the logical flat(as needed for this errata workaround). Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/x86_64/kernel/early-quirks.c | 13 +++++++++++++ arch/x86_64/kernel/smpboot.c | 8 ++++++++ 2 files changed, 21 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index fb0c6da41b7e..829698f6d049 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -71,6 +71,18 @@ static void ati_bugs(void) { } +static void intel_bugs(void) +{ + u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); + +#ifdef CONFIG_SMP + if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || + device == PCI_DEVICE_ID_INTEL_E7520_MCH || + device == PCI_DEVICE_ID_INTEL_E7525_MCH) + quirk_intel_irqbalance(); +#endif +} + struct chipset { u16 vendor; void (*f)(void); @@ -80,6 +92,7 @@ static struct chipset early_qrk[] = { { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, { PCI_VENDOR_ID_VIA, via_bugs }, { PCI_VENDOR_ID_ATI, ati_bugs }, + { PCI_VENDOR_ID_INTEL, intel_bugs}, {} }; diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 62c2e747af58..4c161c208d5b 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -60,6 +60,7 @@ #include #include #include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -1167,6 +1168,13 @@ int __cpuinit __cpu_up(unsigned int cpu) while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); + + if (num_online_cpus() > 8 && genapic == &apic_flat) { + printk(KERN_WARNING + "flat APIC routing can't be used with > 8 cpus\n"); + BUG(); + } + err = 0; return err; -- cgit v1.2.3 From ee58fad51a2a767cb2567706ace967705233d881 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86-64: x86-64 add Intel BTS cpufeature bit and detection (take 2) Here is a small patch for x86-64 which adds a cpufeature flag and detection code for Intel's Branch Trace Store (BTS) feature. This feature can be found on Intel P4 and Core 2 processors among others. It can also be used by perfmon. changelog: - add CPU_FEATURE_BTS - add Branch Trace Store detection signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 619af2e2fa26..a570c81c8316 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -838,6 +838,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_ds) { unsigned int l1, l2; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); if (!(l1 & (1<<12))) set_bit(X86_FEATURE_PEBS, c->x86_capability); } -- cgit v1.2.3 From f990fff427d68af3e4e1d16fe799c106abc0bf53 Mon Sep 17 00:00:00 2001 From: Karsten Wiese Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86: Regard MSRs in lapic_suspend()/lapic_resume() Read/Write APIC_LVTPC and APIC_LVTTHMR only, if get_maxlvt() returns certain values. This is done like everywhere else in i386/kernel/apic.c, so I guess its correct. Suspends/Resumes to disk fine and eleminates an smp_error_interrupt() here on a K8. AK: ported to x86-64 too Signed-off-by: Karsten Wiese Signed-off-by: Andi Kleen --- arch/x86_64/kernel/apic.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 5c468971e645..f0b00d8731cb 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -459,23 +459,30 @@ static struct { static int lapic_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); apic_pm_state.apic_spiv = apic_read(APIC_SPIV); apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); @@ -486,10 +493,13 @@ static int lapic_resume(struct sys_device *dev) { unsigned int l, h; unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; @@ -503,8 +513,12 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); apic_write(APIC_TMICT, apic_pm_state.apic_tmict); -- cgit v1.2.3 From 86bd58bf4c383fde4e99b83ce917b091a072040d Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86-64: Remove unused GET_APIC_VERSION call from clear_local_APIC Remove unused GET_APIC_VERSION call from clear_local_APIC() and __setup_APIC_LVTT(). Reported by D Binderman . Cc: Andi Kleen Cc: Ingo Molnar Signed-off-by: David Rientjes Signed-off-by: Andi Kleen --- arch/x86_64/kernel/apic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index f0b00d8731cb..124b2d27b4ac 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -140,7 +140,6 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - v = GET_APIC_VERSION(apic_read(APIC_LVR)); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } @@ -736,10 +735,9 @@ void __init init_apic_mappings(void) static void __setup_APIC_LVTT(unsigned int clocks) { - unsigned int lvtt_value, tmp_value, ver; + unsigned int lvtt_value, tmp_value; int cpu = smp_processor_id(); - ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) -- cgit v1.2.3 From 0741f4d207a644482d7a040f05cd264c98cf7ee8 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86: add sysctl for kstack_depth_to_print Add sysctl for kstack_depth_to_print. This lets users change the amount of raw stack data printed in dump_stack() without having to reboot. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 264db33476ab..75ceccee178c 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_enable_no_resched(); } -static int kstack_depth_to_print = 12; +int kstack_depth_to_print = 12; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else -- cgit v1.2.3 From 3df0af0eb064a16bbdbe81b46bc72a4089f88d54 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86_64: clear_bss before set_intr_gate with early_idt_handler idt_table is in the .bss section, so clear_bss need to called at first Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/head64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 9561eb3c5b5c..cc230b93cd1c 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -57,10 +57,12 @@ void __init x86_64_start_kernel(char * real_mode_data) { int i; - for (i = 0; i < 256; i++) + /* clear bss before set_intr_gate with early_idt_handler */ + clear_bss(); + + for (i = 0; i < IDT_ENTRIES; i++) set_intr_gate(i, early_idt_handler); asm volatile("lidt %0" :: "m" (idt_descr)); - clear_bss(); early_printk("Kernel alive\n"); -- cgit v1.2.3 From 8fb6e5f5db860113e71ce7b854382ed40559395b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86_64: interrupt array size should be aligned to NR_VECTORS interrupt array is referred for idt vectors instead of NR_IRQS, so change size to NR_VECTORS - FIRST_EXTERNAL_VECTOR. Also change to static. Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/i8259.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index c4ef801b765b..d73c79e821f1 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -76,7 +76,8 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) -void (*interrupt[NR_IRQS])(void) = { +/* for the irq vectors */ +static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { IRQLIST_16(0x2), IRQLIST_16(0x3), IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), -- cgit v1.2.3 From d4c45718b3c0d3045eab803cd15fabbed1ea5bcd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: Fix kobject_init() WARN_ON on resume from disk Make mce_remove_device() clean up the kobject in per_cpu(device_mce, cpu) after it has been unregistered. Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen --- arch/x86_64/kernel/mce.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index bbea88801d88..a7440cb9c14f 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -652,6 +652,7 @@ static void mce_remove_device(unsigned int cpu) sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); sysdev_unregister(&per_cpu(device_mce,cpu)); + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -- cgit v1.2.3 From 73ad8355d7db6a3cdcf313d4a4586a8f81b19c2f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: remove unused acpi_found_madt in mparse. remove unused acpi_found_madt in mparse.c Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/mpparse.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index b147ab19fbd4..08072568847d 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -35,8 +35,6 @@ int smp_found_config; unsigned int __initdata maxcpus = NR_CPUS; -int acpi_found_madt; - /* * Various Linux-internal data structures created from the * MP-table. -- cgit v1.2.3 From 616779656989cb8c59177e35cb13e87028b1edc8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: Synchronize RDTSC on single core AMD There is no guarantee that two RDTSCs in a row are monotonic, so don't assume it on single core AMD systems. This will make gettimeofday slower again Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a570c81c8316..05eaca41802b 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -732,11 +732,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Fix cpuid4 emulation for more */ num_cache_leaves = 3; - /* When there is only one core no need to synchronize RDTSC */ - if (num_possible_cpus() == 1) - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); - else - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + /* RDTSC can be speculated around */ + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) -- cgit v1.2.3 From e496a0da7f8110054d0ad4039245b0f49f9540f5 Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] Calgary: remove unused variables Spotted by d binderman . Signed-off-by: Muli Ben-Yehuda Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-calgary.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 0ddf29dae7e0..3215675ab128 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -653,14 +653,9 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev) static void __init calgary_reserve_regions(struct pci_dev *dev) { unsigned int npages; - void __iomem *bbar; - unsigned char busnum; u64 start; struct iommu_table *tbl = dev->sysdata; - bbar = tbl->bbar; - busnum = dev->bus->number; - /* reserve bad_dma_address in case it's a legal address */ iommu_range_reserve(tbl, bad_dma_address, 1); -- cgit v1.2.3 From f3d73707a1e84f0687a05144b70b660441e999c7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: Mark rdtsc as sync only for netburst, not for core2 On the Core2 cpus, the rdtsc instruction is not serializing (as defined in the architecture reference since rdtsc exists) and due to the deep speculation of these cores, it's possible that you can observe time go backwards between cores due to this speculation. Since the kernel already deals with this with the SYNC_RDTSC flag, the solution is simple, only assume that the instruction is serializing on family 15... The price one pays for this is a slightly slower gettimeofday (by a dozen or two cycles), but that increase is quite small to pay for a really-going-forward tsc counter. Signed-off-by: Arjan van de Ven Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 05eaca41802b..6595a4ebe7f1 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -860,7 +860,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); if (c->x86 == 6) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + if (c->x86 == 15) + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + else + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); -- cgit v1.2.3 From 446f713ba1afd68568139ae4691335ba273fa7f4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] unwinder: always use unlocked module list access in unwinder fallback We're already well protected against module unloads because module unload uses stop_machine(). The only exception is NMIs, but other users already risk lockless accesses here. This avoids some hackery in lockdep and also a potential deadlock This matches what i386 does. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 75ceccee178c..9864d195c408 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -317,9 +317,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, #define HANDLE_STACK(cond) \ do while (cond) { \ unsigned long addr = *stack++; \ - if (oops_in_progress ? \ - __kernel_text_address(addr) : \ - kernel_text_address(addr)) { \ + /* Use unlocked access here because except for NMIs \ + we should be already protected against module unloads */ \ + if (__kernel_text_address(addr)) { \ /* \ * If the address is either in the text segment of the \ * kernel, or in the region which contains vmalloc'ed \ -- cgit v1.2.3 From 359ad0d4015a9ab39243f2ebc4eb07915bd618b2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] unwinder: more sanity checks in Dwarf2 unwinder Tighten the requirements on both input to and output from the Dwarf2 unwinder. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 9864d195c408..4fdd162f0bef 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -225,12 +225,19 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context) { struct ops_and_data *oad = (struct ops_and_data *)context; int n = 0; + unsigned long sp = UNW_SP(info); + if (arch_unw_user_mode(info)) + return -1; while (unwind(info) == 0 && UNW_PC(info)) { n++; oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); } return n; } -- cgit v1.2.3 From a0429d0d7a6116dedcb71d9128da904bf135f189 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: Remove unwind stack pointer alignment forcing again This was added as a workaround for the fallback unwinder not supporting unaligned stack pointers properly. But now it was fixed to do that, so it's not needed anymore Cc: mingo@elte.hu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 4fdd162f0bef..a1641ffdffcf 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -309,12 +309,6 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, if (tsk && tsk != current) stack = (unsigned long *)tsk->thread.rsp; } - /* - * Align the stack pointer on word boundary, later loops - * rely on that (and corruption / debug info bugs can cause - * unaligned values here): - */ - stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); /* * Print function call entries within a stack. 'cond' is the -- cgit v1.2.3 From d331e739f5ad2aaa9d8553891ba6ca823bdbce37 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: Fix interrupt race in idle callback (3rd try) Idle callbacks has some races when enter_idle() sets isidle and subsequent interrupts that can happen on that CPU, before CPU goes to idle. Due to this, an IDLE_END can get called before IDLE_START. To avoid these races, disable interrupts before enter_idle and make sure that all idle routines do not enable interrupts before entering idle. Note that poll_idle() still has a this race as it has to enable interrupts before going to idle. But, all other idle routines have the race fixed. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andi Kleen --- arch/x86_64/kernel/process.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 0b7b4caa4f74..a418ee4c8c62 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -127,6 +127,7 @@ static void default_idle(void) */ static void poll_idle (void) { + local_irq_enable(); cpu_relax(); } @@ -208,6 +209,12 @@ void cpu_idle (void) idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + /* + * Idle routines should keep interrupts disabled + * from here on, until they go to idle. + * Otherwise, idle callbacks can misfire. + */ + local_irq_disable(); enter_idle(); idle(); /* In many cases the interrupt that ended idle @@ -245,8 +252,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { - local_irq_enable(); - mwait_idle_with_hints(0,0); + if (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); + } else { + local_irq_enable(); + } } void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -- cgit v1.2.3 From 3807fd46e94ab9f09e5ee3bff5e6515a94e9b3c7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: Clarify error message in GART code - Remove "Disabling IOMMU" message because it confuses people - Clarify that the GART IOMMU is refered to in other message Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-gart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 16261a8a3303..fc1960f1f243 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -601,10 +601,9 @@ void __init gart_iommu_init(void) (!force_iommu && end_pfn <= MAX_DMA32_PFN) || !iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { - printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); if (end_pfn > MAX_DMA32_PFN) { printk(KERN_ERR "WARNING more than 4GB of memory " - "but IOMMU not available.\n" + "but GART IOMMU not available.\n" KERN_ERR "WARNING 32bit PCI may malfunction.\n"); } return; -- cgit v1.2.3 From ad892f5e0d01f3c3b475a688d1ddc211cf3ea56d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:19 +0100 Subject: [PATCH] x86-64: check vector in setup_ioapic_dest to verify if need setup_IO_APIC_irq setup_IO_APIC_irqs could fail to get vector for some device when you have too many devices, because at that time only boot cpu is online. So check vector for irq in setup_ioapic_dest and call setup_IO_APIC_irq to make sure IO-APIC irq-routing table is initialized. Also seperate setup_IO_APIC_irq from setup_IO_APIC_irqs. Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Eric W. Biederman" Cc: Ingo Molnar Signed-off-by: Andrew Morton --- arch/x86_64/kernel/io_apic.c | 104 +++++++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 43 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index eaf0b708e67d..2a1dcd5f69c2 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -789,27 +789,65 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) handle_edge_irq, "edge"); } } - -static void __init setup_IO_APIC_irqs(void) +static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) { struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; + int vector; unsigned long flags; - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + /* + * add it to the IO-APIC irq-routing table: + */ + memset(&entry,0,sizeof(entry)); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; + entry.mask = 0; /* enable IRQ */ + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + + entry.trigger = irq_trigger(idx); + entry.polarity = irq_polarity(idx); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* enable IRQ */ + if (irq_trigger(idx)) { + entry.trigger = 1; + entry.mask = 1; entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + } + + if (!apic && !IO_APIC_IRQ(irq)) + return; + + if (IO_APIC_IRQ(irq)) { + cpumask_t mask; + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); + if (vector < 0) + return; + + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); + entry.vector = vector; + + ioapic_register_intr(irq, vector, IOAPIC_AUTO); + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } + + ioapic_write_entry(apic, pin, entry); + + spin_lock_irqsave(&ioapic_lock, flags); + set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + +} + +static void __init setup_IO_APIC_irqs(void) +{ + int apic, pin, idx, irq, first_notcon = 1; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { @@ -821,39 +859,11 @@ static void __init setup_IO_APIC_irqs(void) continue; } - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; - entry.mask = 1; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - } - irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); - if (!apic && !IO_APIC_IRQ(irq)) - continue; + setup_IO_APIC_irq(apic, pin, idx, irq); - if (IO_APIC_IRQ(irq)) { - cpumask_t mask; - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); - if (vector < 0) - continue; - - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); - entry.vector = vector; - - ioapic_register_intr(irq, vector, IOAPIC_AUTO); - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } - ioapic_write_entry(apic, pin, entry); - - spin_lock_irqsave(&ioapic_lock, flags); - set_native_irq_info(irq, TARGET_CPUS); - spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -2139,7 +2149,15 @@ void __init setup_ioapic_dest(void) if (irq_entry == -1) continue; irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity_irq(irq, TARGET_CPUS); + + /* setup_IO_APIC_irqs could fail to get vector for some device + * when you have too many devices, because at that time only boot + * cpu is online. + */ + if(!irq_vector[irq]) + setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); + else + set_ioapic_affinity_irq(irq, TARGET_CPUS); } } -- cgit v1.2.3 From b65780e123ba9b762276482bbfb52836e4d41fd9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:19 +0100 Subject: [PATCH] unwinder: move .eh_frame to RODATA The .eh_frame section contents is never written to, so it can as well benefit from CONFIG_DEBUG_RODATA. Diff-ed against firstfloor tree. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/x86_64/kernel/vmlinux.lds.S | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index d9534e750d4f..6a1f8f491e5d 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -51,15 +51,6 @@ SECTIONS RODATA -#ifdef CONFIG_STACK_UNWIND - . = ALIGN(8); - .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; - } -#endif - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { -- cgit v1.2.3 From 64a26a731235b59c9d73bbe82c1f896d57400d37 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:19 +0100 Subject: [PATCH] x86-64: Export smp_call_function_single smp_call_function() is exported, makes sense to export this one too. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 9f74c883568c..1bf0e094f439 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -385,6 +385,7 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, put_cpu(); return 0; } +EXPORT_SYMBOL(smp_call_function_single); /* * this function sends a 'generic call function' IPI to all other CPUs -- cgit v1.2.3